Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
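For example, to work on a release branch (here "Release-1.2" is a placeholder branch name; list the real remote branches with "git branch -r"):

  git checkout --track -b Release-1.2 origin/Release-1.2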


Commit: Added TSC passthrough specification to time handling
File: palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one per logical CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

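/* Allocate and zero a single page to serve as this core's VMCB.
 * Returns the host virtual address of the page, or NULL on failure. */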
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


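/* EFER write hook: run the architecture-independent handler first, then
 * force the hardware-visible SVME bit back on, since the guest must not be
 * able to disable SVM on the physical CPU while it is being virtualized. */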
static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


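/* Configure the VMCB intercepts and initial guest state for a PC-class VM:
 * intercept the SVM instruction set and key events, set the guest registers
 * and segments to the canonical x86 reset state (CS=0xf000, RIP=0xfff0),
 * hook EFER and VM_CR, and set up shadow or nested paging. */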
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;

    // guest_state->cr0 = 0x00000001;    // PE 

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


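/* Checkpoint support: the per-core SVM state is just the CPL and the raw
 * VMCB page, saved and restored verbatim. */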
#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){

    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

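/* Interrupt-window bookkeeping across exits.  An IRQ is "pending" from the
 * time we set V_IRQ until the guest acknowledges it (hardware clears V_IRQ),
 * and "started" until it has been fully vectored; EXITINTINFO indicates an
 * injection that was cut short by the exit and must be retried. */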
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}

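/* Configure how the guest sees the TSC: either trap RDTSC/RDTSCP and
 * emulate, or let the guest read the hardware TSC directly, applying
 * either no offset (full passthrough) or the VMM-maintained offset. */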
int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;
        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
                ctrl_area->TSC_OFFSET = 0;
        } else {
                ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
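/* One guest entry/exit round trip: yield if our timeslice is up, advance
 * virtual time, disable global interrupts (CLGI), copy guest state into the
 * VMCB, inject any pending events, VMRUN, copy state back out, re-enable
 * interrupts (STGI), and dispatch the exit to v3_handle_svm_exit(). */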
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print("Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // This timer update is for time-dependent handlers
    // when we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}


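/* Core run loop.  Core 0 starts immediately; secondary cores spin until core
 * initialization releases them.  All cores then loop on v3_svm_enter(),
 * pausing at the barrier each iteration and stopping when the VM is stopped
 * or an entry fails. */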
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }



/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}






/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
      V3_Print("SVM Not Available\n");
      return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        // SVMDIS is a bit flag, so test the mask for nonzero rather than == 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


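/* Per-CPU SVM bring-up: set EFER.SVME, allocate the host state save area,
 * and point SVM_VM_HSAVE_PA_MSR at it.  v3_deinit_svm_cpu() reverses this. */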
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}




#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif
