Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out the same way.
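
For example, to track a hypothetical release branch named Release-1.2 instead, you would run

  cd palacios
  git checkout --track -b Release-1.2 origin/Release-1.2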


palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


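// When SVM debugging is not configured, PrintDebug calls compile away to nothing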
#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas used by SVM
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;

    // Intercept the SVM instructions so the guest cannot itself use SVM
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;

    // guest_state->cr0 = 0x00000001;    // PE 

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

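        // 0x0007040600070406 is the architectural power-on default PAT value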
        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){

    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
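        // An injection was started but not yet acknowledged by the guest;
        // re-assert the virtual IRQ so it is delivered on this entry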
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}

int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

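    // Either intercept RDTSC/RDTSCP and emulate them, or let the guest
    // execute them directly with the hardware TSC_OFFSET applied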
    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;
        ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print("Calling v3_svm_launch\n");
    {
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // This update timers is for time-dependent handlers
    // if we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }

    PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error... 

    return 0;
}


int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
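    // Example: rip (vector) = 0x12 -> CS selector = 0x1200, CS base = 0x12000,
    // so the core starts executing at linear address 0x12000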
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
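/* 0F 01 DB and 0F 01 DA are the raw opcodes for VMSAVE and VMLOAD, spelled
   out as bytes (e.g., for assemblers without SVM mnemonic support) */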
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;


  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}


#endif