Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The release branches can be checked out the same way, for example:
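  git checkout --track -b Release-1.2 origin/Release-1.2

(The branch name above is only illustrative; run "git branch -r" in your clone to list the release branches that actually exist.)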


Below is palacios/src/palacios/svm.c from palacios.git, as of the commit "disable pause exiting".
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one per logical CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

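/* Allocate and zero a single 4KB page for a VMCB, returning its host virtual address (NULL on failure) */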
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

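/*
 * Initialize a VMCB for a BIOS boot: install the instruction/exception
 * intercepts and put the core into the standard x86 post-reset state
 * (real mode, CS=0xf000 with base 0xf0000, RIP=0xfff0).
 */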
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the SVM instructions along with other sensitive instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
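    // PAUSE exiting is left disabled (this is the change in this commit):
    // a PAUSE executed by the guest no longer forces a VM exit.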
    //    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

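/*
 * Reconcile the virtual interrupt state with what the hardware reported
 * at VM exit (e.g. whether an injected IRQ was actually taken by the guest).
 */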
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

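/*
 * Program the VMCB (EVENTINJ / virtual IRQ fields) to deliver any pending
 * exception, external IRQ, NMI, or software interrupt on the next VM entry.
 */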
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

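/*
 * Configure TSC virtualization: either trap RDTSC/RDTSCP, or let them run
 * natively with the hardware TSC_OFFSET applied.
 */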
int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (info->time_state.time_flags & V3_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;
        ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_advance_time(info);
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    v3_svm_config_tsc_virtualization(info);

    //V3_Print("Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info, &guest_cycles);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star = guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}


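/*
 * Per-core run loop: secondary cores wait here until the VM starts, then
 * every core repeatedly calls v3_svm_enter() until the VM stops or errors.
 */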
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            
            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}


int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a mask bit, so test it as a mask; comparing the masked
        // value against 1 (as this code previously did) would never match.
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


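/*
 * Enable SVM on this CPU: set EFER.SVME, then point SVM_VM_HSAVE_PA_MSR
 * at a freshly allocated host state save area.
 */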
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void *)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}


#endif