Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
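For instance, to track a release branch (the branch name Release-1.2 below is only illustrative; run "git branch -r" to see which branches actually exist):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2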


More extensive error checking in checkpoint/restore + other cleanup
File: palacios/src/palacios/svm.c (palacios.git)
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one per CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

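/* 
 * Allocate and zero one page to hold a VMCB.
 * Returns the VMCB's virtual address, or NULL if the page allocation fails.
 */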
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


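/* 
 * EFER write hook: run the architecture-independent EFER handler first,
 * then force the hardware-visible EFER.SVME bit back on, since SVM must
 * remain enabled underneath the guest.
 */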
static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


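/* 
 * Populate a fresh VMCB and the core's register state for a BIOS-entry
 * (real-mode reset) start: configure instruction/exception intercepts,
 * the IO and MSR permission maps, the paging mode, and MSR hooks.
 */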
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the SVM-related instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for (i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    if (v3_chkpt_save_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not save SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) { 
        PrintError("Could not save SVM vmcb\n");
        return -1;
    }

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    if (v3_chkpt_load_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not load SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        PrintError("Could not load SVM vmcb\n");
        return -1;
    }

    return 0;
}
#endif

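/* 
 * Called after a VM exit to track virtual IRQ delivery: if a pending
 * IRQ's V_IRQ bit was cleared by hardware, the interrupt was acknowledged
 * and is now in flight; once EXITINTINFO is no longer valid, a started
 * interrupt was fully vectored by the guest.
 */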
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

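/* 
 * Called before VM entry: program EVENTINJ and the virtual interrupt
 * fields for whatever is pending, in priority order -- exceptions first,
 * then a previously started IRQ, then external/NMI/software/virtual
 * interrupts.
 */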
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt -- type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

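/* 
 * Program RDTSC/RDTSCP interception and the hardware TSC_OFFSET from the
 * core's time virtualization flags: either trap both instructions, or
 * pass them through with a zero or computed host offset.
 */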
int v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;
        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices after time spent in the VM, and before the 
    // IRQ updates, so that any interrupts they raise are seen 
    // immediately.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // Disable global interrupts for the VM state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print("Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    // Save guest state from the VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star = guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // Save exit info here, before the VMCB can change again
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // Re-enable global interrupts after the VM exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // This timer update is for time-dependent handlers
    // when we are slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}

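/* 
 * Main per-core run loop: core 0 starts immediately, while secondary
 * cores spin until they are marked runnable. Each iteration enters the
 * guest via v3_svm_enter(), and diagnostic state is dumped on error.
 */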
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            
            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}


int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
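    // Worked example (illustrative): for start vector rip = 0x12,
    // selector = 0x12 << 8 = 0x1200 and base = 0x12 << 12 = 0x12000,
    // so with rip = 0 execution begins at linear address 0x12000.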
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // Test the SVMDIS bit with a mask, not an equality against 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}


static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}

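/* 
 * Per-CPU SVM bring-up: set EFER.SVME, allocate this CPU's host state
 * save area, point SVM_VM_HSAVE_PA_MSR at it, and record whether the
 * CPU supports nested paging.
 */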
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for (i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;


  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // memset takes a fill byte; 0xff sets every bit in the bitmap
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif