Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
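For example, to work on a release branch instead, list the remote branches and then track the one you want (RELEASE_BRANCH below is a placeholder; substitute a name that "git branch -r" actually shows):

  git branch -r
  git checkout --track -b RELEASE_BRANCH origin/RELEASE_BRANCH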


Below is palacios/src/palacios/svm.c, shown as of the commit "added dedicated debugging framework with associated interface".
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;
// Physical addresses of the per-CPU host state save areas ("host VMCBs")
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

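/*
 * Build the initial VMCB and core state for a PC-class guest: intercept
 * the instructions and events the VMM must mediate, place the core in
 * the legacy BIOS reset state, configure the paging mode, and hook the
 * MSRs that need special handling.
 */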
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */
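
    /*
     * These values approximate the x86 reset state: real mode, CS:IP =
     * 0xf000:0xfff0 with a CS base of 0xf0000, so the first fetch is at
     * linear 0xffff0, the legacy BIOS entry point just below 1MB.  RDX
     * carries a CPU family/stepping identifier, as hardware provides
     * after reset.
     */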
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;

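    /*
     * The I/O and MSR permission maps are bitmaps allocated elsewhere in
     * the VMM: each I/O port (and each MSR read/write) has a bit that
     * selects intercept vs. passthrough, consulted by hardware on every
     * access once the *_PROT intercepts are enabled.
     */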
    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

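    /*
     * Paging setup.  Under shadow paging the VMM maintains the real page
     * tables itself, so CR0/CR3/CR4 accesses, INVLPG, and page faults
     * must all be intercepted.  Under nested paging the hardware walks
     * both the guest and the nested tables, and none of those intercepts
     * are needed.
     */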
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    if (v3_chkpt_save_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not save SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) { 
        PrintError("Could not save SVM vmcb\n");
        return -1;
    }

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    if (v3_chkpt_load_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not load SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

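/*
 * Interrupt-injection bookkeeping.  An external IRQ moves through three
 * states: "pending" (raised but not yet presented to the guest),
 * "started" (V_IRQ was programmed into the VMCB on entry), and completed
 * (the guest actually took it, so EXITINTINFO is no longer valid).
 * update_irq_exit_state() advances this state machine after each exit;
 * update_irq_entry_state() programs EVENTINJ / V_IRQ before each entry.
 */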
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

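/*
 * RDTSC virtualization: either intercept RDTSC/RDTSCP and emulate them,
 * or let the guest read the hardware TSC directly, optionally biased by
 * TSC_OFFSET, which the hardware adds to every guest-visible read.
 */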
int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;
        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print("Calling v3_svm_launch\n");
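    /*
     * Run the guest.  The rdtsc pair brackets only the time actually
     * spent inside the guest; guest_cycles feeds v3_advance_time() and
     * timeout handling below so guest time advances by the right amount.
     */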
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // This update timers is for time-dependent handlers
    // if we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else  { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            
            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
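    // Example: an INIT vector of 0x9a yields CS selector 0x9a00 and
    // CS base 0x9a000, so with RIP = 0 execution resumes at 0x9a000.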
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}






/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {  // test the SVMDIS bit (a mask, not necessarily bit 0)
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
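    // On VMRUN the hardware saves host state into this area and restores
    // it at #VMEXIT; its physical address is handed to the CPU via the
    // VM_HSAVE_PA MSR below.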
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif