Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out the same way.
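For example, to see which remote branches exist and track one of them (the branch name is whatever the first command reports; the commands below are a sketch of the same pattern as above):

  git branch -r
  git checkout --track -b <branch> origin/<branch>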


initial implementation of hookable exits
palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


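/* Allocate and zero one page to back a VMCB; returns the host virtual
 * address of the page, or NULL if the allocation fails. */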
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


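/* Initialize the VMCB and core state for a PC-class guest: configure the
 * instruction/exception intercepts, place the core at the x86 reset vector
 * (CS:IP = 0xf000:0xfff0 in real mode), and hook the MSRs the VMM needs to
 * mediate. */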
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}

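/* Checkpoint support: a core's SVM-specific state is just its CPL plus the
 * raw VMCB page, saved and restored verbatim. */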
#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    if (v3_chkpt_save_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not save SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) { 
        PrintError("Could not save SVM vmcb\n");
        return -1;
    }

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    if (v3_chkpt_load_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not load SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        PrintError("Could not load SVM vmcb\n");
        return -1;
    }

    return 0;
}
#endif

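/* Reconcile the VMM's IRQ-injection bookkeeping with what the hardware
 * reports after an exit: an injection is "started" once the guest has
 * acknowledged the virtual interrupt (V_IRQ cleared), and "finished" once
 * EXITINTINFO is no longer valid, i.e. delivery was not interrupted by
 * the exit. */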
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

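/* Program the VMCB's injection fields before entry.  Exceptions and
 * software interrupts go through EVENTINJ, which forces delivery on the
 * next VMRUN; external IRQs use the virtual interrupt mechanism
 * (V_IRQ/V_INTR_VECTOR), which is only delivered when the guest can
 * accept it (with V_IGN_TPR set here, only EFLAGS.IF gates delivery). */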
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

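/* Configure TSC handling for the next entry: when RDTSC is not trapped,
 * either pass the host TSC through unmodified or apply the per-core
 * offset computed by the time subsystem. */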
int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (!(info->time_state.flags & VM_TIME_TRAP_RDTSC)) {
        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print("Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star = guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);

    // This update timers is for time-dependent handlers
    // if we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}


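/* Per-core run loop: core 0 marks itself runnable immediately; other cores
 * spin until the BSP releases them.  Each core then repeatedly enters the
 * guest until the VM stops or an entry fails. */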
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            
            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info, -1);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}


int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a bit mask, so test for a nonzero result rather than
        // comparing the masked value to 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


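/* Per-CPU SVM bring-up: set EFER.SVME to enable SVM, allocate the host
 * state save area, and point SVM_VM_HSAVE_PA_MSR at it. */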
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    if (!host_vmcbs[cpu_id]) {
        PrintError("Failed to allocate VMCB\n");
        return;
    }

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;


  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  if (!ctrl_area->IOPM_BASE_PA) { 
      PrintError("Cannot allocate IO bitmap\n");
      return;
  }
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif