Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way: substitute the branch name for devel.
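For example, to track a release branch instead (the branch name Release-1.0 here is only illustrative; list the actual remote branches first):

  cd palacios
  git branch -r
  git checkout --track -b Release-1.0 origin/Release-1.0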


Latest commit: Cleaned up time management stuff, being more careful on signs of various time computa...

palacios/src/palacios/svm.c:
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas, registered
// with the hardware via the VM_HSAVE_PA MSR in v3_init_svm_cpu()
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


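// Allocate and zero one page for a VMCB.  The VMCB must be a single
// 4 KB-aligned physical page (AMD APM Vol. 2); this relies on the
// (assumed) property that V3_AllocPages() returns page-aligned memory.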
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept all SVM instructions so the guest cannot use SVM itself
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // CD | NW | WP | ET; WP is set so the memory hooks work in real mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    // Architectural reset values for the debug status/control registers
    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

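        // Architectural power-on default PAT: each byte selects a memory
        // type (0x06 = WB, 0x04 = WT, 0x07 = UC-, 0x00 = UC), so PA0-PA3
        // get WB/WT/UC-/UC and PA4-PA7 repeat the same pattern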
        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL; // same default PAT as above
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                &v3_handle_vm_cr_read,
                &v3_handle_vm_cr_write, 
                core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


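// Decide what to inject before the next VMRUN.  Exceptions are delivered
// through EVENTINJ, which the hardware injects unconditionally on entry;
// external IRQs instead use the virtual interrupt fields (V_IRQ /
// V_INTR_VECTOR), which are only taken when the guest is interruptible.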
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                // Note: EVENTINJ.valid is not set here, so nothing is
                // actually injected for this case
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                // Note: as with V3_NMI above, EVENTINJ.valid is left unset here
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    sint64_t tsc_offset;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    tsc_offset = v3_tsc_host_offset(&info->time_state);
    guest_ctrl->TSC_OFFSET = tsc_offset;


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // Snapshot the exit info while the VMCB is still valid (see the
    // caution above: its contents cannot be trusted after stgi)
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
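    // For example (illustrative value): a start vector of 0x9F yields
    // CS.selector = 0x9F00 (0x9F << 8) and CS.base = 0x9F000 (0x9F << 12),
    // so with rip = 0 the core begins executing at linear address 0x9F000.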
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}





/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a single flag bit, so test for nonzero rather than == 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}




#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2); // fill the IO bitmap with 1s (memset takes a byte value)
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif
