Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
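For example, to work from a release branch, first list the remote branches and then track the one you want (the branch name below is illustrative; use one that "git branch -r" actually reports):

  git branch -r
  git checkout --track -b release-1.0 origin/release-1.0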


[palacios.git] / palacios / src / palacios / svm.c @ commit 6064855f3652ded4c25a18fc58e8d849b254248f
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the host state save areas, one per physical CPU
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

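// Allocate and zero a single page for a VMCB. AMD requires the VMCB to be a
// 4KB-aligned physical page; V3_AllocPages() hands back page-granular memory,
// so a one-page allocation satisfies the alignment requirement.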
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

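// Configure the VMCB and core state for a fresh BIOS boot: set the intercepts
// Palacios must see (the SVM instructions, HLT, CPUID, I/O, MSR accesses, and
// external interrupts), then place the core in the x86 reset state
// (CS=0xf000 with base 0xf0000, RIP=0xfff0, so the first instruction fetch
// is from 0xffff0, the traditional BIOS entry point).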
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

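// Reconcile interrupt-injection bookkeeping after a VM exit: detect whether a
// previously posted virtual interrupt was acknowledged by the guest (hardware
// clears V_IRQ on acknowledge) or fully taken (EXITINTINFO no longer valid),
// and notify the interrupt core accordingly.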
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

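// Decide what, if anything, to inject on the next VMRUN. Exceptions go through
// the EVENTINJ field, which the hardware delivers unconditionally on entry;
// external interrupts use the virtual-interrupt fields (V_IRQ/V_INTR_VECTOR)
// so that the hardware delivers them only when the guest is interruptible.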
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts disabled,
    // but before doing IRQ updates, so that any interrupts they raise get
    // seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    // guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}

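// Core execution loop for an SVM guest: the boot core (vcpu 0) flips the VM to
// running while application cores spin until then. Each iteration enters the
// guest via v3_svm_enter(); on an entry/exit error the loop dumps the guest
// state plus the instruction bytes at the faulting RIP before bailing out.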
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr = 0;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}






/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a mask bit, so test for a nonzero result rather than == 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


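// Per-CPU SVM bring-up: set EFER.SVME to enable SVM on this core, then point
// the VM_HSAVE_PA MSR at a host state save area, which the hardware uses to
// stash host context across VMRUN/#VMEXIT.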
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // memset's fill value is a byte: use 0xff, not 0xffffffff
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}


#endif