Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out the same way.
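For example, to check out a release branch (the branch name below is illustrative; list the actual remote branches first), you would run

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2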


Commit: decoder fix for cut/paste bug in rm decoding
File: palacios/src/palacios/svm.c (palacios-OLD.git)
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

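/* When SVM debugging is not configured, compile the PrintDebug calls in
 * this file away entirely. */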
#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-physical-CPU physical addresses of the host state save areas
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

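/* Allocate and zero a single page to hold a VMCB. Returns the host
 * virtual address of the page; the launch path converts back to a
 * physical address when handing the VMCB to hardware. */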
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}

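/* MSR hook for guest writes to EFER. The generic handler applies the
 * write; afterwards we force the hardware-visible EFER.SVME bit back on,
 * since the guest must not be able to disable SVM while running under it. */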
static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

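/* Initialize the VMCB and core register state for a BIOS boot: intercept
 * the SVM instruction set and other sensitive operations, place the core
 * at the x86 reset vector (CS:IP = 0xf000:0xfff0), and set up the I/O,
 * MSR, and paging (shadow or nested) intercepts. */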
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write,
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR,
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write,
        core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}

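/* Checkpoint support: a core's SVM state is captured by saving its CPL
 * plus the raw VMCB page; restore is the mirror image. */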
#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){

    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

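/* Reconcile software interrupt state with what the hardware reported at
 * VM exit: if the guest acknowledged the pending IRQ (V_IRQ cleared) we
 * mark the injection as started; if exit_int_info is valid, the exit
 * interrupted delivery before it completed. */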
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

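/* Decide what to inject on the next VM entry, in priority order: a
 * pending exception (via EVENTINJ), continuation of an IRQ that is
 * already being injected (via the virtual interrupt fields), or a newly
 * pending external IRQ, NMI, software interrupt, or virtual IRQ. */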
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt -- type: %d, vector: %d\n",
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}

/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n",
      (void *)(addr_t)info->segments.cs.base,
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}

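/* Core run loop. Core 0 starts the VM immediately; secondary cores spin
 * until they are set running (or bail out if the VM is stopped first).
 * Each iteration enters the guest and, on an unrecoverable error, dumps
 * the guest state along with the faulting instruction bytes. */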
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized.
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->pcpu_id,
               info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n",
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error...

    return 0;
}



int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}



/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


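/* Per-CPU SVM bring-up: set EFER.SVME, allocate a host state save area,
 * and point the VM_HSAVE_PA MSR at it. The detected capability (with or
 * without nested paging) is recorded in v3_cpu_types. */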
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif



#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif