Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
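For example, to track a release branch instead (the branch name Release-1.2 below is illustrative; list the actual remote branches with "git branch -r"):

  git checkout --track -b Release-1.2 origin/Release-1.2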


Commit: checkpoint paging fix
File: palacios/src/palacios/svm.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

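// If SVM debugging is not enabled in the build configuration, compile the
// PrintDebug calls in this file away entirely (PrintError/V3_Print still print).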
#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas ("host VMCBs")
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
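        // (VMRUN fails with an invalid-guest-state exit if the guest's
        // EFER.SVME bit is clear, so it must remain set regardless of the
        // value the guest attempted to write.)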
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write,
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR,
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write,
        core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * chkpt_ctx){
    struct cr0_32 * shadow_cr0;
    vmcb_saved_state_t * guest_state;
    vmcb_ctrl_t * guest_ctrl;


    if (v3_chkpt_load(chkpt_ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1){
        return -1;
    }

    guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t *)(core->vmm_data));
    guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(core->vmm_data));


    core->rip = guest_state->rip;
    core->vm_regs.rsp = guest_state->rsp;
    core->vm_regs.rax = guest_state->rax;

    core->cpl = guest_state->cpl;

    core->ctrl_regs.cr0 = guest_state->cr0;
    core->ctrl_regs.cr2 = guest_state->cr2;
    core->ctrl_regs.cr4 = guest_state->cr4;
    core->dbg_regs.dr6 = guest_state->dr6;
    core->dbg_regs.dr7 = guest_state->dr7;
    core->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    core->ctrl_regs.rflags = guest_state->rflags;
    core->ctrl_regs.efer = guest_state->efer;


    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);

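    /* The restored control registers may put the core in a different paging
     * mode than it had before the load, so the shadow or passthrough page
     * tables must be re-activated below rather than left stale (presumably
     * the "checkpoint paging fix" this commit refers to). */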
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }


    v3_get_vmcb_segments((vmcb_t *)(core->vmm_data), &(core->segments));
    return 0;
}
#endif
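/* Note: v3_svm_save_core serializes the raw VMCB page under the "vmcb_data"
 * key, and v3_svm_load_core re-derives the core's software-visible register
 * state from the restored VMCB before re-activating the page tables. */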

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n",
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}

/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts disabled,
    // but before doing IRQ updates, so that any interrupts they raise get
    // seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n",
      (void *)(addr_t)info->segments.cs.base,
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    }

    PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->vcpu_id, info->pcpu_id,
               info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n",
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }



/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error...

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}
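/* Worked example (illustrative): a reset vector of rip = 0xf0 produces
 * CS.selector = 0xf000 and CS.base = 0xf0000 with RIP = 0, so the core
 * begins executing at linear address 0xf0000, as described above. */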


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
      V3_Print("SVM Not Available\n");
      return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        if (vm_cr_low & SVM_VM_CR_MSR_svmdis) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif



#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif