Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
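For example, to see which branches are available and track one of them (the branch name below is a placeholder for whatever `git branch -r` shows):

  git branch -r
  git checkout --track -b <branch> origin/<branch>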


File: palacios/src/palacios/vmx.c (commit aa821daee83534a9be6022d2ff7cc9711821d9ce)
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_cpu_types[];

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}
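/* Note the asymmetric conventions above: check_vmcs_write() collapses any
 * failure to 1, so callers can OR several calls together and test once,
 * while check_vmcs_read() passes through the raw VMX status code. */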



static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}




static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)hw_info.cr4.value);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;


    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_state->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.tr.selector = tmp_seg.selector;

    /* The GDTR *index* is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else
                    (0)
#endif
                    );

    vmx_state->host_state.tr.base = tmp_seg.base;

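    /* Illustrative example (not from the original source): a TR selector of
     * 0x0040 has index 0x0040 >> 3 = 8, so its descriptor sits at
     * gdtr_base + 8 * 8 = gdtr_base + 64; the TSS base is then reassembled
     * from the descriptor's base1/base2/base3(/base4) fields as above. */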

    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    vmx_state->pri_proc_ctrls.hlt_exit = 1;


    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));


#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif

    // Hook all accesses to EFER register
    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_handle_efer_write,
                core);

    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    // Cause VM_EXIT whenever the CR4.VMXE or CR4.PAE bits are written
    vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);


    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) &&
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else if ((core->shdw_pg_mode == NESTED_PAGING) &&
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we assume that unrestricted guest mode comes with EPT support


        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode


        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;
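        // Together these mirror the x86 reset state: CS.base + RIP =
        // 0xf0000 + 0xfff0 = 0xffff0, the traditional real-mode BIOS
        // entry point.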


        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                          &(core->segments.es), &(core->segments.fs),
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else {
        PrintError("Invalid virtual paging mode\n");
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {
#define IA32_STAR 0xc0000081
#define IA32_LSTAR 0xc0000082
#define IA32_FMASK 0xc0000084
#define IA32_KERN_GS_BASE 0xc0000102

#define IA32_CSTAR 0xc0000083 // Compatibility mode STAR (ignored for now... hopefully it's not that important...)

        int msr_ret = 0;

        struct vmcs_msr_entry * exit_store_msrs = NULL;
        struct vmcs_msr_entry * exit_load_msrs = NULL;
        struct vmcs_msr_entry * entry_load_msrs = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area = V3_VAddr(V3_AllocPages(1));

        if (vmx_state->msr_area == NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);


        exit_store_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area);
        exit_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 4));
        entry_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 8));
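        /* The single msr_area page is carved into three 4-entry arrays:
         *   exit_store_msrs  - guest MSR values are saved here on VM exit
         *   exit_load_msrs   - host MSR values are restored from here on VM exit
         *   entry_load_msrs  - guest MSR values are loaded from here on VM entry
         */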

        exit_store_msrs[0].index = IA32_STAR;
        exit_store_msrs[1].index = IA32_LSTAR;
        exit_store_msrs[2].index = IA32_FMASK;
        exit_store_msrs[3].index = IA32_KERN_GS_BASE;

        // Copy the index list into the two load areas as well
        memcpy(exit_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);
        memcpy(entry_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);


        v3_get_msr(IA32_STAR, &(exit_load_msrs[0].hi), &(exit_load_msrs[0].lo));
        v3_get_msr(IA32_LSTAR, &(exit_load_msrs[1].hi), &(exit_load_msrs[1].lo));
        v3_get_msr(IA32_FMASK, &(exit_load_msrs[2].hi), &(exit_load_msrs[2].lo));
        v3_get_msr(IA32_KERN_GS_BASE, &(exit_load_msrs[3].hi), &(exit_load_msrs[3].lo));

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(exit_store_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(exit_load_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(entry_load_msrs));

    }

    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif




    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }

    // re-enable global interrupts now that the vm state is initialized.
    // If another VM kicks us off, it'll update our vmx state so that
    // we know to reload ourselves
    v3_enable_ints();

    return 0;
}

int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return -1;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return -1;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}



#ifdef V3_CONFIG_CHECKPOINT
/*
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    uint64_t vmcs_ptr = vmcs_store();

    v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE, (void *)vmcs_ptr);

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    char vmcs[PAGE_SIZE_4KB];

    v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, vmcs);

    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmcs_load((addr_t)vmcs);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (shadow_cr0->pg){
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif


static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n",
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) &&
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info);
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u(EIP=%p)\n",
                               info->intr_core_state.irq_vector,
                               (uint32_t)info->num_exits,
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}



static struct vmx_exit_info exit_log[10];
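// Ring buffer of the last 10 exits, indexed by num_exits % 10;
// print_exit_log() below walks it from most recent to oldest.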

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;


    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

/*
 * CAUTION and DANGER!!!
 *
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    // Update timer devices late after being in the VM so that as much
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get
    // handled on the next VM entry. Must be done with interrupts disabled.
    v3_update_timers(info);

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
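    // The 64-bit TSC offset is written as two 32-bit VMCS fields:
    // VMCS_TSC_OFFSET takes the low half, VMCS_TSC_OFFSET_HIGH the high half.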

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }


    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;

        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d\n", error);
        return -1;
    }

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // re-enable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n",info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("VMX core %u\n", info->vcpu_id);

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);


            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) &&
        (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
        // easy
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}



void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;

    if (cpu_id == 0) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();
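    // The VMXON region shares the VMCS's size and revision-id header,
    // which is why the VMCS allocator can be reused for it.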


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}