Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or
one of the release branches. To switch to the devel branch, execute:

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out the same way.
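
For example, to track a release branch (the name Release-1.3 below is
hypothetical; run "git branch -r" to list the branches that actually exist):

  git checkout --track -b Release-1.3 origin/Release-1.3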


palacios.git / palacios/src/palacios/vmx.c
commit aba80c76a1c187dd09d1a5a03691e43f25cf773d
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>
#include <palacios/vmm_debug.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

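/* Per-physical-CPU physical addresses of the VMXON regions (filled in by v3_init_vmx_cpu) */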
static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError(VM_NONE, VCORE_NONE, "VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError(VM_NONE, VCORE_NONE, "VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


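/* Allocate and zero one page for a VMCS (also used for the per-CPU VMXON region),
 * stamp it with the hardware's VMCS revision id, and return its physical address */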
static addr_t allocate_vmcs() {
    void *temp;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug(VM_NONE, VCORE_NONE, "Allocating page\n");

    temp = V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
    if (!temp) { 
        PrintError(VM_NONE, VCORE_NONE, "Cannot allocate VMCS\n");
        return -1;
    }
    vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug(VM_NONE, VCORE_NONE, "VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print(core->vm_info, core, "\n\nEFER READ (val = %p)\n", (void *)efer->value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print(core->vm_info, core, "\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif


static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug(core->vm_info, core, "Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError(core->vm_info, core, "VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print(core->vm_info, core, "CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print(core->vm_info, core, "CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.virt_nmi = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    /* We enable the preemption timer by default to measure guest time accurately */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print(core->vm_info, core, "VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    // Exit on HLT so the host regains control when the guest halts
    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    // CPUID tells the guest that it does not have MONITOR/MWAIT,
    // so exit if the guest uses them anyway
    vmx_state->pri_proc_ctrls.monitor_exit = 1;
    vmx_state->pri_proc_ctrls.mwait_exit = 1;

    // We don't need to handle PAUSE, although this is where we could
    // pull out of a spin-lock acquire or deschedule to find its partner
    vmx_state->pri_proc_ctrls.pause_exit = 0;

    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));



#ifdef __V3_64BIT__
    // Ensure the host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif



    // Restore the host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore the guest's EFER register to/from the VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Set up the guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

    // Capture CR8 modifications so that we can keep the apic_tpr correct
    vmx_state->pri_proc_ctrls.cr8_ld_exit = 1;
    vmx_state->pri_proc_ctrls.cr8_str_exit = 1;


    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug(core->vm_info, core, "Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
            return -1;
        }
        
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));


        // Cause a VM_EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        v3_activate_passthrough_pt(core);

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        // Note that we intercept cr4.pae writes
        // and we have cr4 read-shadowed to the shadow pager's cr4

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;
        
        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause a VM_EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
        
        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging



        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError(core->vm_info, core, "Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we assume that unrestricted guest mode is available with EPT

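        // Start the core where a BIOS expects it: real-mode execution at
        // 0xf000:0xfff0 (physical 0xffff0), with CR0/CR4 adjusted for VMX
        // operation (NE and VMXE set)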
        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030; 
        core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flags
        

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;



        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];
        
            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;
        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        // Cause a VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
#define CR0_CD 0x40000000
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE | CR0_CD);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->cd = 0;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError(core->vm_info, core, "Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError(core->vm_info, core, "Invalid virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area
    
    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {
        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print(core->vm_info, core, "Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError(core->vm_info, core, "Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
        
        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError(core->vm_info, core, "Could not allocate MSR load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));


        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);


        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError(core->vm_info, core, "Error configuring MSR save/restore area\n");
            return -1;
        }
    }

    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/
  
    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif


    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError(core->vm_info, core, "Could not write control fields!\n");
        return -1;
    }
    
    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError(core->vm_info, core, "Could not write host state\n");
        return -1;
    }
    */

    // Re-enable global interrupts now that the vm state is initialized.
    // If another VM kicks us off this CPU, it will update our vmx state
    // so that we know to reload ourselves
    v3_enable_ints();

    return 0;
}


static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;
    
    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    if (!vmx_state) {
        PrintError(core->vm_info, core, "Unable to allocate VMX state while initializing the VMCS\n");
        return;
    }

    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug(core->vm_info, core, "vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug(core->vm_info, core, "Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug(core->vm_info, core, "VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug(core->vm_info, core, "Initializing VMCS (addr=%p)\n", core->vmm_data);
    
    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug(core->vm_info, core, "Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError(core->vm_info, core, "VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError(core->vm_info, core, "Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError(core->vm_info, core, "Invalid VM class\n");
        return;
    }

    PrintDebug(core->vm_info, core, "Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}



int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError(core->vm_info, core, "Could not find a valid CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError(core->vm_info, core, "Error initializing VMX core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}



#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
  struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
  
  // note that the vmcs pointer is an HPA, but we need an HVA
  if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                    V3_VAddr((void*) (vmx_info->vmcs_ptr_phys)))) {
    PrintError(core->vm_info, core, "Could not save vmcs data for VMX\n");
    return -1;
  }
  
  return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
  struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
  struct cr0_32 * shadow_cr0;
  addr_t vmcs_page_paddr;  //HPA
  
  vmcs_page_paddr = (addr_t) V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
  
  if (!vmcs_page_paddr) { 
    PrintError(core->vm_info, core, "Could not allocate space for a vmcs in VMX\n");
    return -1;
  }
  
  if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                    V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
    PrintError(core->vm_info, core, "Could not load vmcs data for VMX\n");
    V3_FreePages((void*)vmcs_page_paddr, 1);
    return -1;
  }

  vmcs_clear(vmx_info->vmcs_ptr_phys);
  
  // Probably need to delete the old one... 
  V3_FreePages((void*)(vmx_info->vmcs_ptr_phys), 1);
  
  vmcs_load(vmcs_page_paddr);
  
  v3_vmx_save_vmcs(core);

  shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


  /* Get the CPU mode to set the guest_ia32e entry ctrl */
  
  if (core->shdw_pg_mode == SHADOW_PAGING) {
    if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
      if (v3_activate_shadow_pt(core) == -1) {
        PrintError(core->vm_info, core, "Failed to activate shadow page tables\n");
        return -1;
      }
    } else {
      if (v3_activate_passthrough_pt(core) == -1) {
        PrintError(core->vm_info, core, "Failed to activate passthrough page tables\n");
        return -1;
      }
    }
  }
  
  return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}



static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware;
        // software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print(info->vm_info, info, "Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print(info->vm_info, info, "IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                  
                    int irq = v3_get_intr(info); 

                    if (irq < 0) {
                      break;
                    }

                    info->intr_core_state.irq_vector = irq; 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print(info->vm_info, info, "Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug(info->vm_info, info, "Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug(info->vm_info, info, "Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here; Intel doesn't have virtual IRQs
                    // (they may be the same as external interrupts/IRQs)

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable interrupt-window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Enabling interrupt-window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}



static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];



static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;
    

    V3_Print(info->vm_info, info, "\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print(info->vm_info, info, "%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print(info->vm_info, info, "\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print(info->vm_info, info, "\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print(info->vm_info, info, "\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print(info->vm_info, info, "\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print(info->vm_info, info, "\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print(info->vm_info, info, "\tRIP = %p\n", (void *)rip_log[cnt]);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }
    }
}

int v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

    // Update timer devices late, after being in the VM, so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

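    // If this core's VMCS is not the one currently active on this CPU
    // (another core ran here since our last entry), make it active again;
    // the VMCLEAR forces a fresh VMLAUNCH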
    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

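    // Read the guest CR3 out of the VMCS and write it straight back,
    // apparently to force the value to be re-latched before entry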
    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError(info->vm_info, info, "Could not write host state\n");
        return -1;
    }
    
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }
        
        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }

    V3_FP_ENTRY_RESTORE(info);

    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(info->vm_info, info, vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;    

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif
    }

    //  PrintDebug(info->vm_info, info, "VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError(info->vm_info, info, "VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);



    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug(info->vm_info, info, "VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }


    // Lastly we check for an NMI exit and, if one occurred, hand the NMI
    // to the host by software-invoking vector 2
    {
        struct vmx_basic_exit_info * basic_info = (struct vmx_basic_exit_info *)&(exit_info.exit_reason);

        if (basic_info->reason == VMX_EXIT_INFO_EXCEPTION_OR_NMI) {
            if ((uint8_t)exit_info.int_info == 2) {
                asm("int $2");
            }
        }
    }

    // re-enable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError(info->vm_info, info, "Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug(info->vm_info, info, "Starting VMX core %u\n", info->vcpu_id);

    while (1) {
        if (info->core_run_state == CORE_STOPPED) {
            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else {
                
                PrintDebug(info->vm_info, info, "VMX core %u: Waiting for core initialization\n", info->vcpu_id);

                V3_NO_WORK(info);
                
                while (info->core_run_state == CORE_STOPPED) {
                    
                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized. 
                        return 0;
                    }

                    V3_STILL_NO_WORK(info);
                    //PrintDebug(info->vm_info, info, "VMX core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);

                PrintDebug(info->vm_info, info, "VMX core %u initialized\n", info->vcpu_id);
                
                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            }
            
            
            PrintDebug(info->vm_info, info, "VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
                       info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
                       info->segments.cs.limit, (void *)(info->rip));
            
            
            PrintDebug(info->vm_info, info, "VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);
            
            v3_start_time(info);
            
            
            if (info->vm_info->run_state == VM_STOPPED) {
                info->core_run_state = CORE_STOPPED;
                break;
            }
        }
        
        
#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif
        
        
        if (v3_vmx_enter(info) == -1) {
            
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print(info->vm_info, info, "VMX core %u: VMX ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print(info->vm_info, info, "VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print(info->vm_info, info, "VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print(info->vm_info, info, "VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);


            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print(info->vm_info, info, "VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug(VM_NONE, VCORE_NONE, "ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
        
        PrintDebug(VM_NONE, VCORE_NONE, "MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug(VM_NONE, VCORE_NONE, "VMX is locked -- enable it in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug(VM_NONE, VCORE_NONE, "VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

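    // With EPT + unrestricted guest the core can restart directly in real
    // mode, mimicking an INIT/SIPI: startup vector V gives CS.selector = V << 8
    // and CS.base = V << 12, with IP left at 0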
    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}



void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError(VM_NONE, VCORE_NONE, "Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print(VM_NONE, VCORE_NONE, "VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug(VM_NONE, VCORE_NONE, "VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));
        
        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
    
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print(VM_NONE, VCORE_NONE, "Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError(VM_NONE, VCORE_NONE, "Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}