Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are checked out the same way.
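For example, to track a release branch (the Release-1.2 name below is illustrative; use whatever branch names "git branch -r" actually reports):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2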


Commit: Cleanup based on cppcheck pass (Core)
File:   palacios/src/palacios/vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>
#include <palacios/vmm_debug.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifdef V3_CONFIG_MEM_TRACK
#include <palacios/vmm_mem_track.h>
#endif

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

/* Per-CPU physical addresses of the VMXON regions; 0 means VMX is not enabled on that CPU */
static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError(VM_NONE, VCORE_NONE, "VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError(VM_NONE, VCORE_NONE, "VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


/* Allocate and initialize a VMCS region; returns its physical address, or (addr_t)-1 on failure */
static addr_t allocate_vmcs(void) {
    void * temp;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug(VM_NONE, VCORE_NONE, "Allocating page\n");

    temp = V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
    if (!temp) { 
        PrintError(VM_NONE, VCORE_NONE, "Cannot allocate VMCS\n");
        return -1;
    }
    vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug(VM_NONE, VCORE_NONE, "VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print(core->vm_info, core, "\n\nEFER READ (val = %p)\n", (void *)efer->value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();


    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print(core->vm_info, core, "\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif


/* Build the initial (BIOS boot-time) VMCS state for a core; the setup differs
   based on whether shadow paging, EPT, or EPT with unrestricted guest is used */
static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get Available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */


    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug(core->vm_info, core, "Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError(core->vm_info, core, "VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print(core->vm_info, core, "CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print(core->vm_info, core, "CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.virt_nmi = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;



    /* We enable the preemption timer by default to measure accurate guest time */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print(core->vm_info, core, "VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    // we want it to use this when halting
    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    // cpuid tells it that it does not have these instructions
    vmx_state->pri_proc_ctrls.monitor_exit = 1;
    vmx_state->pri_proc_ctrls.mwait_exit = 1;

    // we don't need to handle a pause, although this is where
    // we could pull out of a spin lock acquire or schedule to find its partner
    vmx_state->pri_proc_ctrls.pause_exit = 0;

    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));



#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif



    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Setup the guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

    // Capture CR8 mods so that we can keep the apic_tpr correct
    vmx_state->pri_proc_ctrls.cr8_ld_exit = 1;
    vmx_state->pri_proc_ctrls.cr8_str_exit = 1;


    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug(core->vm_info, core, "Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));


        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        v3_activate_passthrough_pt(core);

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        // Note that we intercept cr4.pae writes
        // and we have cr4 read-shadowed to the shadow pager's cr4

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging



        if (v3_init_nested_paging_core(core, &hw_info) == -1) {
            PrintError(core->vm_info, core, "Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT


        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030;
        core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flag


        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;



        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        // Cause VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
#define CR0_CD 0x40000000
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE | CR0_CD);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->cd = 0;

        if (v3_init_nested_paging_core(core, &hw_info) == -1) {
            PrintError(core->vm_info, core, "Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError(core->vm_info, core, "Invalid Virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {

        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print(core->vm_info, core, "Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError(core->vm_info, core, "Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1); // need not be shadow-safe, not exposed to guest

        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError(core->vm_info, core, "Could not allocate MSR load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));


        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);


        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError(core->vm_info, core, "Error configuring MSR save/restore area\n");
            return -1;
        }


    }

    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif

    if (vmx_ret != VMX_SUCCESS) { 
        PrintError(core->vm_info, core, "Error configuring VMX\n");
        return -1;
    }



    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError(core->vm_info, core, "Could not write control fields!\n");
        return -1;
    }

    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError(core->vm_info, core, "Could not write host state\n");
        return -1;
    }
    */

    // Re-enable global interrupts now that the VM state is initialized.
    // If another VM kicks us off this core, it will update our vmx state
    // so that we know to reload ourselves.
    v3_enable_ints();

    return 0;
}


static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    if (!vmx_state) {
        PrintError(core->vm_info, core, "Unable to allocate memory while initializing vmx vmcs\n");
        return;
    }

    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug(core->vm_info, core, "vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug(core->vm_info, core, "Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    // allocate_vmcs() returns (addr_t)-1 on failure
    if (vmx_state->vmcs_ptr_phys == (addr_t)-1) {
        PrintError(core->vm_info, core, "Could not allocate VMCS page\n");
        V3_Free(vmx_state);
        return;
    }

    PrintDebug(core->vm_info, core, "VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug(core->vm_info, core, "Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug(core->vm_info, core, "Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError(core->vm_info, core, "VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError(core->vm_info, core, "Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError(core->vm_info, core, "Invalid VM Class\n");
        return;
    }

    PrintDebug(core->vm_info, core, "Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) { 
        PrintError(core->vm_info, core, "VMCS Clear failed\n");
        return;
    }

    core->core_run_state = CORE_STOPPED;
    return;
}



int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError(core->vm_info, core, "Could not find a valid CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError(core->vm_info, core, "Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}



#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
  struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

  // note that the vmcs pointer is an HPA, but we need an HVA
  if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                    V3_VAddr((void *)(vmx_info->vmcs_ptr_phys)))) {
    PrintError(core->vm_info, core, "Could not save vmcs data for VMX\n");
    return -1;
  }

  return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
  struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
  struct cr0_32 * shadow_cr0;
  addr_t vmcs_page_paddr;  // HPA

  vmcs_page_paddr = (addr_t)V3_AllocPages(1); // need not be shadow-safe, not exposed to guest

  if (!vmcs_page_paddr) { 
    PrintError(core->vm_info, core, "Could not allocate space for a vmcs in VMX\n");
    return -1;
  }

  if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                    V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
    PrintError(core->vm_info, core, "Could not load vmcs data for VMX\n");
    V3_FreePages((void *)vmcs_page_paddr, 1);
    return -1;
  }

  vmcs_clear(vmx_info->vmcs_ptr_phys);

  // Probably need to delete the old one... 
  V3_FreePages((void *)(vmx_info->vmcs_ptr_phys), 1);

  vmcs_load(vmcs_page_paddr);

  v3_vmx_save_vmcs(core);

  shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


  /* Get the CPU mode to set the guest_ia32e entry ctrl */

  if (core->shdw_pg_mode == SHADOW_PAGING) {
    if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
      if (v3_activate_shadow_pt(core) == -1) {
        PrintError(core->vm_info, core, "Failed to activate shadow page tables\n");
        return -1;
      }
    } else {
      if (v3_activate_passthrough_pt(core) == -1) {
        PrintError(core->vm_info, core, "Failed to activate passthrough page tables\n");
        return -1;
      }
    }
  }

  return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}



static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print(info->vm_info, info, "Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print(info->vm_info, info, "IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {

                    int irq = v3_get_intr(info); 

                    if (irq < 0) {
                        break;
                    }

                    info->intr_core_state.irq_vector = irq; 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print(info->vm_info, info, "Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug(info->vm_info, info, "Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug(info->vm_info, info, "Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}



/* Ring buffer of the last 10 exits, and the guest RIPs at which they occurred */
static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];



static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;


    V3_Print(info->vm_info, info, "\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print(info->vm_info, info, "%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print(info->vm_info, info, "\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print(info->vm_info, info, "\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print(info->vm_info, info, "\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print(info->vm_info, info, "\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print(info->vm_info, info, "\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print(info->vm_info, info, "\tRIP = %p\n", (void *)rip_log[cnt]);


        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

int 
v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_entry(info);
#endif

    // Update timer devices late after being in the VM so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        // Read back and rewrite the guest CR3 field to refresh the VMCS copy
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError(info->vm_info, info, "Could not write host state\n");
        return -1;
    }

    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }

        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }

    V3_FP_ENTRY_RESTORE(info);

    {
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(info->vm_info, info, vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif
    }

    //  PrintDebug(info->vm_info, info, "VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError(info->vm_info, info, "VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);



    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug(info->vm_info, info, "VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }


    // Lastly we check for an NMI exit, and reinject if so
    {
        struct vmx_basic_exit_info * basic_info = (struct vmx_basic_exit_info *)&(exit_info.exit_reason);

        if (basic_info->reason == VMX_EXIT_INFO_EXCEPTION_OR_NMI) {
            if ((uint8_t)exit_info.int_info == 2) {
                asm("int $2");
            }
        }
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError(info->vm_info, info, "Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_exit(info);
#endif

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug(info->vm_info, info, "Starting VMX core %u\n", info->vcpu_id);

#ifdef V3_CONFIG_MULTIBOOT
    if (v3_setup_multiboot_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
        return -1;
    }
#endif

#ifdef V3_CONFIG_HVM
    if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
        return -1;
    }
#endif

    while (1) {
        if (info->core_run_state == CORE_STOPPED) {
            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else {

                PrintDebug(info->vm_info, info, "VMX core %u: Waiting for core initialization\n", info->vcpu_id);

                V3_NO_WORK(info);

                while (info->core_run_state == CORE_STOPPED) {

                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized. 
                        return 0;
                    }

                    V3_STILL_NO_WORK(info);
                    //PrintDebug(info->vm_info, info, "VMX core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);

                PrintDebug(info->vm_info, info, "VMX core %u initialized\n", info->vcpu_id);

                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            }


            PrintDebug(info->vm_info, info, "VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
                       info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
                       info->segments.cs.limit, (void *)(info->rip));


            PrintDebug(info->vm_info, info, "VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

            v3_start_time(info);


            if (info->vm_info->run_state == VM_STOPPED) {
                info->core_run_state = CORE_STOPPED;
                break;
            }
        }


#ifdef V3_CONFIG_HVM
        if (v3_handle_hvm_reset(info) > 0) {
            continue;
        }
#endif

#ifdef V3_CONFIG_MULTIBOOT
        if (v3_handle_multiboot_reset(info) > 0) {
            continue;
        }
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif


        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print(info->vm_info, info, "VMX core %u: VMX ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print(info->vm_info, info, "VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print(info->vm_info, info, "VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print(info->vm_info, info, "VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);


            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print(info->vm_info, info, "VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

int v3_is_vmx_capable(void) {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug(VM_NONE, VCORE_NONE, "ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug(VM_NONE, VCORE_NONE, "MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug(VM_NONE, VCORE_NONE, "VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug(VM_NONE, VCORE_NONE, "VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}



void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError(VM_NONE, VCORE_NONE, "Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();

    // allocate_vmcs() returns (addr_t)-1 on failure
    if (vmx_on_region == (addr_t)-1) {
        PrintError(VM_NONE, VCORE_NONE, "Could not allocate VMXON region for cpu %d\n", cpu_id);
        return;
    }

    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print(VM_NONE, VCORE_NONE, "VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug(VM_NONE, VCORE_NONE, "VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }

}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print(VM_NONE, VCORE_NONE, "Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError(VM_NONE, VCORE_NONE, "Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}