Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are similar; see the example below.
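For example, to track a release branch the same way (the branch name Release-1.3 here is only illustrative; list the branches that actually exist with git branch -r):

  git checkout --track -b Release-1.3 origin/Release-1.3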


correctly handle NMI exits on VMX architectures
[palacios.git] / palacios / src / palacios / vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>
#include <palacios/vmm_debug.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static int inline check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


static addr_t allocate_vmcs() {
    void *temp;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    temp = V3_AllocPages(1);
    if (!temp) { 
        PrintError("Cannot allocate VMCS\n");
        return -1;
    }
    vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}
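/*
 * Note: before a region can be used with VMPTRLD (or VMXON), software must
 * write the VMCS revision identifier reported by the IA32_VMX_BASIC MSR into
 * its first 32 bits, which is what allocate_vmcs() does above. The VMXON
 * region shares this format, which is why v3_init_vmx_cpu() below reuses
 * this allocator for it.
 */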

#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif

static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external-interrupt exiting, NMI exiting, and virtual NMIs */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.virt_nmi = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    /* We enable the preemption timer by default to measure guest time accurately */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    // we want to exit whenever the guest halts
    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    // CPUID tells the guest that it does not have these instructions
    vmx_state->pri_proc_ctrls.monitor_exit = 1;
    vmx_state->pri_proc_ctrls.mwait_exit = 1;

    // we don't need to handle a pause, although this is where we could
    // pull out of a spin-lock acquire or reschedule to find its partner
    vmx_state->pri_proc_ctrls.pause_exit = 0;

    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);

    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));


#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif


    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Setup the guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);
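    /*
     * Note: the value written above, 0x0007040600070406, is the
     * architectural power-on default of the IA32_PAT MSR (PA0=WB, PA1=WT,
     * PA2=UC-, PA3=UC, repeated in the upper half), so the guest starts with
     * the same memory types it would see after a real reset.
     */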

    // Capture CR8 mods so that we can keep the apic_tpr correct
    vmx_state->pri_proc_ctrls.cr8_ld_exit = 1;
    vmx_state->pri_proc_ctrls.cr8_str_exit = 1;


    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));


        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode implies EPT support

        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030; 
        core->ctrl_regs.cr4 = 0x00002010; // Enable the VMXE and PSE flags

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;
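        /*
         * The values above mimic the real-mode reset state: with
         * CS.selector = 0xf000, CS.base = 0xf0000, and RIP = 0xfff0, the
         * first guest instruction is fetched from physical address 0xffff0,
         * the classic BIOS reset entry point.
         */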

        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;
        }

        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        // Cause VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError("Invalid Virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {
        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);

        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));

        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);

        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError("Error configuring MSR save/restore area\n");
            return -1;
        }
    }
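    /*
     * Design note (an interpretation, not from the original comments): the
     * fast-syscall MSRs (STAR, LSTAR, FMASK, KERNEL_GS_BASE) have no
     * dedicated VMCS guest-state fields, so they are swapped through the MSR
     * load/store area configured above and otherwise passed through via the
     * NULL hooks. The SYSENTER MSRs do have dedicated VMCS fields (the
     * hardware "caching" noted above), so they only need the pass-through
     * hooks.
     */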

    /* Sanity check ctrl/reg fields against hw_defaults */


    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif
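    /*
     * Setting the VMCS link pointer to all 1s is architecturally required
     * when VMCS shadowing is not in use; any other value causes VM entry to
     * fail its consistency checks.
     */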


    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }
    */

    // Now that the VM state is initialized, reenable global interrupts.
    // If another VM kicks us off this core, it'll update our vmx state
    // so that we know to reload ourselves.
    v3_enable_ints();

    return 0;
}


static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    if (!vmx_state) {
        PrintError("Unable to allocate VMX state while initializing the VMCS\n");
        return;
    }

    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}


int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError("Could not find VALID CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError("Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

    // note that the vmcs pointer is an HPA, but we need an HVA
    if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void*) (vmx_info->vmcs_ptr_phys))) == -1) {
        PrintError("Could not save vmcs data for VMX\n");
        return -1;
    }

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    addr_t vmcs_page_paddr;  //HPA

    vmcs_page_paddr = (addr_t)V3_AllocPages(1);

    if (!vmcs_page_paddr) { 
        PrintError("Could not allocate space for a vmcs in VMX\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
        PrintError("Could not load vmcs data for VMX\n");
        return -1;
    }

    vmcs_clear(vmx_info->vmcs_ptr_phys);

    // Probably need to delete the old one... 
    V3_FreePages((void*)(vmx_info->vmcs_ptr_phys), 1);

    vmcs_load(vmcs_page_paddr);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}


static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}
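/*
 * VMX interruption-information "type" encodings used below (Intel SDM):
 * 0 = external interrupt, 2 = NMI, 3 = hardware exception,
 * 4 = software interrupt.
 */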
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }

    return 0;
}


static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];


static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print("\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }
    }
}

int 
v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);

    // Update timer devices late, after being in the VM, so that as much of
    // the time spent in the VM as possible is accounted for. Also do it
    // before updating IRQ entry state so that any interrupts the timers
    // raise get handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }
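    /*
     * The check above reloads this core's VMCS if it is not the CPU's
     * current VMCS (e.g. the core migrated to another physical CPU, or a
     * different VM ran here). VMCLEAR resets the launch state, so the next
     * entry must use VMLAUNCH rather than VMRESUME; that is why the state
     * is set back to VMX_UNLAUNCHED.
     */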

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }

    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }

        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }


    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;    
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
       V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }
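    /*
     * With "NMI exiting" enabled in the pin-based controls, an NMI that
     * arrives while the guest is running causes a VM exit (basic reason 0,
     * exception-or-NMI) instead of being delivered through the host IDT.
     * Since host interrupts are still disabled at this point, the block
     * below hands the NMI to the host by executing int $2 (vector 2 is the
     * NMI vector), so the host's NMI handler still runs.
     */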

    // Lastly we check for an NMI exit, and reinject if so
    {
        struct vmx_basic_exit_info * basic_info = (struct vmx_basic_exit_info *)&(exit_info.exit_reason);

        if (basic_info->reason == VMX_EXIT_INFO_EXCEPTION_OR_NMI) {
            if ((uint8_t)exit_info.int_info == 2) {
                asm("int $2");
            }
        }
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info, -1);
            //PrintDebug("VMX core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
    }

    return 0;
}


#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios
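    /*
     * In the unrestricted-guest case, rip appears to be used as a
     * SIPI-style start vector rather than an address (an interpretation
     * based on the arithmetic below): the core restarts with RIP = 0,
     * CS.selector = vector << 8, and CS.base = vector << 12, i.e. at the
     * start of the 4KB page the vector names.
     */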
    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}


void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);    

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}