Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are similar.
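
For example, to track a release branch instead (the branch name below is illustrative; substitute one of the names listed by "git branch -r"):

  git branch -r
  git checkout --track -b release-1.3 origin/release-1.3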


Added functionality for MONITOR and MWAIT instructions on SVM and VMX:

palacios/src/palacios/vmx.c (in palacios.git)
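
On the VMX side, this change shows up in init_vmcs_bios() below, where the monitor_exit and mwait_exit primary processor control bits are set so that guest MONITOR/MWAIT executions cause VM exits; see the note at that point in the code.
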
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */

#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>
#include <palacios/vmm_debug.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;
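/* Physical addresses of the per-CPU VMXON regions; an entry is nonzero once
 * VMXON has succeeded on that CPU (see v3_init_vmx_cpu() below). */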
static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static int inline check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}

static addr_t allocate_vmcs() {
    void * temp;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    temp = V3_AllocPages(1);
    if (!temp) { 
        PrintError("Cannot allocate VMCS\n");
        return -1;
    }
    vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}

#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif

static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */

    // Disable global interrupts for VM state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }

    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);

    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */

    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;

    /* We enable the preemption timer by default to measure accurate guest time */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    // We want the guest to exit to us when it halts
    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    // CPUID tells the guest that it does not have these instructions
    vmx_state->pri_proc_ctrls.monitor_exit = 1;
    vmx_state->pri_proc_ctrls.mwait_exit = 1;
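
    // With monitor_exit and mwait_exit set, a guest execution of MONITOR or
    // MWAIT traps to the VMM (dispatched through v3_handle_vmx_exit()) instead
    // of running natively, keeping guest behavior consistent with the CPUID
    // report above. How the exit is resolved (e.g., emulating the instruction
    // as a NOP or injecting #UD) is up to the exit handler.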

    // We don't need to handle a pause exit, although this is where we could
    // pull a guest out of a spin-lock acquire or schedule it to find its
    // lock partner
    vmx_state->pri_proc_ctrls.pause_exit = 0;

    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);

    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));

#ifdef __V3_64BIT__
    // Ensure the host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif

    // Restore the host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore the guest's EFER register to/from the VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Set up the guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

    // Capture CR8 mods so that we can keep the apic_tpr correct
    vmx_state->pri_proc_ctrls.cr8_ld_exit = 1;
    vmx_state->pri_proc_ctrls.cr8_str_exit = 1;

    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }
        
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // Cause a VM_EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
        
        vmx_state->pri_proc_ctrls.invlpg_exit = 1;
        
        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause a VM_EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
        
        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // Enable EPT paging

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT

        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030; 
        core->ctrl_regs.cr4 = 0x00002010; // Enable the VMX and PSE flags
        
        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;

        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];
        
            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;
        }

        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // Enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // Enable unrestricted guest operation

        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        // Cause a VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError("Invalid virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }

    // Hook the VMX MSRs

    // Setup the SYSCALL/SYSENTER MSRs in the load/store area

    // Save the STAR, LSTAR, FMASK, and KERNEL_GS_BASE MSRs in the MSR load/store area
    {
        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);
        
        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError("Could not allocate MSR load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));

        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);

        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError("Error configuring MSR save/restore area\n");
            return -1;
        }
    }

    /* Sanity check ctrl/reg fields against hw_defaults */

    /*** Write all the info to the VMCS ***/
  
    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif

    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }
    
    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }
    */

    // Re-enable global interrupts now that the VM state is initialized.
    // If another VM kicks us off this CPU, it will update our VMX state
    // so that we know to reload ourselves
    v3_enable_ints();

    return 0;
}

static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;
    
    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    if (!vmx_state) {
        PrintError("Unable to allocate space for VMX state\n");
        return;
    }

    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);
    
    // TODO: Fix the VMCS fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}

int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError("Could not find a valid CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError("Error initializing VMX core\n");
        return -1;
    }

    return 0;
}

int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}

#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

    // Note that the VMCS pointer is an HPA, but we need an HVA
    if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void *)(vmx_info->vmcs_ptr_phys))) == -1) {
        PrintError("Could not save VMCS data for VMX\n");
        return -1;
    }

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    addr_t vmcs_page_paddr;  // HPA

    vmcs_page_paddr = (addr_t)V3_AllocPages(1);
    
    if (!vmcs_page_paddr) { 
        PrintError("Could not allocate space for a VMCS in VMX\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
        PrintError("Could not load VMCS data for VMX\n");
        return -1;
    }

    vmcs_clear(vmx_info->vmcs_ptr_phys);

    // Probably need to delete the old one... 
    V3_FreePages((void *)(vmx_info->vmcs_ptr_phys), 1);

    vmcs_load(vmcs_page_paddr);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);

    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif

void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}

static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware.
        // Software exceptions are pretty much only for breakpoint or overflow.
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here; Intel doesn't have virtual IRQs.
                    // May be the same as external interrupts/IRQs.

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable interrupt-window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }

    return 0;
}

static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;
    
    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print("\tguest_linear_addr= %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }
    }
}

int 
v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if  (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if  (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);
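
        // The 64-bit offset is written into the VMCS as two 32-bit halves;
        // the high half has its own field encoding (VMCS_TSC_OFFSET_HIGH).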
        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);

    // Update timer devices late after being in the VM so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // Disable global interrupts for the VM state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }
    
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* The preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }
        
        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }

    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;    
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }

    info->num_exits++;

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);

    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
       V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // Re-enable global interrupts after the VM exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}

int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info, -1);
            //PrintDebug("VMX core %u: still waiting for INIT\n", info->vcpu_id);
        }
        
        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }

    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));

    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
    }

    return 0;
}

#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
        
        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable it in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}

int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // Init the VMCS to BIOS state
    
    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}

void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();

    // Setup the VMXON Region
    vmx_on_region = allocate_vmcs();

    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);    

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));
        
        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}

void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}