Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
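For example, to list the available remote branches and track one of them (the branch name below is a placeholder; substitute a name reported by the first command):

  git branch -r
  git checkout --track -b <branch> origin/<branch>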


APIC and CR8 changes for vector prioritization vs. TPR
[palacios.git] / palacios / src / palacios / vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>
#include <palacios/vmm_debug.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}

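/* Allocate and zero a 4KB VMCS region and stamp it with the hardware's VMCS
 * revision identifier, which the CPU requires at the start of the region
 * before it can be used with VMPTRLD (or VMXON, for the VMXON region that
 * v3_init_vmx_cpu also allocates through this function). Returns the
 * region's physical address. */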
static addr_t allocate_vmcs() {
    void *temp;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    temp = V3_AllocPages(1);
    if (!temp) { 
        PrintError("Cannot allocate VMCS\n");
        return -1;
    }
    vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif

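/* Build the initial VMCS and guest state for a fresh core. This configures
 * the pin/processor/exit/entry controls from the hardware defaults, sets up
 * the I/O and MSR bitmaps, and then branches on the paging configuration:
 * shadow paging, EPT, or EPT with unrestricted guest support. */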
static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */


    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    /* We enable the preemption timer by default to measure accurate guest time */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);

    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));


#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif


    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Set up the guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

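    /* In 64-bit mode the guest reads and writes its task priority through
     * CR8, which aliases the priority class bits of the local APIC's TPR.
     * Exiting on CR8 loads and stores lets the VMM mirror those updates
     * into the virtual APIC so that interrupt vector prioritization stays
     * consistent with the priority the guest has set. */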
    // Capture CR8 mods so that we can keep the apic_tpr correct
    vmx_state->pri_proc_ctrls.cr8_ld_exit = 1;
    vmx_state->pri_proc_ctrls.cr8_str_exit = 1;


    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT

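        /* The values below reproduce the architected x86 reset state
         * (CS.base = 0xF0000 with RIP = 0xFFF0, RFLAGS = 0x2, and so on),
         * so an unrestricted guest can begin executing the BIOS directly in
         * real mode; note this branch never calls v3_vmxassist_init(). */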
        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030; 
        core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flag

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;


        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        // Cause VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError("Invalid Virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
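    /* The MSR load/store area is a single page holding two arrays of
     * (index, value) entries: the four guest entries are stored on VM exit
     * and reloaded on VM entry, and the four host entries are reloaded on
     * VM exit. This keeps the syscall MSRs (STAR, LSTAR, FMASK,
     * KERNEL_GS_BASE) switched between host and guest without extra exits. */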
    {

        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);

        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));


        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);


        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError("Error configuring MSR save/restore area\n");
            return -1;
        }

    }    

    /* Sanity check ctrl/reg fields against hw_defaults */


    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

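    /* The VMCS link pointer must be set to all 1s when VMCS shadowing is
     * not in use; otherwise VM entry fails its consistency checks. */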
#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif


    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }
    */

    // reenable global interrupts for vm state initialization now
    // that the vm state is initialized. If another VM kicks us off, 
    // it'll update our vmx state so that we know to reload ourself
    v3_enable_ints();

    return 0;
}

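/* Per-core VMCS construction. The sequence is: allocate the region, VMCLEAR
 * it, VMPTRLD it inside init_vmcs_bios() to fill in the initial state, and
 * then VMCLEAR it again so the now-complete state is serialized back to
 * memory before the core is first launched. */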
static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    if (!vmx_state) {
        PrintError("Unable to allocate space for vmx state\n");
        return;
    }

    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}


int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError("Could not find VALID CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError("Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

    // note that the vmcs pointer is an HPA, but we need an HVA
    if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void *)(vmx_info->vmcs_ptr_phys))) == -1) {
        PrintError("Could not save vmcs data for VMX\n");
        return -1;
    }

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    addr_t vmcs_page_paddr;  //HPA

    vmcs_page_paddr = (addr_t) V3_AllocPages(1);

    if (!vmcs_page_paddr) { 
        PrintError("Could not allocate space for a vmcs in VMX\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
        PrintError("Could not load vmcs data for VMX\n");
        return -1;
    }

    vmcs_clear(vmx_info->vmcs_ptr_phys);

    // Probably need to delete the old one... 
    V3_FreePages((void *)(vmx_info->vmcs_ptr_phys), 1);

    vmcs_load(vmcs_page_paddr);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}

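/* Called after every exit: if we had injected an interrupt and the exit
 * shows no pending IDT vectoring info, the injection completed, so notify
 * the interrupt subsystem that the vector has been delivered. */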
static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

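/* Decide what (if anything) to inject on the next entry, in priority order:
 * pending exceptions first; then, if the guest can take interrupts
 * (RFLAGS.IF set and no interruptibility blocking), either reinject an
 * interrupt that was cut off by the exit or inject the next pending
 * IRQ/NMI/software interrupt; otherwise request interrupt-window exiting
 * so we get control back as soon as injection becomes possible. */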
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}


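/* Ring buffer of the last 10 exits (reason, qualification, interrupt info,
 * and guest RIP), dumped by print_exit_log() when a core dies. */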
static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];


static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;


    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print("\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

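/* Two ways to virtualize the guest's TSC: trap every RDTSC and emulate it
 * (VM_TIME_TRAP_RDTSC), or let RDTSC run natively and apply the hardware
 * TSC-offset field, so the guest observes host_tsc + offset. Passthrough
 * mode simply uses an offset of zero. */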
int 
v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);

    // Update timer devices late, after being in the VM, so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }

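    /* If the preemption timer is available, program it with the next pending
     * timeout (or the maximum window) so the hardware forces an exit when
     * the deadline passes; on exit the saved countdown also gives an
     * accurate measure of the cycles spent in the guest. */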
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }

        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }


    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;    
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Time bookkeeping immediately after exiting the VM
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}


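/* Core run loop. vcpu 0 starts running immediately; secondary cores spin
 * until they are started (e.g., by the guest bringing them up), then every
 * core repeatedly enters the guest until the VM stops or an entry fails. */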
int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info, -1);
            //PrintDebug("VMX core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

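/* VMX capability check: CPUID.1:ECX bit 5 reports VMX support, and the
 * IA32_FEATURE_CONTROL MSR (0x3a) must have both its lock bit (bit 0) and
 * its VMXON-enable bit set, which is typically controlled by the BIOS. */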
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}

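/* Per-CPU bring-up: probe the hardware VMX feature set once, enable VMX
 * operation (CR4.VMXE), execute VMXON with a dedicated VMXON region, and
 * record which flavor of VMX this CPU supports (plain, EPT, or EPT plus
 * unrestricted guest). */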
void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }

}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}