Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
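For instance, to track a release branch instead (the branch name here is illustrative; list the actual remote branches with git branch -r):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2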


Commit: Added TSC passthrough specification to time handling
File: palacios/src/palacios/vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

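/* Physical addresses of the per-CPU VMXON regions; a zero entry means
 * VMX has not been enabled on that CPU (see v3_init_vmx_cpu below). */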
static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static int inline check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


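/* Allocate and zero a single page for a VMCS, stamp it with the hardware
 * revision ID from the basic info MSR, and return its physical address.
 * (Also used to allocate the VMXON region in v3_init_vmx_cpu.) */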
static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);
    
    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif


static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get Available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }

    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    /* We enable the preemption timer by default to measure accurate guest time */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    vmx_state->pri_proc_ctrls.hlt_exit = 1;


    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));



#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif



    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;
    // Set up the guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }
        
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));


        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
        
        vmx_state->pri_proc_ctrls.invlpg_exit = 1;
        
        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
        
        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs
        
        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging



        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT


        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030; 
        core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flag
        

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;



        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];
        
            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        // Cause VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
        //((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError("Invalid Virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area
    
    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {

        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);
        
        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));


        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);


        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError("Error configuring MSR save/restore area\n");
            return -1;
        }


    }    

    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/
  
    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif



    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }
    
    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }
    */

    // Re-enable global interrupts now that vm state initialization is
    // complete. If another VM kicks us off this CPU, it'll update our
    // vmx state so that we know to reload ourselves
    v3_enable_ints();

    return 0;
}


static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;
    
    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);
    
    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}



int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError("Could not find VALID CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError("Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}



#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    uint64_t vmcs_ptr = vmcs_store();

    v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE, (void *)vmcs_ptr);

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    char vmcs[PAGE_SIZE_4KB];

    v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, vmcs);

    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmcs_load((addr_t)vmcs);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif

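/* VMCLEAR the core's VMCS and mark it unlaunched, so the next entry
 * reloads it with VMPTRLD and uses VMLAUNCH rather than VMRESUME. */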
void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}



static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

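/* Decide what (if anything) to inject on the upcoming VM entry. Priority:
 * pending exceptions first; then, if the guest's IF flag is set and the
 * interruptibility state is clear, either re-inject an interrupted event
 * from the IDT-vectoring info or inject a newly pending IRQ/NMI/SWINT;
 * otherwise arm interrupt-window exiting so we learn when IF becomes 1. */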
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}


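/* Ring buffers holding the last 10 exits and guest RIPs, indexed by
 * info->num_exits % 10; dumped by print_exit_log() on a fatal error. */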
static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];



static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;
    

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print("\tguest_linear_addr= %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);


        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

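/* Configure how the guest sees the TSC for this entry. Three cases:
 * trap every RDTSC (VM_TIME_TRAP_RDTSC), run against the raw hardware
 * TSC (VM_TIME_TSC_PASSTHROUGH, offset forced to 0), or let the hardware
 * apply the VMM-computed offset from v3_tsc_host_offset(). */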
int 
v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices late after being in the VM so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }
    
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }
        
        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }


    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;    
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);



    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
       V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n",info->vcpu_id);
        }
        
        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);


            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

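/* VMX is usable only if CPUID.1:ECX reports the VMX flag (bit 5) and the
 * IA32_FEATURE_CONTROL MSR (0x3a) has both its lock bit and its VMXON
 * enable bit set, which is typically done by the BIOS. */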
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
        
        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios
    
    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}


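/* Per-CPU VMX bring-up: read the hardware feature MSRs once (on the first
 * CPU), enable VMX operation, execute VMXON on a freshly allocated VMXON
 * region, and record the CPU's capabilities (EPT, unrestricted guest)
 * in v3_cpu_types[]. */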
void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);    

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));
        
        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
    
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}