Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

Checking out any of the other branches works the same way, as shown below.
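For example, to work from a release branch instead (the branch name below is illustrative; list the branches that actually exist with git branch -r):

  git branch -r
  git checkout --track -b release-1.2 origin/release-1.2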


palacios/src/palacios/vmx.c:
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

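/* Thin wrappers around vmcs_write()/vmcs_read() that log any failure with the
 * symbolic VMCS field name before passing an error indication back to the
 * caller. Most callers below simply OR the results together and check once. */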
static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


static addr_t allocate_vmcs(void) {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif

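/* Bring the VMCS and the core's register state to a BIOS-bootable initial
 * configuration: load defaults from the hardware feature MSRs, set up the
 * pin/proc/entry/exit controls, the I/O and MSR bitmaps, paging (shadow,
 * EPT, or EPT + unrestricted guest), and the MSR save/load areas. */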
static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }

    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print control MSRs */
    V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupt exiting, NMI exiting, and virtual NMIs */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;

    /* We enable the preemption timer by default to measure guest time accurately */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
                                (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);

    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));

#ifdef __V3_64BIT__
    // Ensure the host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif

    // Restore the host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore the guest's EFER register to/from the VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Set up the guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // Cause a VM_EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

        // CR0_* masks are defined above
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause a VM_EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we assume that unrestricted guest mode is available along with EPT

        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030; 
        core->ctrl_regs.cr4 = 0x00002010; // Enable the VMX and PSE flags

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;

        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;
        }

        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation

        /* Disable the shadow paging exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        // Cause a VM_EXIT whenever the CR4.VMXE bit is written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
        //((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError("Invalid virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }

    // hook vmx msrs

    // Setup the SYSCALL/SYSENTER MSRs in the load/store area

    // save the STAR, LSTAR, FMASK, and KERNEL_GS_BASE MSRs in the MSR load/store area
    {
        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);

        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));

        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);

        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... There does not appear to be an explicit hardware-cached version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError("Error configuring MSR save/restore area\n");
            return -1;
        }
    }

    /* Sanity check ctrl/reg fields against hw_defaults */


    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif

    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }
    */

    // Reenable global interrupts now that VM state initialization is complete.
    // If another VM kicks us off this CPU, it will update our vmx state so
    // that we know to reload ourselves.
    v3_enable_ints();

    return 0;
}

static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}

int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError("Could not find VALID CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError("Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}

#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

    // note that the vmcs pointer is an HPA, but we need an HVA
    v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, V3_VAddr((void*)
                                                            (vmx_info->vmcs_ptr_phys)));

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    addr_t vmcs_page_paddr;  // HPA

    vmcs_page_paddr = (addr_t) V3_AllocPages(1);

    v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, V3_VAddr((void *)vmcs_page_paddr));

    vmcs_clear(vmx_info->vmcs_ptr_phys);

    // Probably need to delete the old one... 
    V3_FreePages((void*)(vmx_info->vmcs_ptr_phys), 1);

    vmcs_load(vmcs_page_paddr);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);

    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}


static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is a hardware exception;
        // software exceptions are pretty much only used for breakpoint and overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                     int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                             info->intr_core_state.irq_vector, 
                             (uint32_t)info->num_exits, 
                             (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here; Intel doesn't have virtual IRQs.
                    // May be the same as external interrupts/IRQs
                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable interrupt-window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }

    return 0;
}


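/* Ring buffers of the last 10 exits (and the guest RIPs at which they
 * occurred), kept for post-mortem debugging when a VM entry fails. */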
static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];


static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print("\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }
    }
}

int v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }

        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }

    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices late after being in the VM so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

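    // Note: this reads the guest CR3 out of the VMCS and immediately writes it
    // back, apparently to force the (possibly stale) CR3 field to be refreshed
    // before entry.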
    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }

    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* The preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }

        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }

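    // Enter the guest with VMLAUNCH the first time and VMRESUME thereafter,
    // using the TSC to measure how many cycles were spent in the guest.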
    {
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        } else {
            V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;    
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }

    info->num_exits++;

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);

    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    // PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}

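/* Core execution loop: wait (for cores other than core 0) until the VM is
 * started, then repeatedly enter the guest until the VM stops or an entry
 * fails, dumping diagnostic state on failure. */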
int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            // PrintDebug("VMX core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }

    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));

    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("VMX core %u: Host address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
    }

    return 0;
}


#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES          0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG         0x00000020

int v3_is_vmx_capable(void) {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable it in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // Re-initialize the core to its VMCS BIOS state

    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // With unrestricted guest support the core can restart directly in
        // real mode, with the start address encoded into the CS selector/base
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}

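/* Per-CPU bring-up: detect the VMX feature set once, execute VMXON with a
 * dedicated per-CPU region, and record the CPU's capability level (VMX,
 * VMX+EPT, or VMX+EPT+unrestricted guest). */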
void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();

    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();

    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);    

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}