Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
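
For example, to track a release branch (the branch name here is hypothetical; list the real ones with "git branch -r"):

  git checkout --track -b release-1.2 origin/release-1.2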


Commit: added dedicated debugging framework with associated interface
File: palacios/src/palacios/vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>
#include <palacios/vmm_debug.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0 };

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


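/* 
 * Allocate and zero a 4 KB VMCS region. Intel requires the VMCS to be
 * page-aligned and to begin with the VMCS revision identifier reported by the
 * IA32_VMX_BASIC MSR, which hw_info.basic_info.revision caches. Returns the
 * region's physical address, as expected by VMPTRLD (and VMXON).
 */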
static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif


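/* 
 * Initialize the VMCS so the core starts in a BIOS-entry state. Handles the
 * three paging configurations Palacios supports on VMX hardware: shadow
 * paging, EPT (nested paging), and EPT with unrestricted guest mode.
 */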
static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get Available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */


    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    /* We enable the preemption timer by default to measure accurate guest time */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);

    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));


#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif


    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Setup guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT

        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030; 
        core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flag

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;


        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;
        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        // Cause VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError("Invalid Virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
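    /* 
     * The MSR load/store area is a single page holding both the guest and
     * host copies of the fast-syscall MSRs. On every VM exit the hardware
     * stores the guest values and loads the host values, and on every VM
     * entry it reloads the guest values, so these MSRs never need explicit
     * save/restore code.
     */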
    {
        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);

        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError("Could not allocate MSR load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));


        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);


        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError("Error configuring MSR save/restore area\n");
            return -1;
        }
    }

    /* Sanity check ctrl/reg fields against hw_defaults */


    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif


    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }
    */

    // reenable global interrupts for vm state initialization now
    // that the vm state is initialized. If another VM kicks us off, 
    // it'll update our vmx state so that we know to reload ourselves
    v3_enable_ints();

    return 0;
}


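/* 
 * Per-core VMCS setup. This is executed on a VMX-capable physical CPU (via
 * V3_Call_On_CPU from v3_init_vmx_vmcs when the caller's CPU is not one),
 * because it issues VMCLEAR/VMPTRLD directly.
 */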
static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return; 
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}



int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError("Could not find VALID CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError("Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}



#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

    // note that the vmcs pointer is an HPA, but we need an HVA
    if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void *)(vmx_info->vmcs_ptr_phys))) == -1) {
        PrintError("Could not save vmcs data for VMX\n");
        return -1;
    }

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    addr_t vmcs_page_paddr;  //HPA

    vmcs_page_paddr = (addr_t)V3_AllocPages(1);

    if (!vmcs_page_paddr) { 
        PrintError("Could not allocate space for a vmcs in VMX\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                      V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
        PrintError("Could not load vmcs data for VMX\n");
        return -1;
    }

    vmcs_clear(vmx_info->vmcs_ptr_phys);

    // Probably need to delete the old one... 
    V3_FreePages((void *)(vmx_info->vmcs_ptr_phys), 1);

    vmcs_load(vmcs_page_paddr);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}


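/* 
 * Interrupt/exception injection. VMX delivers events through the VM-entry
 * interruption-information field: these helpers decide what (if anything)
 * to inject before each entry, and after each exit use the IDT-vectoring
 * information field to detect whether a previous injection completed.
 */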
static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}



static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];



static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;


    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print("\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);


        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }
    }
}

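/* 
 * Choose between trapping RDTSC and offsetting the hardware TSC. When RDTSC
 * is not trapped, the TSC offset written to the VMCS is applied by hardware
 * to every guest TSC read; it is set to zero for passthrough mode, or to the
 * host/guest offset computed by the time subsystem otherwise.
 */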
int v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices late after being in the VM so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

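    /* 
     * Presumably a workaround: read the guest CR3 field and write the same
     * value back, forcing the VMCS copy to be refreshed before entry. The
     * read/write pair is a no-op in terms of guest state.
     */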
    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }

    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }

        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }


    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;    
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}


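/* 
 * Core run loop. Core 0 starts immediately; secondary cores spin until the
 * bootstrap core moves them out of CORE_STOPPED, then every core repeatedly
 * enters the VM until the VM is stopped or an entry fails.
 */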
int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

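/* 
 * VMX is usable when CPUID.1:ECX bit 5 (VMX) is set and the
 * IA32_FEATURE_CONTROL MSR (0x3a) has both the lock bit and the
 * "enable VMXON outside SMX" bit set; otherwise the BIOS has locked VMX off.
 */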
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}


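/* 
 * Per-CPU bring-up: detect hardware features once (on the first CPU), enable
 * VMX operation with VMXON using a freshly allocated VMXON region, and record
 * which feature tier (plain VMX, EPT, or EPT + unrestricted guest) this CPU
 * supports.
 */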
void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}