Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
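
To see which release branches are available before picking one, you can list the remote branches first (the Release-1.2 name below is only a hypothetical example):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2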


vmx fixes
[palacios.git] palacios/src/palacios/vmx.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}




static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}
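
/* Background: Intel requires the first 32 bits of a VMCS region to contain the
 * VMCS revision identifier reported in IA32_VMX_BASIC[30:0] before the region
 * is handed to VMPTRLD (or VMXON), which is why allocate_vmcs() stamps
 * hw_info.basic_info.revision into the freshly zeroed page above. */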


#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif


static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */


    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    /* We enable the preemption timer by default to measure accurate guest time */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print("VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    vmx_state->pri_proc_ctrls.hlt_exit = 1;


    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR,
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));



#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif



    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Setup guest's initial PAT field
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);
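    /* Note: 0x0007040600070406 is the architectural power-on default PAT,
     * with each byte selecting the memory type for one PAT entry
     * (06 = write-back, 04 = write-through, 07 = uncached/UC-, 00 = uncacheable). */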

    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));


        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR,
                    &v3_handle_efer_read,
                    &v3_handle_efer_write,
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) &&
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging



        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) &&
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT


        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030;
        core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flag


        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;



        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                          &(core->segments.es), &(core->segments.fs),
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        // Cause VM_EXIT whenever the CR4.VMXE bit is set
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError("Invalid Virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {

        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);

        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));


        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);


        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError("Error configuring MSR save/restore area\n");
            return -1;
        }


    }
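    /* Layout sketch (based on Intel's VM-exit/VM-entry MSR-area format; the
     * actual struct vmcs_msr_save_area is defined in a Palacios header not
     * shown here): each entry is 128 bits -- a 32-bit MSR index, 32 reserved
     * bits, and the 64-bit MSR value -- and guest_msrs/host_msrs above appear
     * to be packed arrays of four such entries within a single page. */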

    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif




    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }
    */

    // Reenable global interrupts now that the vm state is initialized.
    // If another VM kicks us off, it'll update our vmx state so that we
    // know to reload ourselves.
    v3_enable_ints();

    return 0;
}


static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return;
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}



int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError("Could not find VALID CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError("Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}



#ifdef V3_CONFIG_CHECKPOINT
/*
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

    // note that the vmcs pointer is an HPA, but we need an HVA
    if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB,
                      V3_VAddr((void *)(vmx_info->vmcs_ptr_phys))) == -1) {
        PrintError("Could not save vmcs data for VMX\n");
        return -1;
    }

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    addr_t vmcs_page_paddr;  // HPA

    vmcs_page_paddr = (addr_t)V3_AllocPages(1);

    if (!vmcs_page_paddr) {
        PrintError("Could not allocate space for a vmcs in VMX\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB,
                      V3_VAddr((void *)vmcs_page_paddr)) == -1) {
        PrintError("Could not load vmcs data for VMX\n");
        return -1;
    }

    vmcs_clear(vmx_info->vmcs_ptr_phys);

    // Probably need to delete the old one...
    V3_FreePages((void *)(vmx_info->vmcs_ptr_phys), 1);

    vmcs_load(vmcs_page_paddr);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}



static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n",
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) &&
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info);
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u (EIP=%p)\n",
                               info->intr_core_state.irq_vector,
                               (uint32_t)info->num_exits,
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}
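
/* Background: the VM-entry interruption-information field written above packs
 * an event as vector (bits 7:0), type (bits 10:8 -- 0 = external interrupt,
 * 2 = NMI, 3 = hardware exception, 4 = software interrupt), a deliver-error-code
 * flag (bit 11), and a valid bit (bit 31); the vmx_entry_int_info bitfields
 * used in update_irq_entry_state() mirror that encoding. */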


static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];



static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;


    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print("\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);


        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

int
v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}
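
/* Background: with TSC offsetting enabled (and RDTSC exiting disabled), the
 * hardware computes guest TSC = host TSC + TSC_OFFSET on every RDTSC/RDTSCP.
 * TSC_OFFSET is a signed 64-bit VMCS field, written above as two 32-bit
 * halves through the VMCS_TSC_OFFSET and VMCS_TSC_OFFSET_HIGH encodings. */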

/*
 * CAUTION and DANGER!!!
 *
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Update timer devices late after being in the VM so that as much
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get
    // handled on the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }

    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }

        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }


    {
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    /* If we have the preemption timer, then use it to get more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);



    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}
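
/* Background: VMX requires VMLAUNCH for a VMCS in the "clear" state and
 * VMRESUME for one already launched, which is what the
 * VMX_UNLAUNCHED/VMX_LAUNCHED tracking above implements. Migrating a core to
 * another physical CPU forces a VMCLEAR (see the vmcs_store() check at the
 * top of v3_vmx_enter()), so the next entry falls back to VMLAUNCH. */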


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized.
                return 0;
            }

            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("VMX core %u\n", info->vcpu_id);

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);


            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}
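
/* Background: CPUID.1:ECX bit 5 advertises VMX support, and the 0x5 mask over
 * IA32_FEATURE_CONTROL (MSR 0x3a) checks bit 0 (lock) together with bit 2
 * (enable VMXON outside SMX). If the BIOS locks the MSR with the enable bit
 * clear, VMXON faults, hence the "enable in the BIOS" message above. */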


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) &&
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}
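
/* Background: the unrestricted-guest path above mimics an INIT/SIPI restart --
 * for a SIPI startup vector V, the AP begins with CS.selector = V << 8,
 * CS.base = V << 12, and IP = 0, which is exactly how the rip argument is
 * folded into the segment state here. */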


void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }

}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}