Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out the same way, substituting the branch name.
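
For example, to track one of the release branches (the branch name below is only illustrative; run "git branch -r" after cloning to see which branches actually exist):

  git checkout --track -b Release-1.3 origin/Release-1.3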


Below is palacios/src/palacios/vmx.c:

1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
11  * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20
21 #include <palacios/vmx.h>
22 #include <palacios/vmm.h>
23 #include <palacios/vmx_handler.h>
24 #include <palacios/vmcs.h>
25 #include <palacios/vmx_lowlevel.h>
26 #include <palacios/vmm_lowlevel.h>
27 #include <palacios/vmm_ctrl_regs.h>
28 #include <palacios/vmm_config.h>
29 #include <palacios/vmm_time.h>
30 #include <palacios/vm_guest_mem.h>
31 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/vmx_io.h>
33 #include <palacios/vmx_msr.h>
34 #include <palacios/vmm_decoder.h>
35 #include <palacios/vmm_barrier.h>
36 #include <palacios/vmm_timeout.h>
37 #include <palacios/vmm_debug.h>
38
39 #ifdef V3_CONFIG_CHECKPOINT
40 #include <palacios/vmm_checkpoint.h>
41 #endif
42
43 #include <palacios/vmx_ept.h>
44 #include <palacios/vmx_assist.h>
45 #include <palacios/vmx_hw_info.h>
46
47 #ifndef V3_CONFIG_DEBUG_VMX
48 #undef PrintDebug
49 #define PrintDebug(fmt, args...)
50 #endif
51
52
53 /* These fields contain the hardware feature sets supported by the local CPU */
54 static struct vmx_hw_info hw_info;
55
56 extern v3_cpu_arch_t v3_mach_type;
57
58 static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};
59
60 extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
61 extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
62
63 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
64     int ret = 0;
65
66     ret = vmcs_write(field, val);
67
68     if (ret != VMX_SUCCESS) {
69         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
70         return 1;
71     }
72
73
74     
75
76     return 0;
77 }
78
79 static int inline check_vmcs_read(vmcs_field_t field, void * val) {
80     int ret = 0;
81
82     ret = vmcs_read(field, val);
83
84     if (ret != VMX_SUCCESS) {
85         PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
86     }
87
88     return ret;
89 }
90
91
92
93
94 static addr_t allocate_vmcs() {
95     void *temp;
96     struct vmcs_data * vmcs_page = NULL;
97
98     PrintDebug("Allocating page\n");
99
100     temp = V3_AllocPages(1);
101     if (!temp) { 
102         PrintError("Cannot allocate VMCS\n");
103         return -1;
104     }
105     vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
106     memset(vmcs_page, 0, 4096);
107
108     vmcs_page->revision = hw_info.basic_info.revision;
109     PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
110
111     return (addr_t)V3_PAddr((void *)vmcs_page);
112 }
113
114
115 #if 0
116 static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
117     struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
118     V3_Print("\n\nEFER READ (val = %p)\n", (void *)efer->value);
119     
120     v3_print_guest_state(core);
121     v3_print_vmcs();
122
123
124     src->value = efer->value;
125     return 0;
126 }
127
128 static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
129     struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
130     V3_Print("\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);
131     
132     v3_print_guest_state(core);
133     v3_print_vmcs();
134
135     efer->value = src.value;
136
137     return 0;
138 }
139 #endif
140
141
142 static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
143     int vmx_ret = 0;
144
145     /* Get Available features */
146     struct vmx_pin_ctrls avail_pin_ctrls;
147     avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
148     /* ** */
149
150
151     // disable global interrupts for vm state initialization
152     v3_disable_ints();
153
154     PrintDebug("Loading VMCS\n");
155     vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
156     vmx_state->state = VMX_UNLAUNCHED;
157
158     if (vmx_ret != VMX_SUCCESS) {
159         PrintError("VMPTRLD failed\n");
160         return -1;
161     }
162
163
164     /*** Setup default state from HW ***/
165
166     vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
167     vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
168     vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
169     vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
170     vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;
171
172     /* Print Control MSRs */
173     V3_Print("CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
174     V3_Print("CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);
175
176
177
178     /******* Setup Host State **********/
179
180     /* Cache GDTR, IDTR, and TR in host struct */
181
182
183     /********** Setup VMX Control Fields ***********/
184
185     /* Add external interrupts, NMI exiting, and virtual NMI */
186     vmx_state->pin_ctrls.nmi_exit = 1;
187     vmx_state->pin_ctrls.ext_int_exit = 1;
188
189
190     /* We enable the preemption timer by default to measure accurate guest time */
191     if (avail_pin_ctrls.active_preempt_timer) {
192         V3_Print("VMX Preemption Timer is available\n");
193         vmx_state->pin_ctrls.active_preempt_timer = 1;
194         vmx_state->exit_ctrls.save_preempt_timer = 1;
195     }
196
197     vmx_state->pri_proc_ctrls.hlt_exit = 1;
198
199
200     vmx_state->pri_proc_ctrls.pause_exit = 0;
201     vmx_state->pri_proc_ctrls.tsc_offset = 1;
202 #ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
203     vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
204 #endif
205
206     /* Setup IO map */
207     vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
208     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
209     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
210             (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);
211
212
213     vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
214     vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));
215
216
217
218 #ifdef __V3_64BIT__
219     // Ensure host runs in 64-bit mode at each VM EXIT
220     vmx_state->exit_ctrls.host_64_on = 1;
221 #endif
222
223
224
225     // Restore host's EFER register on each VM EXIT
226     vmx_state->exit_ctrls.ld_efer = 1;
227
228     // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
229     vmx_state->exit_ctrls.save_efer = 1;
230     vmx_state->entry_ctrls.ld_efer  = 1;
231
232     vmx_state->exit_ctrls.save_pat = 1;
233     vmx_state->exit_ctrls.ld_pat = 1;
234     vmx_state->entry_ctrls.ld_pat = 1;
235
236     /* Temporary GPF trap */
237     //  vmx_state->excp_bmap.gp = 1;
238
239     // Setup the guest's initial PAT field
240     vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);
241
242     /* Setup paging */
243     if (core->shdw_pg_mode == SHADOW_PAGING) {
244         PrintDebug("Creating initial shadow page table\n");
245
246         if (v3_init_passthrough_pts(core) == -1) {
247             PrintError("Could not initialize passthrough page tables\n");
248             return -1;
249         }
250         
251 #define CR0_PE 0x00000001
252 #define CR0_PG 0x80000000
253 #define CR0_WP 0x00010000 // To ensure mem hooks work
254 #define CR0_NE 0x00000020
255         vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));
256
257
258         // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
259         vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
260
261         core->ctrl_regs.cr3 = core->direct_map_pt;
262
263         // vmx_state->pinbased_ctrls |= NMI_EXIT;
264
265         /* Add CR exits */
266         vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
267         vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
268         
269         vmx_state->pri_proc_ctrls.invlpg_exit = 1;
270         
271         /* Add page fault exits */
272         vmx_state->excp_bmap.pf = 1;
273
274         // Setup VMX Assist
275         v3_vmxassist_init(core, vmx_state);
276
277         // Hook all accesses to EFER register
278         v3_hook_msr(core->vm_info, EFER_MSR, 
279                     &v3_handle_efer_read,
280                     &v3_handle_efer_write, 
281                     core);
282
283     } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
284                (v3_mach_type == V3_VMX_EPT_CPU)) {
285
286 #define CR0_PE 0x00000001
287 #define CR0_PG 0x80000000
288 #define CR0_WP 0x00010000 // To ensure mem hooks work
289 #define CR0_NE 0x00000020
290         vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));
291
292         // vmx_state->pinbased_ctrls |= NMI_EXIT;
293
294         // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
295         vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
296         
297         /* Disable CR exits */
298         vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
299         vmx_state->pri_proc_ctrls.cr3_str_exit = 0;
300
301         vmx_state->pri_proc_ctrls.invlpg_exit = 0;
302
303         /* Add page fault exits */
304         //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs
305         
306         // Setup VMX Assist
307         v3_vmxassist_init(core, vmx_state);
308
309         /* Enable EPT */
310         vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
311         vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
312
313
314
315         if (v3_init_ept(core, &hw_info) == -1) {
316             PrintError("Error initializing EPT\n");
317             return -1;
318         }
319
320         // Hook all accesses to EFER register
321         v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
322
323     } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
324                (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
325         int i = 0;
326         // For now we will assume that unrestricted guest mode is assured w/ EPT
327
328
329         core->vm_regs.rsp = 0x00;
330         core->rip = 0xfff0;
331         core->vm_regs.rdx = 0x00000f00;
332         core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
333         core->ctrl_regs.cr0 = 0x60010030; 
334         core->ctrl_regs.cr4 = 0x00002010; // Enable VMX and PSE flag
335         
336
337         core->segments.cs.selector = 0xf000;
338         core->segments.cs.limit = 0xffff;
339         core->segments.cs.base = 0x0000000f0000LL;
340
341         // (raw attributes = 0xf3)
342         core->segments.cs.type = 0xb;
343         core->segments.cs.system = 0x1;
344         core->segments.cs.dpl = 0x0;
345         core->segments.cs.present = 1;
346
347
348
349         struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
350                                           &(core->segments.es), &(core->segments.fs), 
351                                           &(core->segments.gs), NULL};
352
353         for ( i = 0; segregs[i] != NULL; i++) {
354             struct v3_segment * seg = segregs[i];
355         
356             seg->selector = 0x0000;
357             //    seg->base = seg->selector << 4;
358             seg->base = 0x00000000;
359             seg->limit = 0xffff;
360
361
362             seg->type = 0x3;
363             seg->system = 0x1;
364             seg->dpl = 0x0;
365             seg->present = 1;
366             //    seg->granularity = 1;
367
368         }
369
370
371         core->segments.gdtr.limit = 0x0000ffff;
372         core->segments.gdtr.base = 0x0000000000000000LL;
373
374         core->segments.idtr.limit = 0x0000ffff;
375         core->segments.idtr.base = 0x0000000000000000LL;
376
377         core->segments.ldtr.selector = 0x0000;
378         core->segments.ldtr.limit = 0x0000ffff;
379         core->segments.ldtr.base = 0x0000000000000000LL;
380         core->segments.ldtr.type = 0x2;
381         core->segments.ldtr.present = 1;
382
383         core->segments.tr.selector = 0x0000;
384         core->segments.tr.limit = 0x0000ffff;
385         core->segments.tr.base = 0x0000000000000000LL;
386         core->segments.tr.type = 0xb;
387         core->segments.tr.present = 1;
388
389         //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
390         core->dbg_regs.dr7 = 0x0000000000000400LL;
391
392         /* Enable EPT */
393         vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
394         vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
395         vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation
396
397
398         /* Disable shadow paging stuff */
399         vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
400         vmx_state->pri_proc_ctrls.cr3_str_exit = 0;
401
402         vmx_state->pri_proc_ctrls.invlpg_exit = 0;
403
404
405         // Cause VM_EXIT whenever the CR4.VMXE bit is written
406         vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
407 #define CR0_NE 0x00000020
408         vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE);
409         ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
410
411         if (v3_init_ept(core, &hw_info) == -1) {
412             PrintError("Error initializing EPT\n");
413             return -1;
414         }
415
416         // Hook all accesses to EFER register
417         //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
418         v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
419     } else {
420         PrintError("Invalid Virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
421         return -1;
422     }
423
424
425     // hook vmx msrs
426
427     // Setup SYSCALL/SYSENTER MSRs in load/store area
428     
429     // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
430     {
431
432         struct vmcs_msr_save_area * msr_entries = NULL;
433         int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
434         int msr_ret = 0;
435
436         V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);
437
438         if (max_msrs < 4) {
439             PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
440             return -1;
441         }
442
443         vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1);
444         
445         if (vmx_state->msr_area_paddr == (addr_t)NULL) {
446             PrintError("could not allocate msr load/store area\n");
447             return -1;
448         }
449
450         msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
451         vmx_state->msr_area = msr_entries; // cache in vmx_info
452
453         memset(msr_entries, 0, PAGE_SIZE);
454
455         msr_entries->guest_star.index = IA32_STAR_MSR;
456         msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
457         msr_entries->guest_fmask.index = IA32_FMASK_MSR;
458         msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;
459
460         msr_entries->host_star.index = IA32_STAR_MSR;
461         msr_entries->host_lstar.index = IA32_LSTAR_MSR;
462         msr_entries->host_fmask.index = IA32_FMASK_MSR;
463         msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;
464
465         msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
466         msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
467         msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);
468
469         msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
470         msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
471         msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));
472
473
474         msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
475         msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
476         msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
477         msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
478
479
480         // IMPORTANT: These MSRs appear to be cached by the hardware....
481         msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
482         msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
483         msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);
484
485         msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
486         msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);
487
488         msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);
489
490         // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
491         msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);
492
493         if (msr_ret != 0) {
494             PrintError("Error configuring MSR save/restore area\n");
495             return -1;
496         }
497
498
499     }    
500
501     /* Sanity check ctrl/reg fields against hw_defaults */
502
503
504
505
506     /*** Write all the info to the VMCS ***/
507   
508     /*
509     {
510         // IS THIS NECESSARY???
511 #define DEBUGCTL_MSR 0x1d9
512         struct v3_msr tmp_msr;
513         v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
514         vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
515         core->dbg_regs.dr7 = 0x400;
516     }
517     */
518
519 #ifdef __V3_64BIT__
520     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
521 #else
522     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
523     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
524 #endif
525
526
527  
528
529     if (v3_update_vmcs_ctrl_fields(core)) {
530         PrintError("Could not write control fields!\n");
531         return -1;
532     }
533     
534     /*
535     if (v3_update_vmcs_host_state(core)) {
536         PrintError("Could not write host state\n");
537         return -1;
538     }
539     */
540
541     // Reenable global interrupts now that the vm state is initialized.
542     // If another VM kicks us off, it'll update our vmx state so that
543     // we know to reload ourselves
544     v3_enable_ints();
545
546     return 0;
547 }
548
549
550 static void __init_vmx_vmcs(void * arg) {
551     struct guest_info * core = arg;
552     struct vmx_data * vmx_state = NULL;
553     int vmx_ret = 0;
554     
555     vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
556
557     if (!vmx_state) {
558         PrintError("Unable to allocate memory while initializing vmx vmcs\n");
559         return;
560     }
561
562     memset(vmx_state, 0, sizeof(struct vmx_data));
563
564     PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);
565
566     PrintDebug("Allocating VMCS\n");
567     vmx_state->vmcs_ptr_phys = allocate_vmcs();
568
569     PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
570
571     core->vmm_data = vmx_state;
572     vmx_state->state = VMX_UNLAUNCHED;
573
574     PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);
575     
576     // TODO: Fix vmcs fields so they're 32-bit
577
578     PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
579     vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);
580
581     if (vmx_ret != VMX_SUCCESS) {
582         PrintError("VMCLEAR failed\n");
583         return; 
584     }
585
586     if (core->vm_info->vm_class == V3_PC_VM) {
587         PrintDebug("Initializing VMCS\n");
588         if (init_vmcs_bios(core, vmx_state) == -1) {
589             PrintError("Error initializing VMCS to BIOS state\n");
590             return;
591         }
592     } else {
593         PrintError("Invalid VM Class\n");
594         return;
595     }
596
597     PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
598     vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);
599
600     core->core_run_state = CORE_STOPPED;
601     return;
602 }
603
604
605
606 int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
607     extern v3_cpu_arch_t v3_cpu_types[];
608
609     if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
610         int i = 0;
611
612         for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
613             if (v3_cpu_types[i] != V3_INVALID_CPU) {
614                 break;
615             }
616         }
617
618         if (i == V3_CONFIG_MAX_CPUS) {
619             PrintError("Could not find VALID CPU for VMX guest initialization\n");
620             return -1;
621         }
622
623         V3_Call_On_CPU(i, __init_vmx_vmcs, core);
624
625     } else {
626         __init_vmx_vmcs(core);
627     }
628
629     if (core->core_run_state != CORE_STOPPED) {
630         PrintError("Error initializing VMX Core\n");
631         return -1;
632     }
633
634     return 0;
635 }
636
637
638 int v3_deinit_vmx_vmcs(struct guest_info * core) {
639     struct vmx_data * vmx_state = core->vmm_data;
640
641     V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
642     V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);
643
644     V3_Free(vmx_state);
645
646     return 0;
647 }
648
649
650
651 #ifdef V3_CONFIG_CHECKPOINT
652 /* 
653  * JRL: This is broken
654  */
655 int v3_vmx_save_core(struct guest_info * core, void * ctx){
656     struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
657
658     // note that the vmcs pointer is an HPA, but we need an HVA
659     if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
660                       V3_VAddr((void*) (vmx_info->vmcs_ptr_phys))) ==-1) {
661         PrintError("Could not save vmcs data for VMX\n");
662         return -1;
663     }
664
665     return 0;
666 }
667
668 int v3_vmx_load_core(struct guest_info * core, void * ctx){
669     struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
670     struct cr0_32 * shadow_cr0;
671     addr_t vmcs_page_paddr;  //HPA
672
673     vmcs_page_paddr = (addr_t) V3_AllocPages(1);
674     
675     if (!vmcs_page_paddr) { 
676         PrintError("Could not allocate space for a vmcs in VMX\n");
677         return -1;
678     }
679
680     if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
681                       V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
682         PrintError("Could not load vmcs data for VMX\n");
683         return -1;
684     }
685
686     vmcs_clear(vmx_info->vmcs_ptr_phys);
687
688     // Probably need to delete the old one... 
689     V3_FreePages((void*)(vmx_info->vmcs_ptr_phys),1);
690
691     vmcs_load(vmcs_page_paddr);
692
693     v3_vmx_save_vmcs(core);
694
695     shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);
696
697
698     /* Get the CPU mode to set the guest_ia32e entry ctrl */
699
700     if (core->shdw_pg_mode == SHADOW_PAGING) {
701         if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
702             if (v3_activate_shadow_pt(core) == -1) {
703                 PrintError("Failed to activate shadow page tables\n");
704                 return -1;
705             }
706         } else {
707             if (v3_activate_passthrough_pt(core) == -1) {
708                 PrintError("Failed to activate passthrough page tables\n");
709                 return -1;
710             }
711         }
712     }
713
714     return 0;
715 }
716 #endif
717
718
719 void v3_flush_vmx_vm_core(struct guest_info * core) {
720     struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
721     vmcs_clear(vmx_info->vmcs_ptr_phys);
722     vmx_info->state = VMX_UNLAUNCHED;
723 }
724
725
726
727 static int update_irq_exit_state(struct guest_info * info) {
728     struct vmx_exit_idt_vec_info idt_vec_info;
729
730     check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
731
732     if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
733 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
734         V3_Print("Calling v3_injecting_intr\n");
735 #endif
736         info->intr_core_state.irq_started = 0;
737         v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
738     }
739
740     return 0;
741 }
742
743 static int update_irq_entry_state(struct guest_info * info) {
744     struct vmx_exit_idt_vec_info idt_vec_info;
745     struct vmcs_interrupt_state intr_core_state;
746     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
747
748     check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
749     check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));
750
751     /* Check for pending exceptions to inject */
752     if (v3_excp_pending(info)) {
753         struct vmx_entry_int_info int_info;
754         int_info.value = 0;
755
756         // In VMX, almost every exception is hardware
757         // Software exceptions are pretty much only for breakpoint or overflow
758         int_info.type = 3;
759         int_info.vector = v3_get_excp_number(info);
760
761         if (info->excp_state.excp_error_code_valid) {
762             check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
763             int_info.error_code = 1;
764
765 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
766             V3_Print("Injecting exception %d with error code %x\n", 
767                     int_info.vector, info->excp_state.excp_error_code);
768 #endif
769         }
770
771         int_info.valid = 1;
772 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
773         V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
774 #endif
775         check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);
776
777         v3_injecting_excp(info, int_info.vector);
778
779     } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
780                (intr_core_state.val == 0)) {
781        
782         if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {
783
784 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
785             V3_Print("IRQ pending from previous injection\n");
786 #endif
787
788             // Copy the IDT vectoring info over to reinject the old interrupt
789             if (idt_vec_info.error_code == 1) {
790                 uint32_t err_code = 0;
791
792                 check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
793                 check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
794             }
795
796             idt_vec_info.undef = 0;
797             check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);
798
799         } else {
800             struct vmx_entry_int_info ent_int;
801             ent_int.value = 0;
802
803             switch (v3_intr_pending(info)) {
804                 case V3_EXTERNAL_IRQ: {
805                     info->intr_core_state.irq_vector = v3_get_intr(info); 
806                     ent_int.vector = info->intr_core_state.irq_vector;
807                     ent_int.type = 0;
808                     ent_int.error_code = 0;
809                     ent_int.valid = 1;
810
811 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
812                     V3_Print("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
813                                info->intr_core_state.irq_vector, 
814                                (uint32_t)info->num_exits, 
815                                (void *)(addr_t)info->rip);
816 #endif
817
818                     check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
819                     info->intr_core_state.irq_started = 1;
820
821                     break;
822                 }
823                 case V3_NMI:
824                     PrintDebug("Injecting NMI\n");
825
826                     ent_int.type = 2;
827                     ent_int.vector = 2;
828                     ent_int.valid = 1;
829                     check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
830
831                     break;
832                 case V3_SOFTWARE_INTR:
833                     PrintDebug("Injecting software interrupt\n");
834                     ent_int.type = 4;
835
836                     ent_int.valid = 1;
837                     check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
838
839                     break;
840                 case V3_VIRTUAL_IRQ:
841                     // Not sure what to do here, Intel doesn't have virtual IRQs
842                     // May be the same as external interrupts/IRQs
843
844                     break;
845                 case V3_INVALID_INTR:
846                 default:
847                     break;
848             }
849         }
850     } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
851         // Enable INTR window exiting so we know when IF=1
852         uint32_t instr_len;
853
854         check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
855
856 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
857         V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
858 #endif
859
860         vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
861         check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
862     }
863
864
865     return 0;
866 }
867
868
869
870 static struct vmx_exit_info exit_log[10];
871 static uint64_t rip_log[10];
872
873
874
875 static void print_exit_log(struct guest_info * info) {
876     int cnt = info->num_exits % 10;
877     int i = 0;
878     
879
880     V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);
881
882     for (i = 0; i < 10; i++) {
883         struct vmx_exit_info * tmp = &exit_log[cnt];
884
885         V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
886         V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
887         V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
888         V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
889         V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
890         V3_Print("\tguest_linear_addr= %p\n", (void *)(addr_t)tmp->guest_linear_addr);
891         V3_Print("\tRIP = %p\n", (void *)rip_log[cnt]);
892
893
894         cnt--;
895
896         if (cnt == -1) {
897             cnt = 9;
898         }
899
900     }
901
902 }
903
904 int 
905 v3_vmx_config_tsc_virtualization(struct guest_info * info) {
906     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
907
908     if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
909         if  (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
910             vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
911             check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
912         }
913     } else {
914         sint64_t tsc_offset;
915         uint32_t tsc_offset_low, tsc_offset_high;
916
917         if  (vmx_info->pri_proc_ctrls.rdtsc_exit) {
918             vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
919             check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
920         }
921
922         if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
923             tsc_offset = 0;
924         } else {
925             tsc_offset = v3_tsc_host_offset(&info->time_state);
926         }
927         tsc_offset_high = (uint32_t)(( tsc_offset >> 32) & 0xffffffff);
928         tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);
929
930         check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
931         check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
932     }
933     return 0;
934 }
935
936 /* 
937  * CAUTION and DANGER!!! 
938  * 
939  * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function
940  * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
941  * on its contents will cause things to break. The contents at the time of the exit WILL 
942  * change before the exit handler is executed.
943  */
944 int v3_vmx_enter(struct guest_info * info) {
945     int ret = 0;
946     struct vmx_exit_info exit_info;
947     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
948     uint64_t guest_cycles = 0;
949
950     // Conditionally yield the CPU if the timeslice has expired
951     v3_yield_cond(info);
952
953     // Update timer devices late after being in the VM so that as much 
954     // of the time in the VM is accounted for as possible. Also do it before
955     // updating IRQ entry state so that any interrupts the timers raise get 
956     // handled on the next VM entry.
957     v3_advance_time(info, NULL);
958     v3_update_timers(info);
959
960     // disable global interrupts for vm state transition
961     v3_disable_ints();
962
963     if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
964         vmcs_clear(vmx_info->vmcs_ptr_phys);
965         vmcs_load(vmx_info->vmcs_ptr_phys);
966         vmx_info->state = VMX_UNLAUNCHED;
967     }
968
969     v3_vmx_restore_vmcs(info);
970
971
972 #ifdef V3_CONFIG_SYMCALL
973     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
974         update_irq_entry_state(info);
975     }
976 #else 
977     update_irq_entry_state(info);
978 #endif
979
980     {
981         addr_t guest_cr3;
982         vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
983         vmcs_write(VMCS_GUEST_CR3, guest_cr3);
984     }
985
986
987     // Perform last-minute time setup prior to entering the VM
988     v3_vmx_config_tsc_virtualization(info);
989
990     if (v3_update_vmcs_host_state(info)) {
991         v3_enable_ints();
992         PrintError("Could not write host state\n");
993         return -1;
994     }
995     
996     if (vmx_info->pin_ctrls.active_preempt_timer) {
997         /* Preemption timer is active */
998         uint32_t preempt_window = 0xffffffff;
999
1000         if (info->timeouts.timeout_active) {
1001             preempt_window = info->timeouts.next_timeout;
1002         }
1003         
1004         check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
1005     }
1006    
1007
1008     {   
1009         uint64_t entry_tsc = 0;
1010         uint64_t exit_tsc = 0;
1011
1012         if (vmx_info->state == VMX_UNLAUNCHED) {
1013             vmx_info->state = VMX_LAUNCHED;
1014             rdtscll(entry_tsc);
1015             ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
1016             rdtscll(exit_tsc);
1017
1018         } else {
1019             V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
1020             rdtscll(entry_tsc);
1021             ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
1022             rdtscll(exit_tsc);
1023         }
1024
1025         guest_cycles = exit_tsc - entry_tsc;    
1026     }
1027
1028     //  PrintDebug("VMX Exit: ret=%d\n", ret);
1029
1030     if (ret != VMX_SUCCESS) {
1031         uint32_t error = 0;
1032         vmcs_read(VMCS_INSTR_ERR, &error);
1033
1034         v3_enable_ints();
1035
1036         PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
1037         return -1;
1038     }
1039
1040
1041     info->num_exits++;
1042
1043     /* If we have the preemption timer, then use it to get more accurate guest time */
1044     if (vmx_info->pin_ctrls.active_preempt_timer) {
1045         uint32_t cycles_left = 0;
1046         check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));
1047
1048         if (info->timeouts.timeout_active) {
1049             guest_cycles = info->timeouts.next_timeout - cycles_left;
1050         } else {
1051             guest_cycles = 0xffffffff - cycles_left;
1052         }
1053     }
1054
1055     // Immediate exit from VM time bookkeeping
1056     v3_advance_time(info, &guest_cycles);
1057
1058     /* Update guest state */
1059     v3_vmx_save_vmcs(info);
1060
1061     // info->cpl = info->segments.cs.selector & 0x3;
1062
1063     info->mem_mode = v3_get_vm_mem_mode(info);
1064     info->cpu_mode = v3_get_vm_cpu_mode(info);
1065
1066
1067
1068     check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
1069     check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
1070     check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
1071     check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
1072     check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
1073     check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
1074     check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));
1075
1076     if (info->shdw_pg_mode == NESTED_PAGING) {
1077         check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
1078     }
1079
1080     //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);
1081
1082     exit_log[info->num_exits % 10] = exit_info;
1083     rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));
1084
1085 #ifdef V3_CONFIG_SYMCALL
1086     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
1087         update_irq_exit_state(info);
1088     }
1089 #else
1090     update_irq_exit_state(info);
1091 #endif
1092
1093     if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
1094         // This is a special case whose only job is to inject an interrupt
1095         vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
1096         vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
1097         vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
1098
1099 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
1100        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
1101 #endif
1102     }
1103
1104     // reenable global interrupts after vm exit
1105     v3_enable_ints();
1106
1107     // Conditionally yield the CPU if the timeslice has expired
1108     v3_yield_cond(info);
1109     v3_advance_time(info, NULL);
1110     v3_update_timers(info);
1111
1112     if (v3_handle_vmx_exit(info, &exit_info) == -1) {
1113         PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
1114         return -1;
1115     }
1116
1117     if (info->timeouts.timeout_active) {
1118         /* Check to see if any timeouts have expired */
1119         v3_handle_timeouts(info, guest_cycles);
1120     }
1121
1122     return 0;
1123 }
1124
1125
1126 int v3_start_vmx_guest(struct guest_info * info) {
1127
1128     PrintDebug("Starting VMX core %u\n", info->vcpu_id);
1129
1130     if (info->vcpu_id == 0) {
1131         info->core_run_state = CORE_RUNNING;
1132     } else {
1133
1134         PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);
1135
1136         while (info->core_run_state == CORE_STOPPED) {
1137
1138             if (info->vm_info->run_state == VM_STOPPED) {
1139                 // The VM was stopped before this core was initialized. 
1140                 return 0;
1141             }
1142
1143             v3_yield(info);
1144             //PrintDebug("VMX core %u: still waiting for INIT\n",info->vcpu_id);
1145         }
1146         
1147         PrintDebug("VMX core %u initialized\n", info->vcpu_id);
1148
1149         // We'll be paranoid about race conditions here
1150         v3_wait_at_barrier(info);
1151     }
1152
1153
1154     PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
1155                info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
1156                info->segments.cs.limit, (void *)(info->rip));
1157
1158
1159     PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);
1160
1161     v3_start_time(info);
1162
1163     while (1) {
1164
1165         if (info->vm_info->run_state == VM_STOPPED) {
1166             info->core_run_state = CORE_STOPPED;
1167             break;
1168         }
1169
1170         if (v3_vmx_enter(info) == -1) {
1171
1172             addr_t host_addr;
1173             addr_t linear_addr = 0;
1174             
1175             info->vm_info->run_state = VM_ERROR;
1176             
1177             V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 
1178             
1179             v3_print_guest_state(info);
1180             
1181             V3_Print("VMX core %u\n", info->vcpu_id); 
1182
1183             linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
1184             
1185             if (info->mem_mode == PHYSICAL_MEM) {
1186                 v3_gpa_to_hva(info, linear_addr, &host_addr);
1187             } else if (info->mem_mode == VIRTUAL_MEM) {
1188                 v3_gva_to_hva(info, linear_addr, &host_addr);
1189             }
1190             
1191             V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
1192             
1193             V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
1194             v3_dump_mem((uint8_t *)host_addr, 15);
1195             
1196             v3_print_stack(info);
1197
1198
1199             v3_print_vmcs();
1200             print_exit_log(info);
1201             return -1;
1202         }
1203
1204         v3_wait_at_barrier(info);
1205
1206
1207         if (info->vm_info->run_state == VM_STOPPED) {
1208             info->core_run_state = CORE_STOPPED;
1209             break;
1210         }
1211 /*
1212         if ((info->num_exits % 5000) == 0) {
1213             V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
1214         }
1215 */
1216
1217     }
1218
1219     return 0;
1220 }
1221
1222
1223
1224
1225 #define VMX_FEATURE_CONTROL_MSR     0x0000003a
1226 #define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
1227 #define CPUID_1_ECX_VTXFLAG 0x00000020
1228
1229 int v3_is_vmx_capable() {
1230     v3_msr_t feature_msr;
1231     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
1232
1233     v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);
1234
1235     PrintDebug("ECX: 0x%x\n", ecx);
1236
1237     if (ecx & CPUID_1_ECX_VTXFLAG) {
1238         v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
1239         
1240         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
1241
1242         if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
1243             PrintDebug("VMX is locked -- enable in the BIOS\n");
1244             return 0;
1245         }
1246
1247     } else {
1248         PrintDebug("VMX not supported on this cpu\n");
1249         return 0;
1250     }
1251
1252     return 1;
1253 }
1254
1255
1256 int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
1257     // init vmcs bios
1258     
1259     if ((core->shdw_pg_mode == NESTED_PAGING) && 
1260         (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
1261         // easy 
1262         core->rip = 0;
1263         core->segments.cs.selector = rip << 8;
1264         core->segments.cs.limit = 0xffff;
1265         core->segments.cs.base = rip << 12;
1266     } else {
1267         core->vm_regs.rdx = core->vcpu_id;
1268         core->vm_regs.rbx = rip;
1269     }
1270
1271     return 0;
1272 }
1273
1274
1275
1276 void v3_init_vmx_cpu(int cpu_id) {
1277     addr_t vmx_on_region = 0;
1278     extern v3_cpu_arch_t v3_mach_type;
1279     extern v3_cpu_arch_t v3_cpu_types[];
1280
1281     if (v3_mach_type == V3_INVALID_CPU) {
1282         if (v3_init_vmx_hw(&hw_info) == -1) {
1283             PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
1284             return;
1285         }
1286     }
1287
1288     enable_vmx();
1289
1290
1291     // Setup VMXON Region
1292     vmx_on_region = allocate_vmcs();
1293
1294
1295     if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
1296         V3_Print("VMX Enabled\n");
1297         host_vmcs_ptrs[cpu_id] = vmx_on_region;
1298     } else {
1299         V3_Print("VMX already enabled\n");
1300         V3_FreePages((void *)vmx_on_region, 1);
1301     }
1302
1303     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);    
1304
1305     {
1306         struct vmx_sec_proc_ctrls sec_proc_ctrls;
1307         sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));
1308         
1309         if (sec_proc_ctrls.enable_ept == 0) {
1310             V3_Print("VMX EPT (Nested) Paging not supported\n");
1311             v3_cpu_types[cpu_id] = V3_VMX_CPU;
1312         } else if (sec_proc_ctrls.unrstrct_guest == 0) {
1313             V3_Print("VMX EPT (Nested) Paging supported\n");
1314             v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
1315         } else {
1316             V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
1317             v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
1318         }
1319     }
1320     
1321 }
1322
1323
1324 void v3_deinit_vmx_cpu(int cpu_id) {
1325     extern v3_cpu_arch_t v3_cpu_types[];
1326     v3_cpu_types[cpu_id] = V3_INVALID_CPU;
1327
1328     if (host_vmcs_ptrs[cpu_id] != 0) {
1329         V3_Print("Disabling VMX\n");
1330
1331         if (vmx_off() != VMX_SUCCESS) {
1332             PrintError("Error executing VMXOFF\n");
1333         }
1334
1335         V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
1336
1337         host_vmcs_ptrs[cpu_id] = 0;
1338     }
1339 }