Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or
one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
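For example, to track a release branch instead (the branch name below is
illustrative; list the actual remote branches with "git branch -r"):

  git checkout --track -b Release-1.2 origin/Release-1.2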


Floating point context-switching and checkpoint/load

palacios/src/palacios/vmx.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>
#include <palacios/vmm_timeout.h>
#include <palacios/vmm_debug.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_mach_type;

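// Per-physical-CPU VMXON region pointers (host physical addresses);
// a zero entry means VMX has not been enabled on that CPU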
static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

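// Low-level entry stubs (see vmx_lowlevel.h) that perform the actual
// VMLAUNCH/VMRESUME with the guest's GPR and control-register context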
extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError(VM_NONE, VCORE_NONE, "VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError(VM_NONE, VCORE_NONE, "VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


static addr_t allocate_vmcs(void) {
    void * temp;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug(VM_NONE, VCORE_NONE, "Allocating page\n");

    temp = V3_AllocPages(1); // need not be shadow-safe, not exposed to guest
    if (!temp) {
        PrintError(VM_NONE, VCORE_NONE, "Cannot allocate VMCS\n");
        return -1;
    }
    vmcs_page = (struct vmcs_data *)V3_VAddr(temp);
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug(VM_NONE, VCORE_NONE, "VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}

#if 0
static int debug_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print(core->vm_info, core, "\n\nEFER READ (val = %p)\n", (void *)efer->value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    src->value = efer->value;
    return 0;
}

static int debug_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
    struct v3_msr * efer = (struct v3_msr *)&(core->ctrl_regs.efer);
    V3_Print(core->vm_info, core, "\n\nEFER WRITE (old_val = %p) (new_val = %p)\n", (void *)efer->value, (void *)src.value);

    v3_print_guest_state(core);
    v3_print_vmcs();

    efer->value = src.value;

    return 0;
}
#endif


static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    /* Get available features */
    struct vmx_pin_ctrls avail_pin_ctrls;
    avail_pin_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.pin_ctrls));
    /* ** */

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug(core->vm_info, core, "Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError(core->vm_info, core, "VMPTRLD failed\n");
        return -1;
    }

    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    V3_Print(core->vm_info, core, "CR0 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr0.req_val, (void *)(addr_t)hw_info.cr0.req_mask);
    V3_Print(core->vm_info, core, "CR4 MSR: req_val=%p, req_mask=%p\n", (void *)(addr_t)hw_info.cr4.req_val, (void *)(addr_t)hw_info.cr4.req_mask);


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.virt_nmi = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    /* Enable the preemption timer by default so that guest time can be measured accurately */
    if (avail_pin_ctrls.active_preempt_timer) {
        V3_Print(core->vm_info, core, "VMX Preemption Timer is available\n");
        vmx_state->pin_ctrls.active_preempt_timer = 1;
        vmx_state->exit_ctrls.save_preempt_timer = 1;
    }

    // exit on HLT so the host regains control when the guest halts
    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    // CPUID reports these instructions as unavailable, so exit if the guest uses them anyway
    vmx_state->pri_proc_ctrls.monitor_exit = 1;
    vmx_state->pri_proc_ctrls.mwait_exit = 1;

    // We don't need to handle PAUSE, although this is where we could detect
    // a spinning guest and reschedule, e.g., to let a lock holder's core run
    vmx_state->pri_proc_ctrls.pause_exit = 0;

    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));


#ifdef __V3_64BIT__
    // Ensure the host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif


    // Restore the host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore the guest's EFER register to/from the VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    vmx_state->exit_ctrls.save_pat = 1;
    vmx_state->exit_ctrls.ld_pat = 1;
    vmx_state->entry_ctrls.ld_pat = 1;

    /* Temporary GPF trap */
    //  vmx_state->excp_bmap.gp = 1;

    // Set up the guest's initial PAT field (the architected power-on default)
    vmx_ret |= check_vmcs_write(VMCS_GUEST_PAT, 0x0007040600070406LL);

    // Capture CR8 modifications so that we can keep the APIC TPR in sync
    vmx_state->pri_proc_ctrls.cr8_ld_exit = 1;
    vmx_state->pri_proc_ctrls.cr8_str_exit = 1;


    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug(core->vm_info, core, "Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));


        // Cause a VM EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
#define CR0_NE 0x00000020
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP | CR0_NE));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        // Cause a VM EXIT whenever the CR4.VMXE or CR4.PAE bits are written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError(core->vm_info, core, "Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we assume that unrestricted guest mode is available whenever EPT is

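        // Architected x86 post-RESET state: execution begins at CS:IP = F000:FFF0
        // (CS base 0xF0000), with the other segments given 16-bit real-mode defaults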
        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010030;
        core->ctrl_regs.cr4 = 0x00002010; // Enable the VMXE and PSE flags

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;

        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;
        }

        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 0x2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation

        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        // Cause a VM EXIT whenever the CR4.VMXE bit is written
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
#define CR0_NE 0x00000020
#define CR0_CD 0x40000000
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, CR0_NE | CR0_CD);
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->ne = 1;
        ((struct cr0_32 *)&(core->shdw_pg_state.guest_cr0))->cd = 0;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError(core->vm_info, core, "Error initializing EPT\n");
            return -1;
        }

        // Hook all accesses to the EFER register
        //      v3_hook_msr(core->vm_info, EFER_MSR, &debug_efer_read, &debug_efer_write, core);
        v3_hook_msr(core->vm_info, EFER_MSR, NULL, NULL, NULL);
    } else {
        PrintError(core->vm_info, core, "Invalid virtual paging mode (pg_mode=%d) (mach_type=%d)\n", core->shdw_pg_mode, v3_mach_type);
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in the MSR load/store area
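    // The VMCS entry/exit MSR load and store areas are arrays of (index, value)
    // entries that the hardware walks automatically on each VM transition:
    // guest MSRs are stored/loaded and host MSRs reloaded without VMM involvement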
    {
        struct vmcs_msr_save_area * msr_entries = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;
        int msr_ret = 0;

        V3_Print(core->vm_info, core, "Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError(core->vm_info, core, "Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area_paddr = (addr_t)V3_AllocPages(1); // need not be shadow-safe, not exposed to guest

        if (vmx_state->msr_area_paddr == (addr_t)NULL) {
            PrintError(core->vm_info, core, "could not allocate msr load/store area\n");
            return -1;
        }

        msr_entries = (struct vmcs_msr_save_area *)V3_VAddr((void *)(vmx_state->msr_area_paddr));
        vmx_state->msr_area = msr_entries; // cache in vmx_info

        memset(msr_entries, 0, PAGE_SIZE);

        msr_entries->guest_star.index = IA32_STAR_MSR;
        msr_entries->guest_lstar.index = IA32_LSTAR_MSR;
        msr_entries->guest_fmask.index = IA32_FMASK_MSR;
        msr_entries->guest_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_entries->host_star.index = IA32_STAR_MSR;
        msr_entries->host_lstar.index = IA32_LSTAR_MSR;
        msr_entries->host_fmask.index = IA32_FMASK_MSR;
        msr_entries->host_kern_gs.index = IA32_KERN_GS_BASE_MSR;

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->guest_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(msr_entries->host_msrs));

        msr_ret |= v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);

        // IMPORTANT: These MSRs appear to be cached by the hardware....
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        msr_ret |= v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        msr_ret |= v3_hook_msr(core->vm_info, IA32_PAT_MSR, NULL, NULL, NULL);

        // Not sure what to do about this... Does not appear to be an explicit hardware cache version...
        msr_ret |= v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        if (msr_ret != 0) {
            PrintError(core->vm_info, core, "Error configuring MSR save/restore area\n");
            return -1;
        }
    }

    /* Sanity check ctrl/reg fields against hw_defaults */


    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif


    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError(core->vm_info, core, "Could not write control fields!\n");
        return -1;
    }

    /*
    if (v3_update_vmcs_host_state(core)) {
        PrintError(core->vm_info, core, "Could not write host state\n");
        return -1;
    }
    */

    // Reenable global interrupts now that the vm state is initialized.
    // If another VM kicks us off, it'll update our vmx state so that we
    // know to reload ourselves
    v3_enable_ints();

    return 0;
}


static void __init_vmx_vmcs(void * arg) {
    struct guest_info * core = arg;
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    if (!vmx_state) {
        PrintError(core->vm_info, core, "Unable to allocate memory while initializing the VMX VMCS\n");
        return;
    }

    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug(core->vm_info, core, "vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug(core->vm_info, core, "Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug(core->vm_info, core, "VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug(core->vm_info, core, "Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug(core->vm_info, core, "Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError(core->vm_info, core, "VMCLEAR failed\n");
        return;
    }

    if (core->vm_info->vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError(core->vm_info, core, "Error initializing VMCS to BIOS state\n");
            return;
        }
    } else {
        PrintError(core->vm_info, core, "Invalid VM Class\n");
        return;
    }

    PrintDebug(core->vm_info, core, "Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    core->core_run_state = CORE_STOPPED;
    return;
}


int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_cpu_types[V3_Get_CPU()] == V3_INVALID_CPU) {
        int i = 0;

        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                break;
            }
        }

        if (i == V3_CONFIG_MAX_CPUS) {
            PrintError(core->vm_info, core, "Could not find a valid CPU for VMX guest initialization\n");
            return -1;
        }

        V3_Call_On_CPU(i, __init_vmx_vmcs, core);

    } else {
        __init_vmx_vmcs(core);
    }

    if (core->core_run_state != CORE_STOPPED) {
        PrintError(core->vm_info, core, "Error initializing VMX Core\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
  struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);

  // note that the vmcs pointer is an HPA, but we need an HVA
  if (v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                    V3_VAddr((void*) (vmx_info->vmcs_ptr_phys)))) {
    PrintError(core->vm_info, core, "Could not save vmcs data for VMX\n");
    return -1;
  }

  return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
  struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
  struct cr0_32 * shadow_cr0;
  addr_t vmcs_page_paddr;  //HPA

  vmcs_page_paddr = (addr_t) V3_AllocPages(1); // need not be shadow-safe, not exposed to guest

  if (!vmcs_page_paddr) { 
    PrintError(core->vm_info, core, "Could not allocate space for a vmcs in VMX\n");
    return -1;
  }

  if (v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, 
                    V3_VAddr((void *)vmcs_page_paddr)) == -1) { 
    PrintError(core->vm_info, core, "Could not load vmcs data for VMX\n");
    V3_FreePages((void*)vmcs_page_paddr,1);
    return -1;
  }

  vmcs_clear(vmx_info->vmcs_ptr_phys);

  // Probably need to delete the old one... 
  V3_FreePages((void*)(vmx_info->vmcs_ptr_phys),1);

  vmcs_load(vmcs_page_paddr);
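  // NOTE: vmx_info->vmcs_ptr_phys still points at the freed page rather than
  // vmcs_page_paddr here, which may be part of why this path is flagged as broken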

  v3_vmx_save_vmcs(core);

  shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);

  /* Get the CPU mode to set the guest_ia32e entry ctrl */

  if (core->shdw_pg_mode == SHADOW_PAGING) {
    if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
      if (v3_activate_shadow_pt(core) == -1) {
        PrintError(core->vm_info, core, "Failed to activate shadow page tables\n");
        return -1;
      }
    } else {
      if (v3_activate_passthrough_pt(core) == -1) {
        PrintError(core->vm_info, core, "Failed to activate passthrough page tables\n");
        return -1;
      }
    }
  }

  return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}


static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
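        // interruption type 3 = hardware exception (VM-entry interruption-info format)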
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print(info->vm_info, info, "Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print(info->vm_info, info, "IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {

                    int irq = v3_get_intr(info);

                    if (irq < 0) {
                      break;
                    }

                    info->intr_core_state.irq_vector = irq;
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print(info->vm_info, info, "Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug(info->vm_info, info, "Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug(info->vm_info, info, "Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs
                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable interrupt-window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }

    return 0;
}


static struct vmx_exit_info exit_log[10];
static uint64_t rip_log[10];


static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;

    V3_Print(info->vm_info, info, "\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print(info->vm_info, info, "%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print(info->vm_info, info, "\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print(info->vm_info, info, "\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print(info->vm_info, info, "\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print(info->vm_info, info, "\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
        V3_Print(info->vm_info, info, "\tguest_linear_addr = %p\n", (void *)(addr_t)tmp->guest_linear_addr);
        V3_Print(info->vm_info, info, "\tRIP = %p\n", (void *)rip_log[cnt]);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }
    }
}

int 
v3_vmx_config_tsc_virtualization(struct guest_info * info) {
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        if (!vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 1;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }
    } else {
        sint64_t tsc_offset;
        uint32_t tsc_offset_low, tsc_offset_high;

        if (vmx_info->pri_proc_ctrls.rdtsc_exit) {
            vmx_info->pri_proc_ctrls.rdtsc_exit = 0;
            check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
        }

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            tsc_offset = 0;
        } else {
            tsc_offset = v3_tsc_host_offset(&info->time_state);
        }
        tsc_offset_high = (uint32_t)((tsc_offset >> 32) & 0xffffffff);
        tsc_offset_low = (uint32_t)(tsc_offset & 0xffffffff);

        check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
        check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

    // Update timer devices late, after time spent in the VM, so that as much
    // of that time as possible is accounted for. Also do it before updating
    // IRQ entry state so that any interrupts the timers raise get handled on
    // the next VM entry.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

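    // If a different VMCS is currently active on this physical CPU, make ours
    // current again and force a fresh VMLAUNCH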
    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

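    // Read the guest CR3 back and rewrite it unchanged, presumably to force
    // the VMCS to resync its cached CR3 value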
    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }


    // Perform last-minute time setup prior to entering the VM
    v3_vmx_config_tsc_virtualization(info);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError(info->vm_info, info, "Could not write host state\n");
        return -1;
    }

    if (vmx_info->pin_ctrls.active_preempt_timer) {
        /* Preemption timer is active */
        uint32_t preempt_window = 0xffffffff;

        if (info->timeouts.timeout_active) {
            preempt_window = info->timeouts.next_timeout;
        }

        check_vmcs_write(VMCS_PREEMPT_TIMER, preempt_window);
    }

    V3_FP_ENTRY_RESTORE(info);

    {
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif

        if (vmx_info->state == VMX_UNLAUNCHED) {
            vmx_info->state = VMX_LAUNCHED;
            rdtscll(entry_tsc);
            ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);

        } else {
            V3_ASSERT(info->vm_info, info, vmx_info->state != VMX_UNLAUNCHED);
            rdtscll(entry_tsc);
            ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
            rdtscll(exit_tsc);
        }

        guest_cycles = exit_tsc - entry_tsc;

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif
    }

    //  PrintDebug(info->vm_info, info, "VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError(info->vm_info, info, "VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }


    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    /* If we have the preemption timer, then use it to get a more accurate guest time */
    if (vmx_info->pin_ctrls.active_preempt_timer) {
        uint32_t cycles_left = 0;
        check_vmcs_read(VMCS_PREEMPT_TIMER, &(cycles_left));

        if (info->timeouts.timeout_active) {
            guest_cycles = info->timeouts.next_timeout - cycles_left;
        } else {
            guest_cycles = 0xffffffff - cycles_left;
        }
    }

    // Immediate exit from VM time bookkeeping
    v3_advance_time(info, &guest_cycles);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug(info->vm_info, info, "VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;
    rip_log[info->num_exits % 10] = get_addr_linear(info, info->rip, &(info->segments.cs));

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMX_EXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print(info->vm_info, info, "Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }


    // Lastly we check for an NMI exit, and reinject if so
    {
        struct vmx_basic_exit_info * basic_info = (struct vmx_basic_exit_info *)&(exit_info.exit_reason);

        if (basic_info->reason == VMX_EXIT_INFO_EXCEPTION_OR_NMI) {
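            // Vector 2 = NMI: the exit consumed the NMI, so re-deliver it to
            // the host with a software INT 2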
            if ((uint8_t)exit_info.int_info == 2) {
                asm("int $2");
            }
        }
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError(info->vm_info, info, "Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug(info->vm_info, info, "Starting VMX core %u\n", info->vcpu_id);

    while (1) {
        if (info->core_run_state == CORE_STOPPED) {
            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else {

                PrintDebug(info->vm_info, info, "VMX core %u: Waiting for core initialization\n", info->vcpu_id);

                V3_NO_WORK(info);

                while (info->core_run_state == CORE_STOPPED) {

                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized.
                        return 0;
                    }

                    V3_STILL_NO_WORK(info);
                    //PrintDebug(info->vm_info, info, "VMX core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);

                PrintDebug(info->vm_info, info, "VMX core %u initialized\n", info->vcpu_id);

                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            }

            PrintDebug(info->vm_info, info, "VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
                       info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
                       info->segments.cs.limit, (void *)(info->rip));

            PrintDebug(info->vm_info, info, "VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

            v3_start_time(info);

            if (info->vm_info->run_state == VM_STOPPED) {
                info->core_run_state = CORE_STOPPED;
                break;
            }
        }

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print(info->vm_info, info, "VMX core %u: VMX ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print(info->vm_info, info, "VMX core %u\n", info->vcpu_id);

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print(info->vm_info, info, "VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print(info->vm_info, info, "VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print(info->vm_info, info, "VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif

    return 0;
}



#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
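/* CPUID.1:ECX bit 5 reports VMX support */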
#define CPUID_1_ECX_VTXFLAG 0x00000020

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug(VM_NONE, VCORE_NONE, "ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug(VM_NONE, VCORE_NONE, "MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug(VM_NONE, VCORE_NONE, "VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug(VM_NONE, VCORE_NONE, "VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
        // easy
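        // Treat rip as a SIPI-style start vector: CS selector = vector << 8,
        // CS base = vector << 12, and IP = 0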
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}


void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;
    extern v3_cpu_arch_t v3_mach_type;
    extern v3_cpu_arch_t v3_cpu_types[];

    if (v3_mach_type == V3_INVALID_CPU) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError(VM_NONE, VCORE_NONE, "Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
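    // The VMXON region begins with the same VMX revision identifier as a VMCS,
    // so allocate_vmcs() can be reused for it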
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print(VM_NONE, VCORE_NONE, "VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug(VM_NONE, VCORE_NONE, "VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print(VM_NONE, VCORE_NONE, "VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print(VM_NONE, VCORE_NONE, "Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError(VM_NONE, VCORE_NONE, "Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}