Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
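
To see which branches are available before picking one, list the remote branches. (The release branch name below is only illustrative; substitute one that actually appears in the listing.)

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2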


Commit: enabled stopping a VM before the secondary cores have been initialized

File: palacios/src/palacios/vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_cpu_types[];

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}
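
/* Note: the two wrappers above use different conventions: check_vmcs_write()
 * returns 1 on failure, while check_vmcs_read() passes the raw vmcs_read()
 * status through. Callers treat both the same way, OR-ing the results into
 * an accumulator, so in either case nonzero means error. */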


static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}
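
/* Note: allocate_vmcs() is used for both the per-core VMCS regions and the
 * per-CPU VMXON region (see v3_init_vmx_cpu below). Intel requires each of
 * these to begin with the VMCS revision identifier from IA32_VMX_BASIC,
 * which is what the hw_info.basic_info.revision write above provides. */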



static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)hw_info.cr4.value);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    vmx_state->pri_proc_ctrls.hlt_exit = 1;


    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));



#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif

    // Hook all accesses to EFER register
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);

    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
    vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);


    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging



        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT


        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode


        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;



        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else {
        PrintError("Invalid Virtual paging mode\n");
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {
        int msr_ret = 0;

        struct vmcs_msr_entry * exit_store_msrs = NULL;
        struct vmcs_msr_entry * exit_load_msrs = NULL;
        struct vmcs_msr_entry * entry_load_msrs = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area = V3_VAddr(V3_AllocPages(1));

        if (vmx_state->msr_area == NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);


        exit_store_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area);
        exit_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 4));
        entry_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 8));


        exit_store_msrs[0].index = IA32_STAR_MSR;
        exit_store_msrs[1].index = IA32_LSTAR_MSR;
        exit_store_msrs[2].index = IA32_FMASK_MSR;
        exit_store_msrs[3].index = IA32_KERN_GS_BASE_MSR;

        // Propagate the MSR indices just set up to the two load areas
        // (note that memcpy takes the destination first)
        memcpy(exit_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);
        memcpy(entry_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);


        v3_get_msr(IA32_STAR_MSR, &(exit_load_msrs[0].hi), &(exit_load_msrs[0].lo));
        v3_get_msr(IA32_LSTAR_MSR, &(exit_load_msrs[1].hi), &(exit_load_msrs[1].lo));
        v3_get_msr(IA32_FMASK_MSR, &(exit_load_msrs[2].hi), &(exit_load_msrs[2].lo));
        v3_get_msr(IA32_KERN_GS_BASE_MSR, &(exit_load_msrs[3].hi), &(exit_load_msrs[3].lo));

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(exit_store_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(exit_load_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(entry_load_msrs));


        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);


        // IMPORTANT: These SYSCALL MSRs are currently not handled by hardware or cached
        // We should really emulate these ourselves, or ideally include them in the MSR store area if there is room
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);
    }
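
    /* Layout of the msr_area page set up above: three back-to-back arrays of
     * four vmcs_msr_entry structs on a single 4KB page:
     *
     *   entries 0-3:  exit-store area  (guest MSR values saved on VM exit)
     *   entries 4-7:  exit-load area   (host MSR values restored on VM exit)
     *   entries 8-11: entry-load area  (guest MSR values loaded on VM entry)
     */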

    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif
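
    // An all-ones VMCS link pointer tells the CPU that no shadow VMCS is in
    // use; per the Intel SDM, leaving this field at zero causes VM entry to
    // fail its consistency checks.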



    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }

    // reenable global interrupts for vm state initialization now
    // that the vm state is initialized. If another VM kicks us off, 
    // it'll update our vmx state so that we know to reload ourself
    v3_enable_ints();

    return 0;
}

int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1; 
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return -1;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return -1;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}



#ifdef V3_CONFIG_CHECKPOINT
/* 
 * JRL: This is broken
 */
int v3_vmx_save_core(struct guest_info * core, void * ctx){
    uint64_t vmcs_ptr = vmcs_store();

    v3_chkpt_save(ctx, "vmcs_data", PAGE_SIZE, (void *)vmcs_ptr);

    return 0;
}

int v3_vmx_load_core(struct guest_info * core, void * ctx){
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    struct cr0_32 * shadow_cr0;
    char vmcs[PAGE_SIZE_4KB];

    v3_chkpt_load(ctx, "vmcs_data", PAGE_SIZE_4KB, vmcs);

    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmcs_load((addr_t)vmcs);

    v3_vmx_save_vmcs(core);

    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    /* Get the CPU mode to set the guest_ia32e entry ctrl */

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }

    return 0;
}
#endif


void v3_flush_vmx_vm_core(struct guest_info * core) {
    struct vmx_data * vmx_info = (struct vmx_data *)(core->vmm_data);
    vmcs_clear(vmx_info->vmcs_ptr_phys);
    vmx_info->state = VMX_UNLAUNCHED;
}



static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}
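
/* On a VM exit, the IDT-vectoring info field is valid only if the exit
 * interrupted the delivery of an earlier event. So valid == 0 above means the
 * injected IRQ was actually delivered, and the bookkeeping can be cleared. */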

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}
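
/* For reference, the VM-entry interruption-information layout used by the
 * injection code above (Intel SDM): bits 7:0 are the vector, bits 10:8 the
 * type (0 = external interrupt, 2 = NMI, 3 = hardware exception, 4 = software
 * interrupt), bit 11 flags a valid error code, and bit 31 marks the whole
 * field valid. */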



static struct vmx_exit_info exit_log[10];

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    // Update timer devices late after being in the VM so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry. Must be done with interrupts disabled.
    v3_update_timers(info);

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);
        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
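
    // The TSC offset is a single 64-bit VMCS field; it is written as two
    // 32-bit halves so the same code works on 32-bit hosts, which access
    // 64-bit VMCS fields through the _HIGH companion encodings.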

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }


    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;

        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }



    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;
        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d (launch_ret = %d)\n", error, ret);
        return -1;
    }



    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
       V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler (Exit reason=%x)\n", exit_info.exit_reason);
        return -1;
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n",info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {

            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("VMX core %u: VMX ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("VMX core %u\n", info->vcpu_id); 

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("VMX core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("VMX core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);


            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}




#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020
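
/* In IA32_FEATURE_CONTROL (MSR 0x3a), bit 0 is the lock bit and bit 2 enables
 * VMXON outside SMX operation, so the 0x5 mask above verifies that the BIOS
 * locked the MSR with VMX left enabled. Bit 5 of CPUID.1:ECX (0x20) reports
 * VMX support itself. */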

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}
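
/* A rough host bring-up sketch (hypothetical glue code, not part of this
 * file): check capability once, then run the per-CPU init on every logical
 * CPU, since VMXON must be executed on each logical CPU that will run guests:
 *
 *     if (v3_is_vmx_capable()) {
 *         // on each online cpu i:
 *         v3_init_vmx_cpu(i);
 *     }
 *
 * This is also why host_vmcs_ptrs[] is indexed by cpu id. */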


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}
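
/* Here "rip" is presumably the SIPI start vector: a real-mode AP begins at
 * CS.selector = vector << 8, CS.base = vector << 12, IP = 0, which is what
 * the unrestricted-guest branch encodes directly. The fallback branch instead
 * hands the vector to the guest via RBX and the core id via RDX. */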



void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;

    if (cpu_id == 0) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}