Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
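
For example, to see which branches exist and then track one of the release branches (the branch name below is just a placeholder; use a name that git actually lists):

  git branch -r
  git checkout --track -b <release-branch> origin/<release-branch>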


Enlarge serial buffer size to comply with line buffering of serial console
[palacios.git] / palacios / src / palacios / vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef V3_CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_cpu_types[];

static addr_t host_vmcs_ptrs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}


static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}


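/*
 * Bring a freshly allocated VMCS to the state the guest BIOS expects at
 * power-on. In outline: load the VMCS with VMPTRLD, seed the pin/proc/
 * exit/entry controls from the hardware defaults, cache the host GDTR/
 * IDTR/TR for restoration at VM exit, program the IO and MSR bitmaps and
 * EFER handling, configure shadow paging, EPT, or EPT plus unrestricted
 * guest, and set up the MSR load/store areas before writing everything
 * back to the VMCS.
 */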
static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)hw_info.cr4.value);


    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;

    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_state->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.tr.selector = tmp_seg.selector;

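    /* STR yields only the 16-bit TR selector; the base address has to be
     * fetched from the matching descriptor in the host GDT. For example,
     * selector 0x0040 is index 8, i.e. the descriptor at gdtr_base + 0x40
     * (GDT entries are 8 bytes). */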
    /* The GDTR *index* is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else 
                    (0)
#endif
                    );

    vmx_state->host_state.tr.base = tmp_seg.base;


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;

    vmx_state->pri_proc_ctrls.hlt_exit = 1;

    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);

    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));

#ifdef __V3_64BIT__
    // Ensure host runs in 64-bit mode at each VM EXIT
    vmx_state->exit_ctrls.host_64_on = 1;
#endif

    // Hook all accesses to EFER register
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);

    // Restore host's EFER register on each VM EXIT
    vmx_state->exit_ctrls.ld_efer = 1;

    // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY
    vmx_state->exit_ctrls.save_efer = 1;
    vmx_state->entry_ctrls.ld_efer  = 1;

    // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written
    vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE);
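
    /* For bits set in a CR guest/host mask, guest reads return the
     * corresponding read-shadow bit, and a guest write that differs from
     * the shadow triggers a VM exit. Masking CR4.VMXE keeps the guest from
     * seeing or re-enabling VMX; masking CR4.PAE lets the VMM observe
     * paging-mode changes. */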

    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_CPU)) {

        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1;   // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1;  // enable EPT paging

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT

        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode

        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;

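        /* Together with core->rip = 0xfff0 above, a CS base of 0xf0000 makes
         * the first instruction fetch come from linear address 0xffff0, the
         * PC-compatible reset vector. (Real hardware starts with a CS base of
         * 0xffff0000; 0xf0000 is presumably equivalent here because the guest
         * BIOS image is mapped at the top of the first megabyte.) */
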
        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for (i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;

            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;
        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1;       // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1;      // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1;  // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else {
        PrintError("Invalid Virtual paging mode\n");
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {
#define IA32_STAR 0xc0000081
#define IA32_LSTAR 0xc0000082
#define IA32_FMASK 0xc0000084
#define IA32_KERN_GS_BASE 0xc0000102

#define IA32_CSTAR 0xc0000083 // Compatibility mode STAR (ignored for now... hopefully it's not that important...)

        int msr_ret = 0;

        struct vmcs_msr_entry * exit_store_msrs = NULL;
        struct vmcs_msr_entry * exit_load_msrs = NULL;
        struct vmcs_msr_entry * entry_load_msrs = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area = V3_VAddr(V3_AllocPages(1));

        if (vmx_state->msr_area == NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);


        exit_store_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area);
        exit_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 4));
        entry_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 8));

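        /* Layout of the msr_area page: three back-to-back tables of four
         * struct vmcs_msr_entry records -- exit-store at offset 0, exit-load
         * next, then entry-load. The same four MSR indexes are used in all
         * three tables. */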

        exit_store_msrs[0].index = IA32_STAR;
        exit_store_msrs[1].index = IA32_LSTAR;
        exit_store_msrs[2].index = IA32_FMASK;
        exit_store_msrs[3].index = IA32_KERN_GS_BASE;

        // Give both load areas the same MSR index list
        memcpy(exit_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);
        memcpy(entry_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);


        v3_get_msr(IA32_STAR, &(exit_load_msrs[0].hi), &(exit_load_msrs[0].lo));
        v3_get_msr(IA32_LSTAR, &(exit_load_msrs[1].hi), &(exit_load_msrs[1].lo));
        v3_get_msr(IA32_FMASK, &(exit_load_msrs[2].hi), &(exit_load_msrs[2].lo));
        v3_get_msr(IA32_KERN_GS_BASE, &(exit_load_msrs[3].hi), &(exit_load_msrs[3].lo));

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(exit_store_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(exit_load_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(entry_load_msrs));

    }    

    /* Sanity check ctrl/reg fields against hw_defaults */


    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif
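
    /* An all-ones VMCS link pointer is required when VMCS shadowing is not
     * in use; leaving it at 0 makes the subsequent VM entry fail its
     * consistency checks. */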


    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }

    // Reenable global interrupts now that the vm state is initialized.
    // If another VM kicks us off this CPU, it will update our vmx state
    // so that we know to reload ourselves
    v3_enable_ints();

    return 0;
}

int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1; 
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return -1;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return -1;
    }

    PrintDebug("Serializing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}

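/* Called after every VM exit: if an interrupt injection was in progress and
 * the IDT-vectoring info field shows it completed (valid bit clear), notify
 * the virtual interrupt controller that the vector was delivered. */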
static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

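/*
 * Decide what, if anything, to inject on the next VM entry, in priority
 * order: pending exceptions first, then re-injection of an interrupt that
 * was preempted mid-delivery, then newly pending interrupts. If interrupts
 * are pending but the guest is not interruptible (IF=0 or an interrupt
 * shadow), interrupt-window exiting is enabled so we exit the moment the
 * guest can accept the injection.
 */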
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }

    return 0;
}


static struct vmx_exit_info exit_log[10];

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }
    }
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    // Update timer devices late after being in the VM so that as much 
    // of the time in the VM is accounted for as possible. Also do it before
    // updating IRQ entry state so that any interrupts the timers raise get 
    // handled on the next VM entry. Must be done with interrupts disabled.
    v3_update_timers(info);

    if (vmcs_store() != vmx_info->vmcs_ptr_phys) {
        vmcs_load(vmx_info->vmcs_ptr_phys);
    }

    v3_vmx_restore_vmcs(info);


#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
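
    /* The TSC offset is a 64-bit VMCS field; it is written here as two
     * 32-bit halves via the full and "_HIGH" access forms so the same code
     * can run on 32-bit hosts. */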


    /* determine if we need to move to a different physical core */
    if (info->core_move_state == CORE_MOVE_PENDING) {
        vmcs_clear(vmx_info->vmcs_ptr_phys);

        v3_enable_ints();

        if (V3_MOVE_THREAD_TO_CPU(info->target_pcpu_id, info->core_thread) != 0) {
            PrintError("Failed to move vcore %d to CPU %d\n", info->vcpu_id, info->target_pcpu_id);
        } else {
            info->pcpu_id = info->target_pcpu_id;
            PrintDebug("Core move done, vcore %d is running on CPU %d now\n", info->vcpu_id, V3_Get_CPU());
        }

        /* disable global interrupts again;
         * NOTE: we may now be running on a different CPU
         */
        v3_disable_ints();

        vmcs_load(vmx_info->vmcs_ptr_phys);
        vmx_info->state = VMX_UNLAUNCHED;
        info->core_move_state = CORE_MOVE_DONE;
    }


    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }


    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }
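
    /* A VMCS that has just been VMCLEARed must be entered with VMLAUNCH;
     * every later entry on the same VMCS must use VMRESUME instead. That is
     * why the launch state is tracked in vmx_info->state and reset to
     * VMX_UNLAUNCHED whenever the VMCS is cleared (e.g. on a core move). */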

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d\n", error);
        return -1;
    }

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler\n");
        return -1;
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->vcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->vcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->vcpu_id);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM on logical core %u\n", info->vcpu_id, info->pcpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {
            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
    }

    return 0;
}


#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

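/* In IA32_FEATURE_CONTROL (MSR 0x3a), bit 0 is the lock bit and bit 2
 * enables VMXON outside SMX operation. Both must be set -- hence the 0x5
 * mask above -- or VMXON will fault. If the BIOS locked the MSR with the
 * enable bit clear, VMX cannot be turned on without a reboot. */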
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}


int v3_reset_vmx_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcs bios

    if ((core->shdw_pg_mode == NESTED_PAGING) && 
        (v3_cpu_types[core->pcpu_id] == V3_VMX_EPT_UG_CPU)) {
        // easy 
        core->rip = 0;
        core->segments.cs.selector = rip << 8;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = rip << 12;
    } else {
        core->vm_regs.rdx = core->vcpu_id;
        core->vm_regs.rbx = rip;
    }

    return 0;
}


void v3_init_vmx_cpu(int cpu_id) {
    addr_t vmx_on_region = 0;

    if (cpu_id == 0) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    vmx_on_region = allocate_vmcs();


    if (vmx_on(vmx_on_region) == VMX_SUCCESS) {
        V3_Print("VMX Enabled\n");
        host_vmcs_ptrs[cpu_id] = vmx_on_region;
    } else {
        V3_Print("VMX already enabled\n");
        V3_FreePages((void *)vmx_on_region, 1);
    }

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    if (host_vmcs_ptrs[cpu_id] != 0) {
        V3_Print("Disabling VMX\n");

        if (vmx_off() != VMX_SUCCESS) {
            PrintError("Error executing VMXOFF\n");
        }

        V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);

        host_vmcs_ptrs[cpu_id] = 0;
    }
}