Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are checked out the same way.
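For example, to track a release branch (the branch name here is illustrative; list the real ones with git branch -r):

  git checkout --track -b Release-1.2 origin/Release-1.2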


fix interrupt masking bug
[palacios.git] / palacios / src / palacios / vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Peter Dinda <pdinda@northwestern.edu>
 *         Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>


#ifndef CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

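/* Per-physical-CPU state: host_vmcs_ptrs holds each core's VMXON region,
 * and active_vmcs_ptrs caches the VMCS currently loaded on each core so
 * that v3_vmx_enter() can skip a redundant VMPTRLD. */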
static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}




static addr_t allocate_vmcs() {
    reg_ex_t msr;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    
    vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
    PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}
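/* Note: allocate_vmcs() is used both for guest VMCS regions and for each
 * core's VMXON region (see v3_init_vmx_cpu below); both must begin with
 * the revision identifier reported by the IA32_VMX_BASIC MSR. */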




static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
    int vmx_ret = 0;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;
    

    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_state->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.tr.selector = tmp_seg.selector;

    /* The GDTR *index* is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else 
                    (0)
#endif
                    );

    vmx_state->host_state.tr.base = tmp_seg.base;

  

    /********** Setup VMX Control Fields from MSRs ***********/
    /* Setup IO map */


    struct v3_msr tmp_msr;

    v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.value =  tmp_msr.lo;
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;

    v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    vmx_state->pri_proc_ctrls.value = tmp_msr.lo;
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_state->pri_proc_ctrls.hlt_exit = 1;
    vmx_state->pri_proc_ctrls.invlpg_exit = 1;
    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_state->pri_proc_ctrls.pause_exit = 1;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(info->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->vm_info->msr_map.arch_data));

    v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_state->exit_ctrls.value = tmp_msr.lo;
    vmx_state->exit_ctrls.host_64_on = 1;

    if ((vmx_state->exit_ctrls.save_efer == 1) || (vmx_state->exit_ctrls.ld_efer == 1)) {
        vmx_state->ia32e_avail = 1;
    }

    v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_state->entry_ctrls.value = tmp_msr.lo;

    {
        struct vmx_exception_bitmap excp_bmap;
        excp_bmap.value = 0;

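        /* Intercept guest page faults (#PF): under shadow paging the VMM
         * has to observe every guest page fault to keep its shadow page
         * tables consistent with the guest's. */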
        excp_bmap.pf = 1;
    
        vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
    }
    /******* Setup VMXAssist guest state ***********/

    info->rip = 0xd0000;
    info->vm_regs.rsp = 0x80000;

    struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
    flags->rsvd1 = 1;

    /* Print Control MSRs */
    v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);

    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);


#define GUEST_CR0 0x80000031
#define GUEST_CR4 0x00002000
    info->ctrl_regs.cr0 = GUEST_CR0;
    info->ctrl_regs.cr4 = GUEST_CR4;

    ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
   
    /* Setup paging */
    if (info->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(info) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }
        
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000


        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);

        info->ctrl_regs.cr3 = info->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
    }

    // Setup segment registers
    {
        struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);

        int i;

        for (i = 0; i < 10; i++) {
            seg_reg[i].selector = 3 << 3;
            seg_reg[i].limit = 0xffff;
            seg_reg[i].base = 0x0;
        }

        info->segments.cs.selector = 2<<3;

        /* Set only the segment registers */
        for (i = 0; i < 6; i++) {
            seg_reg[i].limit = 0xfffff;
            seg_reg[i].granularity = 1;
            seg_reg[i].type = 3;
            seg_reg[i].system = 1;
            seg_reg[i].dpl = 0;
            seg_reg[i].present = 1;
            seg_reg[i].db = 1;
        }

        info->segments.cs.type = 0xb;

        info->segments.ldtr.selector = 0x20;
        info->segments.ldtr.type = 2;
        info->segments.ldtr.system = 0;
        info->segments.ldtr.present = 1;
        info->segments.ldtr.granularity = 0;

    
        /************* Map in GDT and vmxassist *************/

        uint64_t  gdt[] __attribute__ ((aligned(32))) = {
            0x0000000000000000ULL,              /* 0x00: reserved */
            0x0000830000000000ULL,              /* 0x08: 32-bit TSS */
            //0x0000890000000000ULL,            /* 0x08: 32-bit TSS */
            0x00CF9b000000FFFFULL,              /* 0x10: CS 32-bit */
            0x00CF93000000FFFFULL,              /* 0x18: DS 32-bit */
            0x000082000000FFFFULL,              /* 0x20: LDTR 32-bit */
        };

#define VMXASSIST_GDT   0x10000
        addr_t vmxassist_gdt = 0;

        if (v3_gpa_to_hva(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
            PrintError("Could not find VMXASSIST GDT destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
        
        info->segments.gdtr.base = VMXASSIST_GDT;

#define VMXASSIST_TSS   0x40000
        uint64_t vmxassist_tss = VMXASSIST_TSS;
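        /* Patch the TSS base into the scattered base fields of the GDT
         * descriptor at selector 0x08 (descriptor bits 16-31, 32-39, and
         * 56-63) and set its 16-bit limit in the low word. */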
        gdt[0x08 / sizeof(gdt[0])] |=
            ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
            ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
            ((vmxassist_tss & 0x0000FFFF) << (16)) |
            (8392 - 1);

        info->segments.tr.selector = 0x08;
        info->segments.tr.base = vmxassist_tss;

        //info->segments.tr.type = 0x9; 
        info->segments.tr.type = 0x3;
        info->segments.tr.system = 0;
        info->segments.tr.present = 1;
        info->segments.tr.granularity = 0;
    }
 
    // setup VMXASSIST
    { 
#define VMXASSIST_START 0x000d0000
        extern uint8_t v3_vmxassist_start[];
        extern uint8_t v3_vmxassist_end[];
        addr_t vmxassist_dst = 0;

        if (v3_gpa_to_hva(info, VMXASSIST_START, &vmxassist_dst) == -1) {
            PrintError("Could not find VMXASSIST destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
    }    

    /*** Write all the info to the VMCS ***/

#define DEBUGCTL_MSR 0x1d9
    v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);

    info->dbg_regs.dr7 = 0x400;

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif

    if (v3_update_vmcs_ctrl_fields(info)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }
    
    if (v3_update_vmcs_host_state(info)) {
        PrintError("Could not write host state\n");
        return -1;
    }


    vmx_state->assist_state = VMXASSIST_DISABLED;

    // Reenable global interrupts now that the vm state is initialized.
    // If another VM kicks us off this core, it will update our vmx state
    // so that we know to reload ourselves.
    v3_enable_ints();

    return 0;
}

int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;
    
    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    info->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
    
    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1; 
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        init_vmcs_bios(info, vmx_state);
    } else {
        PrintError("Invalid VM Class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);

    V3_Free(vmx_state);

    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
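        // VM-entry interruption type encodings used in this file:
        //   0 = external interrupt, 2 = NMI, 3 = hardware exception,
        //   4 = software interrupt (see the cases below).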
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
                    PrintDebug("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}


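/* Ring buffer of the 10 most recent exits, indexed by num_exits % 10;
 * print_exit_log() walks it backward from the most recent entry. */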
static struct vmx_exit_info exit_log[10];

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;
    

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // Update timer devices prior to entering VM.
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();


    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
        vmcs_load(vmx_info->vmcs_ptr_phys);
        active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
    }


    v3_vmx_restore_vmcs(info);


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

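    /* The TSC offset is a 64-bit VMCS field written here as two 32-bit
     * halves; the _HIGH accessor covers bits 63:32. */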
    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);


    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }
    
    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);
        PrintError("VMENTRY Error: %d\n", error);

        return -1;
    }

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler\n");
        return -1;
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->cpu_id);

    if (info->cpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
        }
        
        PrintDebug("VMX core %u initialized\n", info->cpu_id);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {
            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }



        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}


int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
        
        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}

static int has_vmx_nested_paging() {
    /* We assume that EPT and unrestricted guest mode (Intel's Virtual
     * Real Mode) come as a pair: a CPU has either both or neither.
     */
    return 0;
}



void v3_init_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    struct v3_msr tmp_msr;
    uint64_t ret = 0;

    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

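    /* Set CR4.VMXE (bit 13, mask 0x2000), which must be 1 before VMXON can
     * succeed, then check that the resulting CR4 keeps every bit that the
     * IA32_VMX_CR4_FIXED0 MSR requires to be 1. CR0.NE (bit 5, mask 0x20)
     * is set below because it is likewise required in VMX operation. */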
#ifdef __V3_64BIT__
    __asm__ __volatile__ (
                          "movq %%cr4, %%rbx;"
                          "orq  $0x00002000, %%rbx;"
                          "movq %%rbx, %0;"
                          : "=m"(ret) 
                          :
                          : "%rbx"
                          );

    if ((~ret & tmp_msr.value) == 0) {
        __asm__ __volatile__ (
                              "movq %0, %%cr4;"
                              :
                              : "q"(ret)
                              );
    } else {
        PrintError("Invalid CR4 Settings!\n");
        return;
    }

    __asm__ __volatile__ (
                          "movq %%cr0, %%rbx; "
                          "orq  $0x00000020,%%rbx; "
                          "movq %%rbx, %%cr0;"
                          :
                          :
                          : "%rbx"
                          );
#elif __V3_32BIT__
    __asm__ __volatile__ (
                          "movl %%cr4, %%ecx;"
                          "orl  $0x00002000, %%ecx;"
                          "movl %%ecx, %0;"
                          : "=m"(ret) 
                          :
                          : "%ecx"
                          );

    if ((~ret & tmp_msr.value) == 0) {
        __asm__ __volatile__ (
                              "movl %0, %%cr4;"
                              :
                              : "q"(ret)
                              );
    } else {
        PrintError("Invalid CR4 Settings!\n");
        return;
    }

    __asm__ __volatile__ (
                          "movl %%cr0, %%ecx; "
                          "orl  $0x00000020,%%ecx; "
                          "movl %%ecx, %%cr0;"
                          :
                          :
                          : "%ecx"
                          );

#endif

    //
    // Should check and return Error here.... 


    // Setup VMXON Region
    host_vmcs_ptrs[cpu_id] = allocate_vmcs();

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
        return;
    }
    

    if (has_vmx_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_VMX_CPU;
    }

}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;
    V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
}