Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


vmx cleanup
[palacios.git] / palacios / src / palacios / vmx.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
11  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
13  * All rights reserved.
14  *
15  * Author: Peter Dinda <pdinda@northwestern.edu>
16  *         Jack Lange <jarusl@cs.northwestern.edu>
17  *
18  * This is free software.  You are permitted to use,
19  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20  */
21
22
23 #include <palacios/vmx.h>
24 #include <palacios/vmm.h>
25 #include <palacios/vmx_handler.h>
26 #include <palacios/vmcs.h>
27 #include <palacios/vmx_lowlevel.h>
28 #include <palacios/vmm_lowlevel.h>
29 #include <palacios/vmm_ctrl_regs.h>
30 #include <palacios/vmm_config.h>
31 #include <palacios/vmm_time.h>
32 #include <palacios/vm_guest_mem.h>
33 #include <palacios/vmm_direct_paging.h>
34 #include <palacios/vmx_io.h>
35 #include <palacios/vmx_msr.h>
36
37 #include <palacios/vmx_hw_info.h>
38
39 #ifndef CONFIG_DEBUG_VMX
40 #undef PrintDebug
41 #define PrintDebug(fmt, args...)
42 #endif
43
44
/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

/* Physical address of the VMCS currently loaded on each CPU (0 = none loaded). */
static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
/* Physical address of each CPU's VMXON region (0 = VMX not initialized on that CPU). */
static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};

/* Entry points defined elsewhere (presumably the low-level VMX entry code --
 * see vmx_lowlevel.h) that perform VMLAUNCH / VMRESUME respectively. */
extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
54
55 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
56     int ret = 0;
57
58     ret = vmcs_write(field, val);
59
60     if (ret != VMX_SUCCESS) {
61         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
62         return 1;
63     }
64
65     return 0;
66 }
67
68 static int inline check_vmcs_read(vmcs_field_t field, void * val) {
69     int ret = 0;
70
71     ret = vmcs_read(field, val);
72
73     if (ret != VMX_SUCCESS) {
74         PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
75     }
76
77     return ret;
78 }
79
80
81
82
83 static addr_t allocate_vmcs() {
84     struct vmcs_data * vmcs_page = NULL;
85
86     PrintDebug("Allocating page\n");
87
88     vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
89     memset(vmcs_page, 0, 4096);
90
91     vmcs_page->revision = hw_info.basic_info.revision;
92     PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
93
94     return (addr_t)V3_PAddr((void *)vmcs_page);
95 }
96
97
98
99
100 static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
101     int vmx_ret = 0;
102     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
103
104     // disable global interrupts for vm state initialization
105     v3_disable_ints();
106
107     PrintDebug("Loading VMCS\n");
108     vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
109     active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
110     vmx_state->state = VMX_UNLAUNCHED;
111
112     if (vmx_ret != VMX_SUCCESS) {
113         PrintError("VMPTRLD failed\n");
114         return -1;
115     }
116
117
118
119     /******* Setup Host State **********/
120
121     /* Cache GDTR, IDTR, and TR in host struct */
122     addr_t gdtr_base;
123     struct {
124         uint16_t selector;
125         addr_t   base;
126     } __attribute__((packed)) tmp_seg;
127     
128
129     __asm__ __volatile__(
130                          "sgdt (%0);"
131                          :
132                          : "q"(&tmp_seg)
133                          : "memory"
134                          );
135     gdtr_base = tmp_seg.base;
136     vmx_state->host_state.gdtr.base = gdtr_base;
137
138     __asm__ __volatile__(
139                          "sidt (%0);"
140                          :
141                          : "q"(&tmp_seg)
142                          : "memory"
143                          );
144     vmx_state->host_state.idtr.base = tmp_seg.base;
145
146     __asm__ __volatile__(
147                          "str (%0);"
148                          :
149                          : "q"(&tmp_seg)
150                          : "memory"
151                          );
152     vmx_state->host_state.tr.selector = tmp_seg.selector;
153
154     /* The GDTR *index* is bits 3-15 of the selector. */
155     struct tss_descriptor * desc = NULL;
156     desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));
157
158     tmp_seg.base = ((desc->base1) |
159                     (desc->base2 << 16) |
160                     (desc->base3 << 24) |
161 #ifdef __V3_64BIT__
162                     ((uint64_t)desc->base4 << 32)
163 #else 
164                     (0)
165 #endif
166                     );
167
168     vmx_state->host_state.tr.base = tmp_seg.base;
169
170   
171
172     /********** Setup and VMX Control Fields from MSR ***********/
173     /* Setup IO map */
174
175
176     struct v3_msr tmp_msr;
177
178     v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
179
180     /* Add external interrupts, NMI exiting, and virtual NMI */
181     vmx_state->pin_ctrls.value =  tmp_msr.lo;
182     vmx_state->pin_ctrls.nmi_exit = 1;
183     vmx_state->pin_ctrls.ext_int_exit = 1;
184
185     v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
186
187     vmx_state->pri_proc_ctrls.value = tmp_msr.lo;
188     vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
189     vmx_state->pri_proc_ctrls.hlt_exit = 1;
190     vmx_state->pri_proc_ctrls.invlpg_exit = 1;
191     vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
192     vmx_state->pri_proc_ctrls.pause_exit = 1;
193     vmx_state->pri_proc_ctrls.tsc_offset = 1;
194 #ifdef CONFIG_TIME_VIRTUALIZE_TSC
195     vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
196 #endif
197
198     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
199     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
200             (addr_t)V3_PAddr(info->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);
201
202
203     vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->vm_info->msr_map.arch_data));
204
205     v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
206     vmx_state->exit_ctrls.value = tmp_msr.lo;
207     vmx_state->exit_ctrls.host_64_on = 1;
208
209     if ((vmx_state->exit_ctrls.save_efer == 1) || (vmx_state->exit_ctrls.ld_efer == 1)) {
210         vmx_state->ia32e_avail = 1;
211     }
212
213     v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
214     vmx_state->entry_ctrls.value = tmp_msr.lo;
215
216     {
217         struct vmx_exception_bitmap excp_bmap;
218         excp_bmap.value = 0;
219         
220         excp_bmap.pf = 1;
221     
222         vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
223     }
224     /******* Setup VMXAssist guest state ***********/
225
226     info->rip = 0xd0000;
227     info->vm_regs.rsp = 0x80000;
228
229     struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
230     flags->rsvd1 = 1;
231
232     /* Print Control MSRs */
233     v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
234     PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
235
236     v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
237     PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
238
239
240 #define GUEST_CR0 0x80000031
241 #define GUEST_CR4 0x00002000
242     info->ctrl_regs.cr0 = GUEST_CR0;
243     info->ctrl_regs.cr4 = GUEST_CR4;
244
245     ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
246    
247     /* Setup paging */
248     if (info->shdw_pg_mode == SHADOW_PAGING) {
249         PrintDebug("Creating initial shadow page table\n");
250
251         if (v3_init_passthrough_pts(info) == -1) {
252             PrintError("Could not initialize passthrough page tables\n");
253             return -1;
254         }
255         
256 #define CR0_PE 0x00000001
257 #define CR0_PG 0x80000000
258
259
260         vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
261         vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
262
263         info->ctrl_regs.cr3 = info->direct_map_pt;
264
265         // vmx_state->pinbased_ctrls |= NMI_EXIT;
266
267         /* Add CR exits */
268         vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
269         vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
270     }
271
272     // Setup segment registers
273     {
274         struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);
275
276         int i;
277
278         for (i = 0; i < 10; i++) {
279             seg_reg[i].selector = 3 << 3;
280             seg_reg[i].limit = 0xffff;
281             seg_reg[i].base = 0x0;
282         }
283
284         info->segments.cs.selector = 2<<3;
285
286         /* Set only the segment registers */
287         for (i = 0; i < 6; i++) {
288             seg_reg[i].limit = 0xfffff;
289             seg_reg[i].granularity = 1;
290             seg_reg[i].type = 3;
291             seg_reg[i].system = 1;
292             seg_reg[i].dpl = 0;
293             seg_reg[i].present = 1;
294             seg_reg[i].db = 1;
295         }
296
297         info->segments.cs.type = 0xb;
298
299         info->segments.ldtr.selector = 0x20;
300         info->segments.ldtr.type = 2;
301         info->segments.ldtr.system = 0;
302         info->segments.ldtr.present = 1;
303         info->segments.ldtr.granularity = 0;
304
305     
306         /************* Map in GDT and vmxassist *************/
307
308         uint64_t  gdt[] __attribute__ ((aligned(32))) = {
309             0x0000000000000000ULL,              /* 0x00: reserved */
310             0x0000830000000000ULL,              /* 0x08: 32-bit TSS */
311             //0x0000890000000000ULL,            /* 0x08: 32-bit TSS */
312             0x00CF9b000000FFFFULL,              /* 0x10: CS 32-bit */
313             0x00CF93000000FFFFULL,              /* 0x18: DS 32-bit */
314             0x000082000000FFFFULL,              /* 0x20: LDTR 32-bit */
315         };
316
317 #define VMXASSIST_GDT   0x10000
318         addr_t vmxassist_gdt = 0;
319
320         if (v3_gpa_to_hva(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
321             PrintError("Could not find VMXASSIST GDT destination\n");
322             return -1;
323         }
324
325         memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
326         
327         info->segments.gdtr.base = VMXASSIST_GDT;
328
329 #define VMXASSIST_TSS   0x40000
330         uint64_t vmxassist_tss = VMXASSIST_TSS;
331         gdt[0x08 / sizeof(gdt[0])] |=
332             ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
333             ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
334             ((vmxassist_tss & 0x0000FFFF) << (16)) |
335             (8392 - 1);
336
337         info->segments.tr.selector = 0x08;
338         info->segments.tr.base = vmxassist_tss;
339
340         //info->segments.tr.type = 0x9; 
341         info->segments.tr.type = 0x3;
342         info->segments.tr.system = 0;
343         info->segments.tr.present = 1;
344         info->segments.tr.granularity = 0;
345     }
346  
347     // setup VMXASSIST
348     { 
349 #define VMXASSIST_START 0x000d0000
350         extern uint8_t v3_vmxassist_start[];
351         extern uint8_t v3_vmxassist_end[];
352         addr_t vmxassist_dst = 0;
353
354         if (v3_gpa_to_hva(info, VMXASSIST_START, &vmxassist_dst) == -1) {
355             PrintError("Could not find VMXASSIST destination\n");
356             return -1;
357         }
358
359         memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
360     }    
361
362     /*** Write all the info to the VMCS ***/
363
364 #define DEBUGCTL_MSR 0x1d9
365     v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
366     vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
367
368     info->dbg_regs.dr7 = 0x400;
369
370 #ifdef __V3_64BIT__
371     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
372 #else
373     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
374     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
375 #endif
376
377     if (v3_update_vmcs_ctrl_fields(info)) {
378         PrintError("Could not write control fields!\n");
379         return -1;
380     }
381     
382     if (v3_update_vmcs_host_state(info)) {
383         PrintError("Could not write host state\n");
384         return -1;
385     }
386
387
388     vmx_state->assist_state = VMXASSIST_DISABLED;
389
390     // reenable global interrupts for vm state initialization now
391     // that the vm state is initialized. If another VM kicks us off, 
392     // it'll update our vmx state so that we know to reload ourself
393     v3_enable_ints();
394
395     return 0;
396 }
397
398 int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
399     struct vmx_data * vmx_state = NULL;
400     int vmx_ret = 0;
401     
402     vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
403
404     PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);
405
406     PrintDebug("Allocating VMCS\n");
407     vmx_state->vmcs_ptr_phys = allocate_vmcs();
408
409     PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
410
411     info->vmm_data = vmx_state;
412     vmx_state->state = VMX_UNLAUNCHED;
413
414     PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
415     
416     // TODO: Fix vmcs fields so they're 32-bit
417
418     PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
419     vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);
420
421     if (vmx_ret != VMX_SUCCESS) {
422         PrintError("VMCLEAR failed\n");
423         return -1; 
424     }
425
426     if (vm_class == V3_PC_VM) {
427         PrintDebug("Initializing VMCS\n");
428         init_vmcs_bios(info, vmx_state);
429     } else {
430         PrintError("Invalid VM Class\n");
431         return -1;
432     }
433
434     return 0;
435 }
436
437
438 int v3_deinit_vmx_vmcs(struct guest_info * core) {
439     struct vmx_data * vmx_state = core->vmm_data;
440
441     V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
442
443     V3_Free(vmx_state);
444
445     return 0;
446 }
447
448
449 static int update_irq_exit_state(struct guest_info * info) {
450     struct vmx_exit_idt_vec_info idt_vec_info;
451
452     check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
453
454     if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
455 #ifdef CONFIG_DEBUG_INTERRUPTS
456         PrintDebug("Calling v3_injecting_intr\n");
457 #endif
458         info->intr_core_state.irq_started = 0;
459         v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
460     }
461
462     return 0;
463 }
464
/* Pre-entry event injection.
 *
 * Priority order, as implemented below:
 *   1. A pending guest exception is injected first.
 *   2. Otherwise, if the guest is interruptible (RFLAGS.IF set and the
 *      VMCS guest interruptibility state is clear), either replay an
 *      interrupted injection from the IDT-vectoring info or inject the
 *      next pending IRQ/NMI/software interrupt.
 *   3. Otherwise, if an IRQ is pending but the guest is not
 *      interruptible, enable interrupt-window exiting so we exit as
 *      soon as it becomes interruptible.
 *
 * Always returns 0.
 */
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            // The error code goes in its own VMCS field, flagged in int_info
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        // Tell the exception core this exception is now being delivered
        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    // type 0 = external interrupt
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
                    PrintDebug("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    // Mark the injection in flight; completed in update_irq_exit_state
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    // type 2 = NMI, architecturally always vector 2
                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    // type 4 = software interrupt.
                    // NOTE(review): ent_int.vector is never set in this case
                    // (stays 0 from the zeroed value above) -- presumably the
                    // vector should come from the pending-intr state; confirm.
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}
589
590
591
/* Circular log of the 10 most recent VM exits, indexed by num_exits % 10. */
static struct vmx_exit_info exit_log[10];
593
594 static void print_exit_log(struct guest_info * info) {
595     int cnt = info->num_exits % 10;
596     int i = 0;
597     
598
599     V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);
600
601     for (i = 0; i < 10; i++) {
602         struct vmx_exit_info * tmp = &exit_log[cnt];
603
604         V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
605         V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
606         V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
607         V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
608         V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
609
610         cnt--;
611
612         if (cnt == -1) {
613             cnt = 9;
614         }
615
616     }
617
618 }
619
620 /* 
621  * CAUTION and DANGER!!! 
622  * 
623  * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function
624  * When exectuing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
625  * on its contents will cause things to break. The contents at the time of the exit WILL 
626  * change before the exit handler is executed.
627  */
628 int v3_vmx_enter(struct guest_info * info) {
629     int ret = 0;
630     uint32_t tsc_offset_low, tsc_offset_high;
631     struct vmx_exit_info exit_info;
632     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
633
634     // Conditionally yield the CPU if the timeslice has expired
635     v3_yield_cond(info);
636
637     // Perform any additional yielding needed for time adjustment
638     v3_adjust_time(info);
639
640     // Update timer devices prior to entering VM.
641     v3_update_timers(info);
642
643     // disable global interrupts for vm state transition
644     v3_disable_ints();
645
646
647     if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
648         vmcs_load(vmx_info->vmcs_ptr_phys);
649         active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
650     }
651
652
653     v3_vmx_restore_vmcs(info);
654
655
656 #ifdef CONFIG_SYMCALL
657     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
658         update_irq_entry_state(info);
659     }
660 #else 
661     update_irq_entry_state(info);
662 #endif
663
664     {
665         addr_t guest_cr3;
666         vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
667         vmcs_write(VMCS_GUEST_CR3, guest_cr3);
668     }
669
670     // Perform last-minute time bookkeeping prior to entering the VM
671     v3_time_enter_vm(info);
672
673     tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
674     tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
675     check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
676     check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
677
678
679     if (vmx_info->state == VMX_UNLAUNCHED) {
680         vmx_info->state = VMX_LAUNCHED;
681         info->vm_info->run_state = VM_RUNNING;
682         ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
683     } else {
684         V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
685         ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
686     }
687     
688     //  PrintDebug("VMX Exit: ret=%d\n", ret);
689
690     if (ret != VMX_SUCCESS) {
691         uint32_t error = 0;
692
693         vmcs_read(VMCS_INSTR_ERR, &error);
694         PrintError("VMENTRY Error: %d\n", error);
695
696         return -1;
697     }
698
699     // Immediate exit from VM time bookkeeping
700     v3_time_exit_vm(info);
701
702     info->num_exits++;
703
704     /* Update guest state */
705     v3_vmx_save_vmcs(info);
706
707     // info->cpl = info->segments.cs.selector & 0x3;
708
709     info->mem_mode = v3_get_vm_mem_mode(info);
710     info->cpu_mode = v3_get_vm_cpu_mode(info);
711
712
713     check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
714     check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
715     check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
716     check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
717     check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
718     check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
719     check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));
720
721     //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);
722
723     exit_log[info->num_exits % 10] = exit_info;
724
725
726 #ifdef CONFIG_SYMCALL
727     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
728         update_irq_exit_state(info);
729     }
730 #else
731     update_irq_exit_state(info);
732 #endif
733
734     if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
735         // This is a special case whose only job is to inject an interrupt
736         vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
737         vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
738         vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
739
740 #ifdef CONFIG_DEBUG_INTERRUPTS
741         PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
742 #endif
743     }
744
745     // reenable global interrupts after vm exit
746     v3_enable_ints();
747
748     // Conditionally yield the CPU if the timeslice has expired
749     v3_yield_cond(info);
750
751     if (v3_handle_vmx_exit(info, &exit_info) == -1) {
752         PrintError("Error in VMX exit handler\n");
753         return -1;
754     }
755
756     return 0;
757 }
758
759
760 int v3_start_vmx_guest(struct guest_info * info) {
761
762     PrintDebug("Starting VMX core %u\n", info->cpu_id);
763
764     if (info->cpu_id == 0) {
765         info->core_run_state = CORE_RUNNING;
766         info->vm_info->run_state = VM_RUNNING;
767     } else {
768
769         PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);
770
771         while (info->core_run_state == CORE_STOPPED) {
772             v3_yield(info);
773             //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
774         }
775         
776         PrintDebug("VMX core %u initialized\n", info->cpu_id);
777     }
778
779
780     PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
781                info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
782                info->segments.cs.limit, (void *)(info->rip));
783
784
785     PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
786
787     v3_start_time(info);
788
789     while (1) {
790
791         if (info->vm_info->run_state == VM_STOPPED) {
792             info->core_run_state = CORE_STOPPED;
793             break;
794         }
795
796         if (v3_vmx_enter(info) == -1) {
797             v3_print_vmcs();
798             print_exit_log(info);
799             return -1;
800         }
801
802
803
804         if (info->vm_info->run_state == VM_STOPPED) {
805             info->core_run_state = CORE_STOPPED;
806             break;
807         }
808 /*
809         if ((info->num_exits % 5000) == 0) {
810             V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
811         }
812 */
813
814     }
815
816     return 0;
817 }
818
819
820
821
822 #define VMX_FEATURE_CONTROL_MSR     0x0000003a
823 #define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
824 #define CPUID_1_ECX_VTXFLAG 0x00000020
825
826 int v3_is_vmx_capable() {
827     v3_msr_t feature_msr;
828     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
829
830     v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);
831
832     PrintDebug("ECX: 0x%x\n", ecx);
833
834     if (ecx & CPUID_1_ECX_VTXFLAG) {
835         v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
836         
837         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
838
839         if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
840             PrintDebug("VMX is locked -- enable in the BIOS\n");
841             return 0;
842         }
843
844     } else {
845         PrintDebug("VMX not supported on this cpu\n");
846         return 0;
847     }
848
849     return 1;
850 }
851
852
853
854
855
856
857 void v3_init_vmx_cpu(int cpu_id) {
858     extern v3_cpu_arch_t v3_cpu_types[];
859
860     if (cpu_id == 0) {
861         if (v3_init_vmx_hw(&hw_info) == -1) {
862             PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
863             return;
864         }
865     }
866
867
868     enable_vmx();
869
870
871     // Setup VMXON Region
872     host_vmcs_ptrs[cpu_id] = allocate_vmcs();
873
874     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
875
876     if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
877         PrintDebug("VMX Enabled\n");
878     } else {
879         PrintError("VMX initialization failure\n");
880         return;
881     }
882     
883
884     v3_cpu_types[cpu_id] = V3_VMX_CPU;
885
886
887 }
888
889
890 void v3_deinit_vmx_cpu(int cpu_id) {
891     extern v3_cpu_arch_t v3_cpu_types[];
892     v3_cpu_types[cpu_id] = V3_INVALID_CPU;
893     V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
894 }