Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


added vmcs/vmcb freeing
[palacios.git] / palacios / src / palacios / vmx.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
11  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
13  * All rights reserved.
14  *
15  * Author: Peter Dinda <pdinda@northwestern.edu>
16  *         Jack Lange <jarusl@cs.northwestern.edu>
17  *
18  * This is free software.  You are permitted to use,
19  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20  */
21
22
23 #include <palacios/vmx.h>
24 #include <palacios/vmm.h>
25 #include <palacios/vmx_handler.h>
26 #include <palacios/vmcs.h>
27 #include <palacios/vmx_lowlevel.h>
28 #include <palacios/vmm_lowlevel.h>
29 #include <palacios/vmm_ctrl_regs.h>
30 #include <palacios/vmm_config.h>
31 #include <palacios/vm_guest_mem.h>
32 #include <palacios/vmm_direct_paging.h>
33 #include <palacios/vmx_io.h>
34 #include <palacios/vmx_msr.h>
35
36
37 #ifndef CONFIG_DEBUG_VMX
38 #undef PrintDebug
39 #define PrintDebug(fmt, args...)
40 #endif
41
42
43 static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
44
45
46
47 extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
48 extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
49
50 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
51     int ret = 0;
52
53     ret = vmcs_write(field,val);
54
55     if (ret != VMX_SUCCESS) {
56         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
57         return 1;
58     }
59
60     return 0;
61 }
62
63 static int inline check_vmcs_read(vmcs_field_t field, void * val) {
64     int ret = 0;
65
66     ret = vmcs_read(field, val);
67
68     if (ret != VMX_SUCCESS) {
69         PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
70     }
71
72     return ret;
73 }
74
#if 0
/*
 * Disabled helpers that fold the contents of VMX capability MSRs into a
 * candidate control value. They are compiled out and have no callers here.
 *
 * NOTE(review): both helpers OR *every* MSR half into the value. For the
 * "must be zero" half one would expect a mask (AND) rather than an OR --
 * confirm against the Intel SDM before re-enabling this code.
 */

// For the 32 bit reserved bit fields 
// MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
    v3_msr_t mask_msr;

    PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);

    v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);

    PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);

    return (val | mask_msr.lo | mask_msr.hi);
}



/* Same as sanitize_bits1(), but the two masks come from two separate MSRs. */
static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
    v3_msr_t first_msr, second_msr;
    addr_t first_bits, second_bits;

    PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);

    v3_get_msr(msr_num0, &first_msr.hi, &first_msr.lo);
    v3_get_msr(msr_num1, &second_msr.hi, &second_msr.lo);
  
    // This generates a mask that is the natural bit width of the CPU
    first_bits = first_msr.value;
    second_bits = second_msr.value;

    PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)first_bits, msr_num1, (void*)second_bits);

    return (val | first_bits | second_bits);
}



#endif
119
120
121 static addr_t allocate_vmcs() {
122     reg_ex_t msr;
123     struct vmcs_data * vmcs_page = NULL;
124
125     PrintDebug("Allocating page\n");
126
127     vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
128     memset(vmcs_page, 0, 4096);
129
130     v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
131     
132     vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
133     PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
134
135     return (addr_t)V3_PAddr((void *)vmcs_page);
136 }
137
138
139
140
141 static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
142     int vmx_ret = 0;
143
144     PrintDebug("Loading VMCS\n");
145     vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
146
147     if (vmx_ret != VMX_SUCCESS) {
148         PrintError("VMPTRLD failed\n");
149         return -1;
150     }
151
152
153
154     /******* Setup Host State **********/
155
156     /* Cache GDTR, IDTR, and TR in host struct */
157     addr_t gdtr_base;
158     struct {
159         uint16_t selector;
160         addr_t   base;
161     } __attribute__((packed)) tmp_seg;
162     
163
164     __asm__ __volatile__(
165                          "sgdt (%0);"
166                          :
167                          : "q"(&tmp_seg)
168                          : "memory"
169                          );
170     gdtr_base = tmp_seg.base;
171     vmx_state->host_state.gdtr.base = gdtr_base;
172
173     __asm__ __volatile__(
174                          "sidt (%0);"
175                          :
176                          : "q"(&tmp_seg)
177                          : "memory"
178                          );
179     vmx_state->host_state.idtr.base = tmp_seg.base;
180
181     __asm__ __volatile__(
182                          "str (%0);"
183                          :
184                          : "q"(&tmp_seg)
185                          : "memory"
186                          );
187     vmx_state->host_state.tr.selector = tmp_seg.selector;
188
189     /* The GDTR *index* is bits 3-15 of the selector. */
190     struct tss_descriptor * desc = NULL;
191     desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));
192
193     tmp_seg.base = ((desc->base1) |
194                     (desc->base2 << 16) |
195                     (desc->base3 << 24) |
196 #ifdef __V3_64BIT__
197                     ((uint64_t)desc->base4 << 32)
198 #else 
199                     (0)
200 #endif
201                     );
202
203     vmx_state->host_state.tr.base = tmp_seg.base;
204
205   
206
207     /********** Setup and VMX Control Fields from MSR ***********/
208     /* Setup IO map */
209
210
211     struct v3_msr tmp_msr;
212
213     v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
214
215     /* Add external interrupts, NMI exiting, and virtual NMI */
216     vmx_state->pin_ctrls.value =  tmp_msr.lo;
217     vmx_state->pin_ctrls.nmi_exit = 1;
218     vmx_state->pin_ctrls.ext_int_exit = 1;
219
220     v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
221
222     vmx_state->pri_proc_ctrls.value = tmp_msr.lo;
223     vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
224     vmx_state->pri_proc_ctrls.hlt_exit = 1;
225     vmx_state->pri_proc_ctrls.invlpg_exit = 1;
226     vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
227     vmx_state->pri_proc_ctrls.pause_exit = 1;
228     vmx_state->pri_proc_ctrls.tsc_offset = 1;
229 #ifdef CONFIG_TIME_VIRTUALIZE_TSC
230     vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
231 #endif
232
233     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
234     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
235             (addr_t)V3_PAddr(info->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);
236
237
238     vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->vm_info->msr_map.arch_data));
239
240     v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
241     vmx_state->exit_ctrls.value = tmp_msr.lo;
242     vmx_state->exit_ctrls.host_64_on = 1;
243
244     if ((vmx_state->exit_ctrls.save_efer == 1) || (vmx_state->exit_ctrls.ld_efer == 1)) {
245         vmx_state->ia32e_avail = 1;
246     }
247
248     v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
249     vmx_state->entry_ctrls.value = tmp_msr.lo;
250
251     {
252         struct vmx_exception_bitmap excp_bmap;
253         excp_bmap.value = 0;
254         
255         excp_bmap.pf = 1;
256     
257         vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
258     }
259     /******* Setup VMXAssist guest state ***********/
260
261     info->rip = 0xd0000;
262     info->vm_regs.rsp = 0x80000;
263
264     struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
265     flags->rsvd1 = 1;
266
267     /* Print Control MSRs */
268     v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
269     PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
270
271     v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
272     PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);
273
274
275 #define GUEST_CR0 0x80000031
276 #define GUEST_CR4 0x00002000
277     info->ctrl_regs.cr0 = GUEST_CR0;
278     info->ctrl_regs.cr4 = GUEST_CR4;
279
280     ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
281    
282     /* Setup paging */
283     if (info->shdw_pg_mode == SHADOW_PAGING) {
284         PrintDebug("Creating initial shadow page table\n");
285
286         if (v3_init_passthrough_pts(info) == -1) {
287             PrintError("Could not initialize passthrough page tables\n");
288             return -1;
289         }
290         
291 #define CR0_PE 0x00000001
292 #define CR0_PG 0x80000000
293
294
295         vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
296         vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
297
298         info->ctrl_regs.cr3 = info->direct_map_pt;
299
300         // vmx_state->pinbased_ctrls |= NMI_EXIT;
301
302         /* Add CR exits */
303         vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
304         vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
305     }
306
307     // Setup segment registers
308     {
309         struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);
310
311         int i;
312
313         for (i = 0; i < 10; i++) {
314             seg_reg[i].selector = 3 << 3;
315             seg_reg[i].limit = 0xffff;
316             seg_reg[i].base = 0x0;
317         }
318
319         info->segments.cs.selector = 2<<3;
320
321         /* Set only the segment registers */
322         for (i = 0; i < 6; i++) {
323             seg_reg[i].limit = 0xfffff;
324             seg_reg[i].granularity = 1;
325             seg_reg[i].type = 3;
326             seg_reg[i].system = 1;
327             seg_reg[i].dpl = 0;
328             seg_reg[i].present = 1;
329             seg_reg[i].db = 1;
330         }
331
332         info->segments.cs.type = 0xb;
333
334         info->segments.ldtr.selector = 0x20;
335         info->segments.ldtr.type = 2;
336         info->segments.ldtr.system = 0;
337         info->segments.ldtr.present = 1;
338         info->segments.ldtr.granularity = 0;
339
340     
341         /************* Map in GDT and vmxassist *************/
342
343         uint64_t  gdt[] __attribute__ ((aligned(32))) = {
344             0x0000000000000000ULL,              /* 0x00: reserved */
345             0x0000830000000000ULL,              /* 0x08: 32-bit TSS */
346             //0x0000890000000000ULL,            /* 0x08: 32-bit TSS */
347             0x00CF9b000000FFFFULL,              /* 0x10: CS 32-bit */
348             0x00CF93000000FFFFULL,              /* 0x18: DS 32-bit */
349             0x000082000000FFFFULL,              /* 0x20: LDTR 32-bit */
350         };
351
352 #define VMXASSIST_GDT   0x10000
353         addr_t vmxassist_gdt = 0;
354
355         if (v3_gpa_to_hva(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
356             PrintError("Could not find VMXASSIST GDT destination\n");
357             return -1;
358         }
359
360         memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
361         
362         info->segments.gdtr.base = VMXASSIST_GDT;
363
364 #define VMXASSIST_TSS   0x40000
365         uint64_t vmxassist_tss = VMXASSIST_TSS;
366         gdt[0x08 / sizeof(gdt[0])] |=
367             ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
368             ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
369             ((vmxassist_tss & 0x0000FFFF) << (16)) |
370             (8392 - 1);
371
372         info->segments.tr.selector = 0x08;
373         info->segments.tr.base = vmxassist_tss;
374
375         //info->segments.tr.type = 0x9; 
376         info->segments.tr.type = 0x3;
377         info->segments.tr.system = 0;
378         info->segments.tr.present = 1;
379         info->segments.tr.granularity = 0;
380     }
381  
382     // setup VMXASSIST
383     { 
384 #define VMXASSIST_START 0x000d0000
385         extern uint8_t v3_vmxassist_start[];
386         extern uint8_t v3_vmxassist_end[];
387         addr_t vmxassist_dst = 0;
388
389         if (v3_gpa_to_hva(info, VMXASSIST_START, &vmxassist_dst) == -1) {
390             PrintError("Could not find VMXASSIST destination\n");
391             return -1;
392         }
393
394         memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
395     }    
396
397     /*** Write all the info to the VMCS ***/
398
399 #define DEBUGCTL_MSR 0x1d9
400     v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
401     vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
402
403     info->dbg_regs.dr7 = 0x400;
404
405 #ifdef __V3_64BIT__
406     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
407 #else
408     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
409     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
410 #endif
411
412     if (v3_update_vmcs_ctrl_fields(info)) {
413         PrintError("Could not write control fields!\n");
414         return -1;
415     }
416     
417     if (v3_update_vmcs_host_state(info)) {
418         PrintError("Could not write host state\n");
419         return -1;
420     }
421
422
423     vmx_state->state = VMXASSIST_DISABLED;
424
425     return 0;
426 }
427
428 int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
429     struct vmx_data * vmx_state = NULL;
430     int vmx_ret = 0;
431     
432     vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
433
434     PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);
435
436     PrintDebug("Allocating VMCS\n");
437     vmx_state->vmcs_ptr_phys = allocate_vmcs();
438
439     PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
440
441     info->vmm_data = vmx_state;
442
443     PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
444     
445     // TODO: Fix vmcs fields so they're 32-bit
446
447     PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
448     vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);
449
450     if (vmx_ret != VMX_SUCCESS) {
451         PrintError("VMCLEAR failed\n");
452         return -1; 
453     }
454
455     if (vm_class == V3_PC_VM) {
456         PrintDebug("Initializing VMCS\n");
457         init_vmcs_bios(info, vmx_state);
458     } else {
459         PrintError("Invalid VM Class\n");
460         return -1;
461     }
462
463     return 0;
464 }
465
466
467 int v3_deinit_vmx_vmcs(struct guest_info * core) {
468     struct vmx_data * vmx_state = core->vmm_data;
469
470     V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
471
472     V3_Free(vmx_state);
473
474     return 0;
475 }
476
477
478 static int update_irq_exit_state(struct guest_info * info) {
479     struct vmx_exit_idt_vec_info idt_vec_info;
480
481     check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
482
483     if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
484 #ifdef CONFIG_DEBUG_INTERRUPTS
485         PrintDebug("Calling v3_injecting_intr\n");
486 #endif
487         info->intr_core_state.irq_started = 0;
488         v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
489     }
490
491     return 0;
492 }
493
494 static int update_irq_entry_state(struct guest_info * info) {
495     struct vmx_exit_idt_vec_info idt_vec_info;
496     struct vmcs_interrupt_state intr_core_state;
497     struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
498
499     check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
500     check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));
501
502     /* Check for pending exceptions to inject */
503     if (v3_excp_pending(info)) {
504         struct vmx_entry_int_info int_info;
505         int_info.value = 0;
506
507         // In VMX, almost every exception is hardware
508         // Software exceptions are pretty much only for breakpoint or overflow
509         int_info.type = 3;
510         int_info.vector = v3_get_excp_number(info);
511
512         if (info->excp_state.excp_error_code_valid) {
513             check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
514             int_info.error_code = 1;
515
516 #ifdef CONFIG_DEBUG_INTERRUPTS
517             PrintDebug("Injecting exception %d with error code %x\n", 
518                     int_info.vector, info->excp_state.excp_error_code);
519 #endif
520         }
521
522         int_info.valid = 1;
523 #ifdef CONFIG_DEBUG_INTERRUPTS
524         PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
525 #endif
526         check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);
527
528         v3_injecting_excp(info, int_info.vector);
529
530     } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
531                (intr_core_state.val == 0)) {
532        
533         if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {
534
535 #ifdef CONFIG_DEBUG_INTERRUPTS
536             PrintDebug("IRQ pending from previous injection\n");
537 #endif
538
539             // Copy the IDT vectoring info over to reinject the old interrupt
540             if (idt_vec_info.error_code == 1) {
541                 uint32_t err_code = 0;
542
543                 check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
544                 check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
545             }
546
547             idt_vec_info.undef = 0;
548             check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);
549
550         } else {
551             struct vmx_entry_int_info ent_int;
552             ent_int.value = 0;
553
554             switch (v3_intr_pending(info)) {
555                 case V3_EXTERNAL_IRQ: {
556                     info->intr_core_state.irq_vector = v3_get_intr(info); 
557                     ent_int.vector = info->intr_core_state.irq_vector;
558                     ent_int.type = 0;
559                     ent_int.error_code = 0;
560                     ent_int.valid = 1;
561
562 #ifdef CONFIG_DEBUG_INTERRUPTS
563                     PrintDebug("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
564                                info->intr_core_state.irq_vector, 
565                                (uint32_t)info->num_exits, 
566                                (void *)(addr_t)info->rip);
567 #endif
568
569                     check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
570                     info->intr_core_state.irq_started = 1;
571
572                     break;
573                 }
574                 case V3_NMI:
575                     PrintDebug("Injecting NMI\n");
576
577                     ent_int.type = 2;
578                     ent_int.vector = 2;
579                     ent_int.valid = 1;
580                     check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
581
582                     break;
583                 case V3_SOFTWARE_INTR:
584                     PrintDebug("Injecting software interrupt\n");
585                     ent_int.type = 4;
586
587                     ent_int.valid = 1;
588                     check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
589
590                     break;
591                 case V3_VIRTUAL_IRQ:
592                     // Not sure what to do here, Intel doesn't have virtual IRQs
593                     // May be the same as external interrupts/IRQs
594
595                     break;
596                 case V3_INVALID_INTR:
597                 default:
598                     break;
599             }
600         }
601     } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
602         // Enable INTR window exiting so we know when IF=1
603         uint32_t instr_len;
604
605         check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
606
607 #ifdef CONFIG_DEBUG_INTERRUPTS
608         PrintDebug("Enabling Interrupt-Window exiting: %d\n", instr_len);
609 #endif
610
611         vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
612         check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
613     }
614
615
616     return 0;
617 }
618
619
620
621 static struct vmx_exit_info exit_log[10];
622
623 static void print_exit_log(struct guest_info * info) {
624     int cnt = info->num_exits % 10;
625     int i = 0;
626     
627
628     V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);
629
630     for (i = 0; i < 10; i++) {
631         struct vmx_exit_info * tmp = &exit_log[cnt];
632
633         V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
634         V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
635         V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
636         V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
637         V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
638
639         cnt--;
640
641         if (cnt == -1) {
642             cnt = 9;
643         }
644
645     }
646
647 }
648
649 /* 
650  * CAUTION and DANGER!!! 
651  * 
652  * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function
653  * When exectuing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
654  * on its contents will cause things to break. The contents at the time of the exit WILL 
655  * change before the exit handler is executed.
656  */
657 int v3_vmx_enter(struct guest_info * info) {
658     int ret = 0;
659     uint32_t tsc_offset_low, tsc_offset_high;
660     struct vmx_exit_info exit_info;
661
662     // Conditionally yield the CPU if the timeslice has expired
663     v3_yield_cond(info);
664
665     /* If this guest is frequency-lagged behind host time, wait 
666      * for the appropriate host time before resuming the guest. */
667     v3_adjust_time(info);
668
669     // v3_print_guest_state(info);
670
671     // disable global interrupts for vm state transition
672     v3_disable_ints();
673
674     v3_vmx_restore_vmcs(info);
675
676
677 #ifdef CONFIG_SYMCALL
678     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
679         update_irq_entry_state(info);
680     }
681 #else 
682     update_irq_entry_state(info);
683 #endif
684
685     {
686         addr_t guest_cr3;
687         vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
688         vmcs_write(VMCS_GUEST_CR3, guest_cr3);
689     }
690
691     v3_update_timers(info);
692
693     tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
694     tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
695     check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
696     check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
697
698     if (info->vm_info->run_state == VM_STOPPED) {
699         info->vm_info->run_state = VM_RUNNING;
700         ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
701     } else {
702         ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
703     }
704
705     //  PrintDebug("VMX Exit: ret=%d\n", ret);
706
707     if (ret != VMX_SUCCESS) {
708         uint32_t error = 0;
709
710         vmcs_read(VMCS_INSTR_ERR, &error);
711         PrintError("VMENTRY Error: %d\n", error);
712
713         return -1;
714     }
715
716     info->num_exits++;
717
718     /* Update guest state */
719     v3_vmx_save_vmcs(info);
720
721     // info->cpl = info->segments.cs.selector & 0x3;
722
723     info->mem_mode = v3_get_vm_mem_mode(info);
724     info->cpu_mode = v3_get_vm_cpu_mode(info);
725
726
727     check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
728     check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
729     check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
730     check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
731     check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
732     check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
733     check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));
734
735     //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);
736
737     exit_log[info->num_exits % 10] = exit_info;
738
739
740 #ifdef CONFIG_SYMCALL
741     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
742         update_irq_exit_state(info);
743     }
744 #else
745     update_irq_exit_state(info);
746 #endif
747
748     // reenable global interrupts after vm exit
749     v3_enable_ints();
750
751     // Conditionally yield the CPU if the timeslice has expired
752     v3_yield_cond(info);
753
754     if (v3_handle_vmx_exit(info, &exit_info) == -1) {
755         PrintError("Error in VMX exit handler\n");
756         return -1;
757     }
758
759     return 0;
760 }
761
762
763 int v3_start_vmx_guest(struct guest_info * info) {
764
765     PrintDebug("Starting VMX core %u\n", info->cpu_id);
766
767     if (info->cpu_id == 0) {
768         info->core_run_state = CORE_RUNNING;
769         info->vm_info->run_state = VM_RUNNING;
770     } else {
771
772         PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);
773
774         while (info->core_run_state == CORE_STOPPED) {
775             v3_yield(info);
776             //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
777         }
778         
779         PrintDebug("VMX core %u initialized\n", info->cpu_id);
780     }
781
782
783     PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
784                info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
785                info->segments.cs.limit, (void *)(info->rip));
786
787
788     PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
789
790     v3_start_time(info);
791
792     while (1) {
793
794         if (info->vm_info->run_state == VM_STOPPED) {
795             info->core_run_state = CORE_STOPPED;
796             break;
797         }
798
799         if (v3_vmx_enter(info) == -1) {
800             v3_print_vmcs();
801             print_exit_log(info);
802             return -1;
803         }
804
805
806
807         if (info->vm_info->run_state == VM_STOPPED) {
808             info->core_run_state = CORE_STOPPED;
809             break;
810         }
811 /*
812         if ((info->num_exits % 5000) == 0) {
813             V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
814         }
815 */
816
817     }
818
819     return 0;
820 }
821
822
823 int v3_is_vmx_capable() {
824     v3_msr_t feature_msr;
825     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
826
827     v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);
828
829     PrintDebug("ECX: 0x%x\n", ecx);
830
831     if (ecx & CPUID_1_ECX_VTXFLAG) {
832         v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
833         
834         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
835
836         if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
837             PrintDebug("VMX is locked -- enable in the BIOS\n");
838             return 0;
839         }
840
841     } else {
842         PrintDebug("VMX not supported on this cpu\n");
843         return 0;
844     }
845
846     return 1;
847 }
848
/*
 * Nested paging probe. No detection is implemented: the function
 * unconditionally reports 0 ("not available").
 */
static int has_vmx_nested_paging() {
    const int nested_paging_available = 0;

    return nested_paging_available;
}
852
853
854
855 void v3_init_vmx_cpu(int cpu_id) {
856     extern v3_cpu_arch_t v3_cpu_types[];
857     struct v3_msr tmp_msr;
858     uint64_t ret = 0;
859
860     v3_get_msr(VMX_CR4_FIXED0_MSR,&(tmp_msr.hi),&(tmp_msr.lo));
861 #ifdef __V3_64BIT__
862     __asm__ __volatile__ (
863                           "movq %%cr4, %%rbx;"
864                           "orq  $0x00002000, %%rbx;"
865                           "movq %%rbx, %0;"
866                           : "=m"(ret) 
867                           :
868                           : "%rbx"
869                           );
870
871     if ((~ret & tmp_msr.value) == 0) {
872         __asm__ __volatile__ (
873                               "movq %0, %%cr4;"
874                               :
875                               : "q"(ret)
876                               );
877     } else {
878         PrintError("Invalid CR4 Settings!\n");
879         return;
880     }
881
882     __asm__ __volatile__ (
883                           "movq %%cr0, %%rbx; "
884                           "orq  $0x00000020,%%rbx; "
885                           "movq %%rbx, %%cr0;"
886                           :
887                           :
888                           : "%rbx"
889                           );
890 #elif __V3_32BIT__
891     __asm__ __volatile__ (
892                           "movl %%cr4, %%ecx;"
893                           "orl  $0x00002000, %%ecx;"
894                           "movl %%ecx, %0;"
895                           : "=m"(ret) 
896                           :
897                           : "%ecx"
898                           );
899
900     if ((~ret & tmp_msr.value) == 0) {
901         __asm__ __volatile__ (
902                               "movl %0, %%cr4;"
903                               :
904                               : "q"(ret)
905                               );
906     } else {
907         PrintError("Invalid CR4 Settings!\n");
908         return;
909     }
910
911     __asm__ __volatile__ (
912                           "movl %%cr0, %%ecx; "
913                           "orl  $0x00000020,%%ecx; "
914                           "movl %%ecx, %%cr0;"
915                           :
916                           :
917                           : "%ecx"
918                           );
919
920 #endif
921
922     //
923     // Should check and return Error here.... 
924
925
926     // Setup VMXON Region
927     host_vmcs_ptrs[cpu_id] = allocate_vmcs();
928
929     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
930
931     if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
932         PrintDebug("VMX Enabled\n");
933     } else {
934         PrintError("VMX initialization failure\n");
935         return;
936     }
937     
938
939     if (has_vmx_nested_paging() == 1) {
940         v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
941     } else {
942         v3_cpu_types[cpu_id] = V3_VMX_CPU;
943     }
944
945 }
946