Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
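
For example, to list the remote branches and then track a release branch (the release branch name below is only illustrative; use one of the names that git branch -r reports):

  git branch -r
  git checkout --track -b release-1.0 origin/release-1.0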


Fixed bug with VMX interrupt injection

palacios/src/palacios/vmx.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Peter Dinda <pdinda@northwestern.edu>
 *         Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>

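/* Per-physical-CPU VMXON region pointers; filled in by v3_init_vmx_cpu() below. */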
static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};



extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}

#if 0
// For the 32 bit reserved bit fields 
// MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
    v3_msr_t mask_msr;

    PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);

    v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);

    PrintDebug("MSR %x = %x : %x \n", msr_num, mask_msr.hi, mask_msr.lo);

    val |= mask_msr.lo;
    val |= mask_msr.hi;

    return val;
}



static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
    v3_msr_t msr0, msr1;
    addr_t msr0_val, msr1_val;

    PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);

    v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
    v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);

    // This generates a mask that is the natural bit width of the CPU
    msr0_val = msr0.value;
    msr1_val = msr1.value;

    PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, (void*)msr0_val, msr_num1, (void*)msr1_val);

    val |= msr0_val;
    val |= msr1_val;

    return val;
}



#endif

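/* Allocate and zero one page for a VMCS region, stamp it with the revision ID
 * from the IA32_VMX_BASIC MSR, and return its physical address. */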
static addr_t allocate_vmcs() {
    reg_ex_t msr;
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    
    vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}



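/* Build the initial VMCS for booting the guest BIOS through VMXAssist: load
 * the VMCS, cache the host GDTR/IDTR/TR, program the VMX control fields from
 * their capability MSRs, and set up the VMXAssist guest environment. */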
static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;
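    /* Note: SGDT/SIDT store a 16-bit limit followed by the base address, and
     * STR stores a 16-bit selector, so this packed layout overlays both. */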


    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_state->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.tr.selector = tmp_seg.selector;

    /* The GDTR *index* is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else 
                    (0)
#endif
                    );

    vmx_state->host_state.tr.base = tmp_seg.base;



    /********** Setup VMX Control Fields from MSRs ***********/
    /* Setup IO map */
    v3_init_vmx_io_map(info);
    v3_init_vmx_msr_map(info);

    struct v3_msr tmp_msr;

    v3_get_msr(VMX_PINBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.value = tmp_msr.lo;
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;

    v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

    vmx_state->pri_proc_ctrls.value = tmp_msr.lo;
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_state->pri_proc_ctrls.hlt_exit = 1;
    vmx_state->pri_proc_ctrls.invlpg_exit = 1;
    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_state->pri_proc_ctrls.pause_exit = 1;

    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB); 

    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));

    v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_state->exit_ctrls.value = tmp_msr.lo;
    vmx_state->exit_ctrls.host_64_on = 1;

    if ((vmx_state->exit_ctrls.save_efer == 1) || (vmx_state->exit_ctrls.ld_efer == 1)) {
        vmx_state->ia32e_avail = 1;
    }

    v3_get_msr(VMX_ENTRY_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_state->entry_ctrls.value = tmp_msr.lo;

    {
        struct vmx_exception_bitmap excp_bmap;
        excp_bmap.value = 0;
        
        excp_bmap.pf = 1;
    
        vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
    }
    /******* Setup VMXAssist guest state ***********/

    info->rip = 0xd0000;
    info->vm_regs.rsp = 0x80000;

    struct rflags * flags = (struct rflags *)&(info->ctrl_regs.rflags);
    flags->rsvd1 = 1;

    /* Print Control MSRs */
    v3_get_msr(VMX_CR0_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)tmp_msr.value);

    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)tmp_msr.value);

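/* GUEST_CR0 = PG | NE | ET | PE; GUEST_CR4 = VMXE */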
#define GUEST_CR0 0x80000031
#define GUEST_CR4 0x00002000
    info->ctrl_regs.cr0 = GUEST_CR0;
    info->ctrl_regs.cr4 = GUEST_CR4;

    ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
   
    /* Setup paging */
    if (info->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(info) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }
        
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000


        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG));
        vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);

        info->ctrl_regs.cr3 = info->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
    }

    // Setup segment registers
    {
        struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);

        int i;

        for (i = 0; i < 10; i++) {
            seg_reg[i].selector = 3 << 3;
            seg_reg[i].limit = 0xffff;
            seg_reg[i].base = 0x0;
        }

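        /* CS gets selector 2<<3 = 0x10, the 32-bit code segment in the
         * VMXAssist GDT built below; data segments keep 3<<3 = 0x18. */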
        info->segments.cs.selector = 2 << 3;

        /* Set only the segment registers */
        for (i = 0; i < 6; i++) {
            seg_reg[i].limit = 0xfffff;
            seg_reg[i].granularity = 1;
            seg_reg[i].type = 3;
            seg_reg[i].system = 1;
            seg_reg[i].dpl = 0;
            seg_reg[i].present = 1;
            seg_reg[i].db = 1;
        }

        info->segments.cs.type = 0xb;

        info->segments.ldtr.selector = 0x20;
        info->segments.ldtr.type = 2;
        info->segments.ldtr.system = 0;
        info->segments.ldtr.present = 1;
        info->segments.ldtr.granularity = 0;

    
        /************* Map in GDT and vmxassist *************/

        uint64_t gdt[] __attribute__ ((aligned(32))) = {
            0x0000000000000000ULL,              /* 0x00: reserved */
            0x0000830000000000ULL,              /* 0x08: 32-bit TSS */
            //0x0000890000000000ULL,            /* 0x08: 32-bit TSS */
            0x00CF9b000000FFFFULL,              /* 0x10: CS 32-bit */
            0x00CF93000000FFFFULL,              /* 0x18: DS 32-bit */
            0x000082000000FFFFULL,              /* 0x20: LDTR 32-bit */
        };

#define VMXASSIST_GDT   0x10000
        addr_t vmxassist_gdt = 0;

        if (guest_pa_to_host_va(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
            PrintError("Could not find VMXASSIST GDT destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
        
        info->segments.gdtr.base = VMXASSIST_GDT;

#define VMXASSIST_TSS   0x40000
        uint64_t vmxassist_tss = VMXASSIST_TSS;
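        /* Fold the VMXAssist TSS base address and limit into the TSS
         * descriptor (GDT entry 0x08). */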
        gdt[0x08 / sizeof(gdt[0])] |=
            ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
            ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
            ((vmxassist_tss & 0x0000FFFF) << (16)) |
            (8392 - 1);

        info->segments.tr.selector = 0x08;
        info->segments.tr.base = vmxassist_tss;

        //info->segments.tr.type = 0x9; 
        info->segments.tr.type = 0x3;
        info->segments.tr.system = 0;
        info->segments.tr.present = 1;
        info->segments.tr.granularity = 0;
    }
 
    // setup VMXASSIST
    { 
#define VMXASSIST_START 0x000d0000
        extern uint8_t v3_vmxassist_start[];
        extern uint8_t v3_vmxassist_end[];
        addr_t vmxassist_dst = 0;

        if (guest_pa_to_host_va(info, VMXASSIST_START, &vmxassist_dst) == -1) {
            PrintError("Could not find VMXASSIST destination\n");
            return -1;
        }

        memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
    }

    /*** Write all the info to the VMCS ***/

#define DEBUGCTL_MSR 0x1d9
    v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
    vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);

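    /* DR7 = 0x400 is the architectural reset value of the debug control register. */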
    info->dbg_regs.dr7 = 0x400;

    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
    

    if (v3_update_vmcs_ctrl_fields(info)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }
    
    if (v3_update_vmcs_host_state(info)) {
        PrintError("Could not write host state\n");
        return -1;
    }


    vmx_state->state = VMXASSIST_DISABLED;

    return 0;
}

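/* Allocate the per-guest VMX state and a VMCS region, VMCLEAR it, and
 * initialize it according to the VM class. */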
int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;
    
    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    info->vmm_data = vmx_state;

    PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
    
    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1; 
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        init_vmcs_bios(info, vmx_state);
    } else {
        PrintError("Invalid VM Class\n");
        return -1;
    }

    return 0;
}

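/* After an exit: if an interrupt injection just completed (no IDT vectoring
 * info is left over), notify the interrupt core via v3_injecting_intr(). */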
static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Calling v3_injecting_intr\n");
#endif
        info->intr_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

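/* Before an entry: inject a pending exception first; otherwise, if the guest
 * has interrupts enabled (IF=1) and is not in an interrupt shadow, inject or
 * reinject an IRQ; if an IRQ is pending but cannot be delivered yet, enable
 * interrupt-window exiting so we find out when it can be. */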
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_state.val == 0)) {
       
        if ((info->intr_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
                    PrintDebug("Injecting Interrupt %d at exit %u (EIP=%p)\n", 
                               info->intr_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here; Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}



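/* Ring buffer of the last 10 exits, dumped via print_exit_log() when
 * v3_vmx_enter() fails. */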
static struct vmx_exit_info exit_log[10];

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;
    

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint64_t tmp_tsc = 0;
    struct vmx_exit_info exit_info;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);


    // v3_print_guest_state(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    v3_vmx_restore_vmcs(info);


#ifdef CONFIG_SYMBIOTIC
    if (info->sym_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    rdtscll(info->time_state.cached_host_tsc);

    if (info->run_state == VM_STOPPED) {
        info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);
        PrintError("VMENTRY Error: %d\n", error);

        return -1;
    }

    rdtscll(tmp_tsc);

    info->num_exits++;

    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;


#ifdef CONFIG_SYMBIOTIC
    if (info->sym_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler\n");
        return -1;
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {


    PrintDebug("Launching VMX guest\n");

    rdtscll(info->time_state.cached_host_tsc);


    while (1) {
        if (v3_vmx_enter(info) == -1) {
            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }

/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}

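/* Detect VMX support: check the CPUID VMX feature flag, then make sure the
 * IA32_FEATURE_CONTROL MSR was not locked by the BIOS with VMX disabled. */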
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
        
        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this CPU\n");
        return 0;
    }

    return 1;
}

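/* Nested paging (EPT) support is not implemented yet, so always report no. */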
static int has_vmx_nested_paging() {
    return 0;
}



void v3_init_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    struct v3_msr tmp_msr;
    uint64_t ret = 0;

    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));

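    /* Read CR4 and set the VMXE bit (bit 13). The result must contain every
     * bit required by the VMX_CR4_FIXED0 MSR before it can be written back. */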
    __asm__ __volatile__ (
                          "movq %%cr4, %%rbx;"
                          "orq  $0x00002000, %%rbx;"
                          "movq %%rbx, %0;"
                          : "=m"(ret) 
                          :
                          : "%rbx"
                          );

    if ((~ret & tmp_msr.value) == 0) {
        __asm__ __volatile__ (
                              "movq %0, %%cr4;"
                              :
                              : "q"(ret)
                              );
    } else {
        PrintError("Invalid CR4 Settings!\n");
        return;
    }

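    /* Set CR0.NE (bit 5), which is required to be 1 for VMX operation. */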
    __asm__ __volatile__ (
                          "movq %%cr0, %%rbx; "
                          "orq  $0x00000020, %%rbx; "
                          "movq %%rbx, %%cr0;"
                          :
                          :
                          : "%rbx"
                          );
    //
    // Should check and return Error here.... 


    // Setup VMXON Region
    host_vmcs_ptrs[cpu_id] = allocate_vmcs();

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
        return;
    }
    

    if (has_vmx_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_VMX_CPU;
    }

}
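
How the pieces above fit together, as a minimal sketch: the call order is inferred from this file, the wrapper function and its name are hypothetical, and error handling is elided (the real call sites live in the Palacios VMM core, not in vmx.c).

  // Hypothetical host-side flow for bringing up one guest on CPU 0.
  // v3_is_vmx_capable, v3_init_vmx_cpu, v3_init_vmx_vmcs, and
  // v3_start_vmx_guest are the entry points defined in this file.
  void example_vmx_boot(struct guest_info * info) {
      if (!v3_is_vmx_capable()) {        // CPUID VMX flag + IA32_FEATURE_CONTROL check
          return;
      }
      v3_init_vmx_cpu(0);                // set CR4.VMXE/CR0.NE, allocate VMXON region
      v3_init_vmx_vmcs(info, V3_PC_VM);  // allocate, VMCLEAR, and populate the VMCS
      v3_start_vmx_guest(info);          // loop on v3_vmx_enter() until an error
  }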