Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


vmx updates
[palacios.git] / palacios / src / palacios / vmx.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
11  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
13  * All rights reserved.
14  *
15  * Author: Peter Dinda <pdinda@northwestern.edu>
16  *         Jack Lange <jarusl@cs.northwestern.edu>
17  *
18  * This is free software.  You are permitted to use,
19  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20  */
21
22
23 #include <palacios/vmx.h>
24 #include <palacios/vmm.h>
25 #include <palacios/vmx_handler.h>
26 #include <palacios/vmcs.h>
27 #include <palacios/vmx_lowlevel.h>
28 #include <palacios/vmm_lowlevel.h>
29 #include <palacios/vmm_ctrl_regs.h>
30 #include <palacios/vmm_config.h>
31 #include <palacios/vmm_time.h>
32 #include <palacios/vm_guest_mem.h>
33 #include <palacios/vmm_direct_paging.h>
34 #include <palacios/vmx_io.h>
35 #include <palacios/vmx_msr.h>
36
37 #include <palacios/vmx_hw_info.h>
38
39 #ifndef CONFIG_DEBUG_VMX
40 #undef PrintDebug
41 #define PrintDebug(fmt, args...)
42 #endif
43
44
/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

/* Per-CPU record of which VMCS is currently loaded (via VMPTRLD), so that
 * v3_vmx_enter can skip a redundant vmcs_load when re-entering the same VM. */
static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
/* Per-CPU physical address of the VMXON region handed to vmx_on(). */
static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};

/* Low-level assembly entry points that perform VMLAUNCH / VMRESUME.
 * Both return a VMX status code (compared against VMX_SUCCESS by callers). */
extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
54
55 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
56     int ret = 0;
57
58     ret = vmcs_write(field, val);
59
60     if (ret != VMX_SUCCESS) {
61         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
62         return 1;
63     }
64
65     return 0;
66 }
67
68 static int inline check_vmcs_read(vmcs_field_t field, void * val) {
69     int ret = 0;
70
71     ret = vmcs_read(field, val);
72
73     if (ret != VMX_SUCCESS) {
74         PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
75     }
76
77     return ret;
78 }
79
80
81
82
83 static addr_t allocate_vmcs() {
84     struct vmcs_data * vmcs_page = NULL;
85
86     PrintDebug("Allocating page\n");
87
88     vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
89     memset(vmcs_page, 0, 4096);
90
91     vmcs_page->revision = hw_info.basic_info.revision;
92     PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
93
94     return (addr_t)V3_PAddr((void *)vmcs_page);
95 }
96
97
98
99
100 static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) {
101     int vmx_ret = 0;
102
103     // disable global interrupts for vm state initialization
104     v3_disable_ints();
105
106     PrintDebug("Loading VMCS\n");
107     vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
108     active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
109     vmx_state->state = VMX_UNLAUNCHED;
110
111     if (vmx_ret != VMX_SUCCESS) {
112         PrintError("VMPTRLD failed\n");
113         return -1;
114     }
115
116
117     /*** Setup default state from HW ***/
118
119     vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
120     vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
121     vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
122     vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;;
123
124     /* Print Control MSRs */
125     PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
126     PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)hw_info.cr4.value);
127
128
129
130     /******* Setup Host State **********/
131
132     /* Cache GDTR, IDTR, and TR in host struct */
133     addr_t gdtr_base;
134     struct {
135         uint16_t selector;
136         addr_t   base;
137     } __attribute__((packed)) tmp_seg;
138     
139
140     __asm__ __volatile__(
141                          "sgdt (%0);"
142                          :
143                          : "q"(&tmp_seg)
144                          : "memory"
145                          );
146     gdtr_base = tmp_seg.base;
147     vmx_state->host_state.gdtr.base = gdtr_base;
148
149     __asm__ __volatile__(
150                          "sidt (%0);"
151                          :
152                          : "q"(&tmp_seg)
153                          : "memory"
154                          );
155     vmx_state->host_state.idtr.base = tmp_seg.base;
156
157     __asm__ __volatile__(
158                          "str (%0);"
159                          :
160                          : "q"(&tmp_seg)
161                          : "memory"
162                          );
163     vmx_state->host_state.tr.selector = tmp_seg.selector;
164
165     /* The GDTR *index* is bits 3-15 of the selector. */
166     struct tss_descriptor * desc = NULL;
167     desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));
168
169     tmp_seg.base = ((desc->base1) |
170                     (desc->base2 << 16) |
171                     (desc->base3 << 24) |
172 #ifdef __V3_64BIT__
173                     ((uint64_t)desc->base4 << 32)
174 #else 
175                     (0)
176 #endif
177                     );
178
179     vmx_state->host_state.tr.base = tmp_seg.base;
180
181   
182
183     /********** Setup and VMX Control Fields from MSR ***********/
184
185
186     /* Add external interrupts, NMI exiting, and virtual NMI */
187     vmx_state->pin_ctrls.nmi_exit = 1;
188     vmx_state->pin_ctrls.ext_int_exit = 1;
189
190     vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
191     vmx_state->pri_proc_ctrls.hlt_exit = 1;
192     vmx_state->pri_proc_ctrls.invlpg_exit = 1;
193     vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
194     vmx_state->pri_proc_ctrls.pause_exit = 1;
195     vmx_state->pri_proc_ctrls.tsc_offset = 1;
196 #ifdef CONFIG_TIME_VIRTUALIZE_TSC
197     vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
198 #endif
199
200     /* Setup IO map */
201     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
202     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
203             (addr_t)V3_PAddr(info->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);
204
205
206     vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->vm_info->msr_map.arch_data));
207
208
209     vmx_state->exit_ctrls.host_64_on = 1;
210
211     if ((vmx_state->exit_ctrls.save_efer == 1) || (vmx_state->exit_ctrls.ld_efer == 1)) {
212         vmx_state->ia32e_avail = 1;
213     }
214
215
216     /******* Setup VMXAssist guest state ***********/
217
218     info->rip = 0xd0000;
219     info->vm_regs.rsp = 0x80000;
220     info->ctrl_regs.rflags->rsvd1 = 1;
221
222 #define GUEST_CR0 0x80000031
223 #define GUEST_CR4 0x00002000
224     info->ctrl_regs.cr0 = GUEST_CR0;
225     info->ctrl_regs.cr4 = GUEST_CR4;
226
227     ((struct cr0_32 *)&(info->shdw_pg_state.guest_cr0))->pe = 1;
228    
229     /* Setup paging */
230     if (info->shdw_pg_mode == SHADOW_PAGING) {
231         PrintDebug("Creating initial shadow page table\n");
232
233         if (v3_init_passthrough_pts(info) == -1) {
234             PrintError("Could not initialize passthrough page tables\n");
235             return -1;
236         }
237         
238 #define CR0_PE 0x00000001
239 #define CR0_PG 0x80000000
240
241
242         vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG) );
243         vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);
244
245         info->ctrl_regs.cr3 = info->direct_map_pt;
246
247         // vmx_state->pinbased_ctrls |= NMI_EXIT;
248
249         /* Add CR exits */
250         vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
251         vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
252         
253         /* Add page fault exits */
254         vmx_state->excp_bmap.pf = 1;
255     }
256
257     // Setup segment registers
258     {
259         struct v3_segment * seg_reg = (struct v3_segment *)&(info->segments);
260
261         int i;
262
263         for (i = 0; i < 10; i++) {
264             seg_reg[i].selector = 3 << 3;
265             seg_reg[i].limit = 0xffff;
266             seg_reg[i].base = 0x0;
267         }
268
269         info->segments.cs.selector = 2<<3;
270
271         /* Set only the segment registers */
272         for (i = 0; i < 6; i++) {
273             seg_reg[i].limit = 0xfffff;
274             seg_reg[i].granularity = 1;
275             seg_reg[i].type = 3;
276             seg_reg[i].system = 1;
277             seg_reg[i].dpl = 0;
278             seg_reg[i].present = 1;
279             seg_reg[i].db = 1;
280         }
281
282         info->segments.cs.type = 0xb;
283
284         info->segments.ldtr.selector = 0x20;
285         info->segments.ldtr.type = 2;
286         info->segments.ldtr.system = 0;
287         info->segments.ldtr.present = 1;
288         info->segments.ldtr.granularity = 0;
289
290     
291         /************* Map in GDT and vmxassist *************/
292
293         uint64_t  gdt[] __attribute__ ((aligned(32))) = {
294             0x0000000000000000ULL,              /* 0x00: reserved */
295             0x0000830000000000ULL,              /* 0x08: 32-bit TSS */
296             //0x0000890000000000ULL,            /* 0x08: 32-bit TSS */
297             0x00CF9b000000FFFFULL,              /* 0x10: CS 32-bit */
298             0x00CF93000000FFFFULL,              /* 0x18: DS 32-bit */
299             0x000082000000FFFFULL,              /* 0x20: LDTR 32-bit */
300         };
301
302 #define VMXASSIST_GDT   0x10000
303         addr_t vmxassist_gdt = 0;
304
305         if (v3_gpa_to_hva(info, VMXASSIST_GDT, &vmxassist_gdt) == -1) {
306             PrintError("Could not find VMXASSIST GDT destination\n");
307             return -1;
308         }
309
310         memcpy((void *)vmxassist_gdt, gdt, sizeof(uint64_t) * 5);
311         
312         info->segments.gdtr.base = VMXASSIST_GDT;
313
314 #define VMXASSIST_TSS   0x40000
315         uint64_t vmxassist_tss = VMXASSIST_TSS;
316         gdt[0x08 / sizeof(gdt[0])] |=
317             ((vmxassist_tss & 0xFF000000) << (56 - 24)) |
318             ((vmxassist_tss & 0x00FF0000) << (32 - 16)) |
319             ((vmxassist_tss & 0x0000FFFF) << (16)) |
320             (8392 - 1);
321
322         info->segments.tr.selector = 0x08;
323         info->segments.tr.base = vmxassist_tss;
324
325         //info->segments.tr.type = 0x9; 
326         info->segments.tr.type = 0x3;
327         info->segments.tr.system = 0;
328         info->segments.tr.present = 1;
329         info->segments.tr.granularity = 0;
330     }
331  
332     // setup VMXASSIST
333     { 
334 #define VMXASSIST_START 0x000d0000
335         extern uint8_t v3_vmxassist_start[];
336         extern uint8_t v3_vmxassist_end[];
337         addr_t vmxassist_dst = 0;
338
339         if (v3_gpa_to_hva(info, VMXASSIST_START, &vmxassist_dst) == -1) {
340             PrintError("Could not find VMXASSIST destination\n");
341             return -1;
342         }
343
344         memcpy((void *)vmxassist_dst, v3_vmxassist_start, v3_vmxassist_end - v3_vmxassist_start);
345
346
347         vmx_state->assist_state = VMXASSIST_DISABLED;
348     }    
349
350
351
352
353     /* Sanity check ctrl/reg fields against hw_defaults */
354
355
356
357     /*** Write all the info to the VMCS ***/
358   
359     {
360 #define DEBUGCTL_MSR 0x1d9
361         struct v3_msr tmp_msr;
362         v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
363         vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
364         info->dbg_regs.dr7 = 0x400;
365     }
366
367
368 #ifdef __V3_64BIT__
369     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
370 #else
371     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
372     vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
373 #endif
374
375
376  
377
378     if (v3_update_vmcs_ctrl_fields(info)) {
379         PrintError("Could not write control fields!\n");
380         return -1;
381     }
382     
383     if (v3_update_vmcs_host_state(info)) {
384         PrintError("Could not write host state\n");
385         return -1;
386     }
387
388
389
390     // reenable global interrupts for vm state initialization now
391     // that the vm state is initialized. If another VM kicks us off, 
392     // it'll update our vmx state so that we know to reload ourself
393     v3_enable_ints();
394
395     return 0;
396 }
397
398 int v3_init_vmx_vmcs(struct guest_info * info, v3_vm_class_t vm_class) {
399     struct vmx_data * vmx_state = NULL;
400     int vmx_ret = 0;
401     
402     vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
403     memset(vmx_state, 0, sizeof(struct vmx_data));
404
405     PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);
406
407     PrintDebug("Allocating VMCS\n");
408     vmx_state->vmcs_ptr_phys = allocate_vmcs();
409
410     PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
411
412     info->vmm_data = vmx_state;
413     vmx_state->state = VMX_UNLAUNCHED;
414
415     PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
416     
417     // TODO: Fix vmcs fields so they're 32-bit
418
419     PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
420     vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);
421
422     if (vmx_ret != VMX_SUCCESS) {
423         PrintError("VMCLEAR failed\n");
424         return -1; 
425     }
426
427     if (vm_class == V3_PC_VM) {
428         PrintDebug("Initializing VMCS\n");
429         init_vmcs_bios(info, vmx_state);
430     } else {
431         PrintError("Invalid VM Class\n");
432         return -1;
433     }
434
435     return 0;
436 }
437
438
439 int v3_deinit_vmx_vmcs(struct guest_info * core) {
440     struct vmx_data * vmx_state = core->vmm_data;
441
442     V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
443
444     V3_Free(vmx_state);
445
446     return 0;
447 }
448
449
450 static int update_irq_exit_state(struct guest_info * info) {
451     struct vmx_exit_idt_vec_info idt_vec_info;
452
453     check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
454
455     if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
456 #ifdef CONFIG_DEBUG_INTERRUPTS
457         PrintDebug("Calling v3_injecting_intr\n");
458 #endif
459         info->intr_core_state.irq_started = 0;
460         v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
461     }
462
463     return 0;
464 }
465
/* Prepare event injection for the next VM entry.
 *
 * Priority order implemented below:
 *   1. a pending guest exception,
 *   2. if guest RFLAGS.IF == 1 and the guest interruptibility state is
 *      clear: re-inject a previously interrupted delivery (from the IDT
 *      vectoring info), or inject a newly pending IRQ/NMI/soft interrupt,
 *   3. otherwise, if an IRQ is pending but cannot be delivered yet, turn
 *      on interrupt-window exiting so we get an exit once injection is
 *      possible.
 *
 * Always returns 0.
 */
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            // Error code goes in its own VMCS field; the entry-info bit
            // just flags its presence
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {
            // A prior injection was cut short by the exit; replay it
            // verbatim rather than asking the interrupt subsystem again.

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            // Clear the bit that is reserved/undefined in the entry-info format
            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    // type 0 == external interrupt in the entry-info encoding
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
                    PrintDebug("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    // Remember the in-flight injection for update_irq_exit_state
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    // type 2 == NMI, which is architecturally vector 2
                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    // NOTE(review): no vector is set here, unlike the other
                    // cases -- confirm whether that is intentional
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}
590
591
592
593 static struct vmx_exit_info exit_log[10];
594
595 static void print_exit_log(struct guest_info * info) {
596     int cnt = info->num_exits % 10;
597     int i = 0;
598     
599
600     V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);
601
602     for (i = 0; i < 10; i++) {
603         struct vmx_exit_info * tmp = &exit_log[cnt];
604
605         V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
606         V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
607         V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
608         V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
609         V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
610
611         cnt--;
612
613         if (cnt == -1) {
614             cnt = 9;
615         }
616
617     }
618
619 }
620
/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
/* Run the guest core for one entry/exit cycle.
 * Returns 0 on a handled exit, -1 on entry failure or exit-handler error. */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // Update timer devices prior to entering VM.
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    // If another VM's VMCS was loaded on this CPU since our last entry,
    // reload ours and update the per-CPU tracking slot
    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
        vmcs_load(vmx_info->vmcs_ptr_phys);
        active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
    }


    v3_vmx_restore_vmcs(info);


#ifdef CONFIG_SYMCALL
    // Skip IRQ injection while a symbiotic call is in progress
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        // NOTE(review): guest CR3 is read back and immediately rewritten --
        // presumably to force the value into the active VMCS; confirm
        // before removing.
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

    // TSC offset is a 64-bit VMCS field written as two 32-bit halves
    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);

    // First entry uses VMLAUNCH; subsequent entries must use VMRESUME
    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }
    
    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);
        PrintError("VMENTRY Error: %d\n", error);

        return -1;
    }

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    // Snapshot the exit information while the VMCS is still ours
    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler\n");
        return -1;
    }

    return 0;
}
759
760
761 int v3_start_vmx_guest(struct guest_info * info) {
762
763     PrintDebug("Starting VMX core %u\n", info->cpu_id);
764
765     if (info->cpu_id == 0) {
766         info->core_run_state = CORE_RUNNING;
767         info->vm_info->run_state = VM_RUNNING;
768     } else {
769
770         PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);
771
772         while (info->core_run_state == CORE_STOPPED) {
773             v3_yield(info);
774             //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
775         }
776         
777         PrintDebug("VMX core %u initialized\n", info->cpu_id);
778     }
779
780
781     PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
782                info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
783                info->segments.cs.limit, (void *)(info->rip));
784
785
786     PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
787
788     v3_start_time(info);
789
790     while (1) {
791
792         if (info->vm_info->run_state == VM_STOPPED) {
793             info->core_run_state = CORE_STOPPED;
794             break;
795         }
796
797         if (v3_vmx_enter(info) == -1) {
798             v3_print_vmcs();
799             print_exit_log(info);
800             return -1;
801         }
802
803
804
805         if (info->vm_info->run_state == VM_STOPPED) {
806             info->core_run_state = CORE_STOPPED;
807             break;
808         }
809 /*
810         if ((info->num_exits % 5000) == 0) {
811             V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
812         }
813 */
814
815     }
816
817     return 0;
818 }
819
820
821
822
823 #define VMX_FEATURE_CONTROL_MSR     0x0000003a
824 #define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
825 #define CPUID_1_ECX_VTXFLAG 0x00000020
826
827 int v3_is_vmx_capable() {
828     v3_msr_t feature_msr;
829     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
830
831     v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);
832
833     PrintDebug("ECX: 0x%x\n", ecx);
834
835     if (ecx & CPUID_1_ECX_VTXFLAG) {
836         v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
837         
838         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
839
840         if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
841             PrintDebug("VMX is locked -- enable in the BIOS\n");
842             return 0;
843         }
844
845     } else {
846         PrintDebug("VMX not supported on this cpu\n");
847         return 0;
848     }
849
850     return 1;
851 }
852
853
854
855
856
857
858 void v3_init_vmx_cpu(int cpu_id) {
859     extern v3_cpu_arch_t v3_cpu_types[];
860
861     if (cpu_id == 0) {
862         if (v3_init_vmx_hw(&hw_info) == -1) {
863             PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
864             return;
865         }
866     }
867
868
869     enable_vmx();
870
871
872     // Setup VMXON Region
873     host_vmcs_ptrs[cpu_id] = allocate_vmcs();
874
875     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
876
877     if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
878         PrintDebug("VMX Enabled\n");
879     } else {
880         PrintError("VMX initialization failure\n");
881         return;
882     }
883     
884
885     v3_cpu_types[cpu_id] = V3_VMX_CPU;
886
887
888 }
889
890
891 void v3_deinit_vmx_cpu(int cpu_id) {
892     extern v3_cpu_arch_t v3_cpu_types[];
893     v3_cpu_types[cpu_id] = V3_INVALID_CPU;
894     V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
895 }