Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Can now boot Linux (slowly) with EPT enabled
[palacios.git] / palacios / src / palacios / vmx.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
11  * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20
21 #include <palacios/vmx.h>
22 #include <palacios/vmm.h>
23 #include <palacios/vmx_handler.h>
24 #include <palacios/vmcs.h>
25 #include <palacios/vmx_lowlevel.h>
26 #include <palacios/vmm_lowlevel.h>
27 #include <palacios/vmm_ctrl_regs.h>
28 #include <palacios/vmm_config.h>
29 #include <palacios/vmm_time.h>
30 #include <palacios/vm_guest_mem.h>
31 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/vmx_io.h>
33 #include <palacios/vmx_msr.h>
34
35 #include <palacios/vmx_ept.h>
36 #include <palacios/vmx_assist.h>
37 #include <palacios/vmx_hw_info.h>
38
39 #ifndef CONFIG_DEBUG_VMX
40 #undef PrintDebug
41 #define PrintDebug(fmt, args...)
42 #endif
43
44
45 /* These fields contain the hardware feature sets supported by the local CPU */
46 static struct vmx_hw_info hw_info;
47
48 extern v3_cpu_arch_t v3_cpu_types[];
49
50 static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
51 static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
52
53 extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
54 extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
55
56 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
57     int ret = 0;
58
59     ret = vmcs_write(field, val);
60
61     if (ret != VMX_SUCCESS) {
62         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
63         return 1;
64     }
65
66     return 0;
67 }
68
69 static int inline check_vmcs_read(vmcs_field_t field, void * val) {
70     int ret = 0;
71
72     ret = vmcs_read(field, val);
73
74     if (ret != VMX_SUCCESS) {
75         PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
76     }
77
78     return ret;
79 }
80
81
82
83
84 static addr_t allocate_vmcs() {
85     struct vmcs_data * vmcs_page = NULL;
86
87     PrintDebug("Allocating page\n");
88
89     vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
90     memset(vmcs_page, 0, 4096);
91
92     vmcs_page->revision = hw_info.basic_info.revision;
93     PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
94
95     return (addr_t)V3_PAddr((void *)vmcs_page);
96 }
97
98
99
100
/* Initialize a core's VMCS to a BIOS-bootable default state.
 *
 * Loads the core's VMCS on the current CPU, caches host GDTR/IDTR/TR,
 * programs the VMX pin/proc/entry/exit control fields, and configures
 * paging (shadow, EPT, or EPT + unrestricted guest) based on the core's
 * shadow paging mode and the CPU's detected capabilities.
 *
 * Returns 0 on success, -1 on failure.
 *
 * NOTE(review): v3_disable_ints() is called on entry but the early
 * "return -1" paths below never call v3_enable_ints(), leaving global
 * interrupts disabled on failure -- TODO confirm callers tolerate this.
 */
static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    // Make this core's VMCS the active one on the current physical CPU
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    active_vmcs_ptrs[V3_Get_CPU()] = vmx_state->vmcs_ptr_phys;
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    // Start from the hardware-mandated default values discovered by vmx_hw_info
    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)hw_info.cr4.value);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;
    

    // SGDT/SIDT store a 10-byte (limit16 + base) image; tmp_seg overlays it
    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_state->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.idtr.base = tmp_seg.base;

    // STR stores only the 16-bit task register selector
    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.tr.selector = tmp_seg.selector;

    /* The GDTR *index* is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    // Reassemble the TSS base address from the descriptor's split base fields
    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else 
                    (0)
#endif
                    );

    vmx_state->host_state.tr.base = tmp_seg.base;


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    vmx_state->pri_proc_ctrls.hlt_exit = 1;


    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    // Bitmap A covers ports 0x0000-0x7fff, bitmap B the following page
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));




#ifdef __V3_64BIT__
    vmx_state->exit_ctrls.host_64_on = 1;
#endif



    /* Not sure how exactly to handle this... */
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);


    // Guest must never see/clear CR4.VMXE
    vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);



    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }
        
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
        
        vmx_state->pri_proc_ctrls.invlpg_exit = 1;
        
        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_CPU)) {

        // initialize 1to1 pts

        // NOTE(review): identical re-#define of CR0_PE/CR0_PG/CR0_WP from the
        // shadow-paging branch above; legal (identical redefinition) but could
        // be hoisted to file scope
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs
        
        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        //      vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation

        // Swap EFER on entry/exit so guest and host EFER stay independent
        vmx_state->entry_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.save_efer = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }


    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT

        // Real-mode reset state: execution begins at F000:FFF0 (the BIOS entry)
        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode


        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;


        // Initialize the remaining data segments to flat real-mode defaults
        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];
        
            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        // LDTR/TR need valid (non-zero) types to pass VMX guest-state checks
        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation

        vmx_state->entry_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.save_efer = 1;

        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else {
        PrintError("Invalid Virtual paging mode\n");
        return -1;
    }


    // Hook the VMX msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area


    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/
  
    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

    // The VMCS link pointer must be all-ones when VMCS shadowing is unused
#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif


 

    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }
    
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }

    // reenable global interrupts for vm state initialization now
    // that the vm state is initialized. If another VM kicks us off, 
    // it'll update our vmx state so that we know to reload ourself
    v3_enable_ints();

    return 0;
}
444
445 int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
446     struct vmx_data * vmx_state = NULL;
447     int vmx_ret = 0;
448     
449     vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
450     memset(vmx_state, 0, sizeof(struct vmx_data));
451
452     PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);
453
454     PrintDebug("Allocating VMCS\n");
455     vmx_state->vmcs_ptr_phys = allocate_vmcs();
456
457     PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));
458
459     core->vmm_data = vmx_state;
460     vmx_state->state = VMX_UNLAUNCHED;
461
462     PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);
463     
464     // TODO: Fix vmcs fields so they're 32-bit
465
466     PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
467     vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);
468
469     if (vmx_ret != VMX_SUCCESS) {
470         PrintError("VMCLEAR failed\n");
471         return -1; 
472     }
473
474     if (vm_class == V3_PC_VM) {
475         PrintDebug("Initializing VMCS\n");
476         init_vmcs_bios(core, vmx_state);
477     } else {
478         PrintError("Invalid VM Class\n");
479         return -1;
480     }
481
482     return 0;
483 }
484
485
486 int v3_deinit_vmx_vmcs(struct guest_info * core) {
487     struct vmx_data * vmx_state = core->vmm_data;
488
489     V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
490
491     V3_Free(vmx_state);
492
493     return 0;
494 }
495
496
497 static int update_irq_exit_state(struct guest_info * info) {
498     struct vmx_exit_idt_vec_info idt_vec_info;
499
500     check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
501
502     if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
503 #ifdef CONFIG_DEBUG_INTERRUPTS
504         PrintDebug("Calling v3_injecting_intr\n");
505 #endif
506         info->intr_core_state.irq_started = 0;
507         v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
508     }
509
510     return 0;
511 }
512
/* Pre-entry event injection state machine.
 *
 * Priority order implemented below:
 *   1. pending guest exceptions (injected as hardware exceptions),
 *   2. pending interrupts when the guest is interruptible (RFLAGS.IF set
 *      and no VMCS interruptibility blocking), including re-injection of
 *      an IRQ whose delivery was interrupted (IDT vectoring info valid),
 *   3. otherwise, if an interrupt is pending but cannot be delivered,
 *      arm interrupt-window exiting so we learn when IF becomes 1.
 *
 * Always returns 0. Must be called with the core's VMCS active.
 */
static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        // Guest is interruptible (IF=1, no STI/MOV-SS blocking)
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            // Clear the reserved/undefined bit before reuse as entry info
            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    // type 0 = external interrupt
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
                    PrintDebug("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    // type 2 = NMI, always delivered on vector 2
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    // type 4 = software interrupt
                    // NOTE(review): ent_int.vector is left at 0 here -- looks
                    // like the intended vector is never set; TODO confirm
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}
637
638
639
640 static struct vmx_exit_info exit_log[10];
641
642 static void print_exit_log(struct guest_info * info) {
643     int cnt = info->num_exits % 10;
644     int i = 0;
645     
646
647     V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);
648
649     for (i = 0; i < 10; i++) {
650         struct vmx_exit_info * tmp = &exit_log[cnt];
651
652         V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
653         V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
654         V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
655         V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
656         V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);
657
658         cnt--;
659
660         if (cnt == -1) {
661             cnt = 9;
662         }
663
664     }
665
666 }
667
668 /* 
669  * CAUTION and DANGER!!! 
670  * 
671  * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function
672  * When exectuing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
673  * on its contents will cause things to break. The contents at the time of the exit WILL 
674  * change before the exit handler is executed.
675  */
/* Run the guest core for one VM entry/exit cycle.
 *
 * Performs time bookkeeping, reloads this core's VMCS if another core's
 * was active on the physical CPU, injects pending events, executes
 * VMLAUNCH (first entry) or VMRESUME, then captures exit information
 * and dispatches it to the exit handler.
 *
 * Returns 0 on success, -1 on VMENTRY failure or exit-handler error.
 * See the caution block above: the VMCS must not be touched outside the
 * interrupt-disabled window in this function.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // Update timer devices prior to entering VM.
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();

    // Reload our VMCS if a different core's VMCS is active on this CPU
    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
        vmcs_load(vmx_info->vmcs_ptr_phys);
        active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
    }

    // Push software-held guest state back into the VMCS
    v3_vmx_restore_vmcs(info);


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    // NOTE(review): read-then-write-back of guest CR3 -- presumably forces a
    // VMCS refresh of the field; intent not evident from this file, confirm
    // before removing
    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

    // Split the 64-bit TSC offset across the two 32-bit VMCS fields
    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);


    // First entry must use VMLAUNCH; subsequent entries must use VMRESUME
    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }
    
    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);
        PrintError("VMENTRY Error: %d\n", error);

        // NOTE(review): this path returns with global interrupts still
        // disabled (v3_enable_ints() below is skipped) -- TODO confirm
        return -1;
    }

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    // Capture all exit information while the VMCS is still safely ours
    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    // Record this exit in the circular debug log
    exit_log[info->num_exits % 10] = exit_info;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler\n");
        return -1;
    }

    return 0;
}
810
811
812 int v3_start_vmx_guest(struct guest_info * info) {
813
814     PrintDebug("Starting VMX core %u\n", info->cpu_id);
815
816     if (info->cpu_id == 0) {
817         info->core_run_state = CORE_RUNNING;
818         info->vm_info->run_state = VM_RUNNING;
819     } else {
820
821         PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);
822
823         while (info->core_run_state == CORE_STOPPED) {
824             v3_yield(info);
825             //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
826         }
827         
828         PrintDebug("VMX core %u initialized\n", info->cpu_id);
829     }
830
831
832     PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
833                info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
834                info->segments.cs.limit, (void *)(info->rip));
835
836
837     PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
838
839     v3_start_time(info);
840
841     while (1) {
842
843         if (info->vm_info->run_state == VM_STOPPED) {
844             info->core_run_state = CORE_STOPPED;
845             break;
846         }
847
848         if (v3_vmx_enter(info) == -1) {
849             v3_print_vmcs();
850             print_exit_log(info);
851             return -1;
852         }
853
854
855
856         if (info->vm_info->run_state == VM_STOPPED) {
857             info->core_run_state = CORE_STOPPED;
858             break;
859         }
860 /*
861         if ((info->num_exits % 5000) == 0) {
862             V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
863         }
864 */
865
866     }
867
868     return 0;
869 }
870
871
872
873
874 #define VMX_FEATURE_CONTROL_MSR     0x0000003a
875 #define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
876 #define CPUID_1_ECX_VTXFLAG 0x00000020
877
878 int v3_is_vmx_capable() {
879     v3_msr_t feature_msr;
880     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
881
882     v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);
883
884     PrintDebug("ECX: 0x%x\n", ecx);
885
886     if (ecx & CPUID_1_ECX_VTXFLAG) {
887         v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
888         
889         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
890
891         if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
892             PrintDebug("VMX is locked -- enable in the BIOS\n");
893             return 0;
894         }
895
896     } else {
897         PrintDebug("VMX not supported on this cpu\n");
898         return 0;
899     }
900
901     return 1;
902 }
903
904
905
906
907
908
909 void v3_init_vmx_cpu(int cpu_id) {
910
911     if (cpu_id == 0) {
912         if (v3_init_vmx_hw(&hw_info) == -1) {
913             PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
914             return;
915         }
916     }
917
918     enable_vmx();
919
920
921     // Setup VMXON Region
922     host_vmcs_ptrs[cpu_id] = allocate_vmcs();
923
924     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
925
926     if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
927         PrintDebug("VMX Enabled\n");
928     } else {
929         PrintError("VMX initialization failure\n");
930         return;
931     }
932     
933
934     {
935         struct vmx_sec_proc_ctrls sec_proc_ctrls;
936         sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));
937         
938         if (sec_proc_ctrls.enable_ept == 0) {
939             V3_Print("VMX EPT (Nested) Paging not supported\n");
940             v3_cpu_types[cpu_id] = V3_VMX_CPU;
941         } else if (sec_proc_ctrls.unrstrct_guest == 0) {
942             V3_Print("VMX EPT (Nested) Paging supported\n");
943             v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
944         } else {
945             V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
946             v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
947         }
948     }
949 }
950
951
952 void v3_deinit_vmx_cpu(int cpu_id) {
953     extern v3_cpu_arch_t v3_cpu_types[];
954     v3_cpu_types[cpu_id] = V3_INVALID_CPU;
955     V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
956 }