Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
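For example, to list the remote branches and track a release branch instead, you might run something like the following (the release branch name here is illustrative; use one reported by the first command):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2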


Commit: added support for Intel EPT with(?) and without unrestricted guest support
File: palacios/src/palacios/vmx.c  [palacios.git]
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_cpu_types[];

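/* Per-physical-CPU state: the VMCS currently loaded on each CPU, and the
 * VMXON region (host VMCS) allocated for each CPU at init time. */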
static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}




static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}




static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    active_vmcs_ptrs[V3_Get_CPU()] = vmx_state->vmcs_ptr_phys;
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)hw_info.cr4.value);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;

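    /* SGDT and SIDT store a pseudo-descriptor (a 16-bit limit followed by
     * the base address), while STR stores only the 16-bit task register
     * selector. The packed struct above is reused for all three: the 16-bit
     * field receives the limit (or selector), and the base lands in the
     * second field. */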
    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_state->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.tr.selector = tmp_seg.selector;

    /* The GDTR *index* is bits 3-15 of the selector. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

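    /* In long mode the TSS descriptor is 16 bytes; its base address is
     * scattered across four fields (bits 0-15, 16-23, 24-31, and 32-63),
     * which are stitched back together here. */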
    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else 
                    (0)
#endif
                    );

    vmx_state->host_state.tr.base = tmp_seg.base;


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    vmx_state->pri_proc_ctrls.hlt_exit = 1;
    vmx_state->pri_proc_ctrls.invlpg_exit = 1;

    vmx_state->pri_proc_ctrls.pause_exit = 1;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));




#ifdef __V3_64BIT__
    vmx_state->exit_ctrls.host_64_on = 1;
#endif



    /* Not sure how exactly to handle this... */
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);


    vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);



    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }
        
#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;
        
        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_CPU)) {

        // initialize 1to1 pts

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        //vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        //vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs
        
        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        //      vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation

        vmx_state->entry_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.save_efer = 1;

        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }


    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT

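        /* These values approximate the architectural reset state of an x86
         * CPU (RIP=0xFFF0, EDX holding processor identification info, RFLAGS
         * with only the always-one reserved bit set, CR0=0x60000010), except
         * that CS gets the flat real-mode base selector<<4 = 0xF0000 rather
         * than the reset base 0xFFFF0000, and CR0.WP is raised so the memory
         * hooks keep working while the guest runs real-mode code. */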
        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode


        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;



        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];
        
            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation

        vmx_state->entry_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.ld_efer = 1;
        vmx_state->exit_ctrls.save_efer = 1;


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else {
        PrintError("Invalid Virtual paging mode\n");
        return -1;
    }


    // Hook the VMX msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area


    /* Sanity check ctrl/reg fields against hw_defaults */




    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif




    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }
    
    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }

    // reenable global interrupts for vm state initialization now
    // that the vm state is initialized. If another VM kicks us off, 
    // it'll update our vmx state so that we know to reload ourselves
    v3_enable_ints();

    return 0;
}

int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;
    
    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);
    
    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1; 
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        init_vmcs_bios(core, vmx_state);
    } else {
        PrintError("Invalid VM Class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);

    V3_Free(vmx_state);

    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

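        /* The VM-entry interruption-information field packs the vector in
         * bits 0-7, the event type in bits 8-10 (0 = external interrupt,
         * 2 = NMI, 3 = hardware exception, 4 = software interrupt), an
         * error-code-valid flag in bit 11, and a valid bit in bit 31 --
         * hence type 3 for the exceptions injected below. */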
        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {
       
        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
                    PrintDebug("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}



static struct vmx_exit_info exit_log[10];

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;
    

    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function.
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // Update timer devices prior to entering VM.
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();


    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
        vmcs_load(vmx_info->vmcs_ptr_phys);
        active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
    }


    v3_vmx_restore_vmcs(info);


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

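    /* The TSC offset is a 64-bit VMCS field; it is written here as two
     * 32-bit halves so the same code works on 32-bit hosts. */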
    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);


    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }
    
    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);
        PrintError("VMENTRY Error: %d\n", error);

        return -1;
    }

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler\n");
        return -1;
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->cpu_id);

    if (info->cpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
        }
        
        PrintDebug("VMX core %u initialized\n", info->cpu_id);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {
            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }



        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}



#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020
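/* In the IA32_FEATURE_CONTROL MSR (0x3a), bit 0 is the lock bit and bit 2
 * enables VMXON outside SMX operation, so the 0x5 mask checks both; CPUID
 * leaf 1 advertises VMX support in ECX bit 5 (0x20). */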

int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
        
        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}






void v3_init_vmx_cpu(int cpu_id) {

    if (cpu_id == 0) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
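    /* The VMXON region has the same size and revision-ID layout as a VMCS,
     * so the same allocator is reused for it here. */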
    host_vmcs_ptrs[cpu_id] = allocate_vmcs();

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
        return;
    }
    

    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));
        
        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;
    V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
}