Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
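For example, to track a release branch instead (the branch name below is illustrative; run "git branch -r" inside the clone to list the real ones):

  cd palacios
  git checkout --track -b release-1.2 origin/release-1.2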


Fix for Intel hardware
palacios/src/palacios/vmx.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/vmx.h>
#include <palacios/vmm.h>
#include <palacios/vmx_handler.h>
#include <palacios/vmcs.h>
#include <palacios/vmx_lowlevel.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_config.h>
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>

#include <palacios/vmx_ept.h>
#include <palacios/vmx_assist.h>
#include <palacios/vmx_hw_info.h>

#ifndef CONFIG_DEBUG_VMX
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* These fields contain the hardware feature sets supported by the local CPU */
static struct vmx_hw_info hw_info;

extern v3_cpu_arch_t v3_cpu_types[];

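/* Per-physical-CPU bookkeeping: active_vmcs_ptrs[] caches the VMCS most
 * recently loaded (VMPTRLD) on each CPU so v3_vmx_enter() can skip redundant
 * loads; host_vmcs_ptrs[] holds each CPU's VMXON region. */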
static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};

extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);

static inline int check_vmcs_write(vmcs_field_t field, addr_t val) {
    int ret = 0;

    ret = vmcs_write(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
        return 1;
    }

    return 0;
}

static inline int check_vmcs_read(vmcs_field_t field, void * val) {
    int ret = 0;

    ret = vmcs_read(field, val);

    if (ret != VMX_SUCCESS) {
        PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
    }

    return ret;
}



static addr_t allocate_vmcs() {
    struct vmcs_data * vmcs_page = NULL;

    PrintDebug("Allocating page\n");

    vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
    memset(vmcs_page, 0, 4096);

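    /* Hardware requires the VMCS revision identifier (reported by the
     * IA32_VMX_BASIC MSR) in the first 32 bits of the region. */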
    vmcs_page->revision = hw_info.basic_info.revision;
    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);

    return (addr_t)V3_PAddr((void *)vmcs_page);
}



static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) {
    int vmx_ret = 0;

    // disable global interrupts for vm state initialization
    v3_disable_ints();

    PrintDebug("Loading VMCS\n");
    vmx_ret = vmcs_load(vmx_state->vmcs_ptr_phys);
    active_vmcs_ptrs[V3_Get_CPU()] = vmx_state->vmcs_ptr_phys;
    vmx_state->state = VMX_UNLAUNCHED;

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMPTRLD failed\n");
        // don't leave global interrupts disabled on the error path
        v3_enable_ints();
        return -1;
    }


    /*** Setup default state from HW ***/

    vmx_state->pin_ctrls.value = hw_info.pin_ctrls.def_val;
    vmx_state->pri_proc_ctrls.value = hw_info.proc_ctrls.def_val;
    vmx_state->exit_ctrls.value = hw_info.exit_ctrls.def_val;
    vmx_state->entry_ctrls.value = hw_info.entry_ctrls.def_val;
    vmx_state->sec_proc_ctrls.value = hw_info.sec_proc_ctrls.def_val;

    /* Print Control MSRs */
    PrintDebug("CR0 MSR: %p\n", (void *)(addr_t)hw_info.cr0.value);
    PrintDebug("CR4 MSR: %p\n", (void *)(addr_t)hw_info.cr4.value);



    /******* Setup Host State **********/

    /* Cache GDTR, IDTR, and TR in host struct */
    addr_t gdtr_base;
    struct {
        uint16_t selector;
        addr_t   base;
    } __attribute__((packed)) tmp_seg;
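    /* tmp_seg does double duty: SGDT/SIDT store a 16-bit limit (landing in
     * 'selector') followed by the linear base, while STR stores only the
     * 16-bit task register selector. */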


    __asm__ __volatile__(
                         "sgdt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    gdtr_base = tmp_seg.base;
    vmx_state->host_state.gdtr.base = gdtr_base;

    __asm__ __volatile__(
                         "sidt (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.idtr.base = tmp_seg.base;

    __asm__ __volatile__(
                         "str (%0);"
                         :
                         : "q"(&tmp_seg)
                         : "memory"
                         );
    vmx_state->host_state.tr.selector = tmp_seg.selector;

    /* The selector's *index* field (bits 3-15) locates the TSS descriptor
       in the GDT; each GDT entry is 8 bytes. */
    struct tss_descriptor * desc = NULL;
    desc = (struct tss_descriptor *)(gdtr_base + (8 * (tmp_seg.selector >> 3)));

    tmp_seg.base = ((desc->base1) |
                    (desc->base2 << 16) |
                    (desc->base3 << 24) |
#ifdef __V3_64BIT__
                    ((uint64_t)desc->base4 << 32)
#else
                    (0)
#endif
                    );

    vmx_state->host_state.tr.base = tmp_seg.base;


    /********** Setup VMX Control Fields ***********/

    /* Add external interrupts, NMI exiting, and virtual NMI */
    vmx_state->pin_ctrls.nmi_exit = 1;
    vmx_state->pin_ctrls.ext_int_exit = 1;


    vmx_state->pri_proc_ctrls.hlt_exit = 1;


    vmx_state->pri_proc_ctrls.pause_exit = 0;
    vmx_state->pri_proc_ctrls.tsc_offset = 1;
#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
#endif

    /* Setup IO map */
    vmx_state->pri_proc_ctrls.use_io_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(core->vm_info->io_map.arch_data));
    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
            (addr_t)V3_PAddr(core->vm_info->io_map.arch_data) + PAGE_SIZE_4KB);


    vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data));


#ifdef __V3_64BIT__
    vmx_state->exit_ctrls.host_64_on = 1;
#endif


    /* Not sure how exactly to handle this... */
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);

    // Or is it this??? 
    vmx_state->entry_ctrls.ld_efer = 1;
    vmx_state->exit_ctrls.ld_efer = 1;
    vmx_state->exit_ctrls.save_efer = 1;
    /*   ***   */

    vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE);

    /* Setup paging */
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return -1;
        }

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        core->ctrl_regs.cr3 = core->direct_map_pt;

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Add CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 1;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 1;

        vmx_state->pri_proc_ctrls.invlpg_exit = 1;

        /* Add page fault exits */
        vmx_state->excp_bmap.pf = 1;

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_CPU)) {

#define CR0_PE 0x00000001
#define CR0_PG 0x80000000
#define CR0_WP 0x00010000 // To ensure mem hooks work
        vmx_ret |= check_vmcs_write(VMCS_CR0_MASK, (CR0_PE | CR0_PG | CR0_WP));

        // vmx_state->pinbased_ctrls |= NMI_EXIT;

        /* Disable CR exits */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;

        /* Add page fault exits */
        //      vmx_state->excp_bmap.pf = 1; // This should never happen..., enabled to catch bugs

        // Setup VMX Assist
        v3_vmxassist_init(core, vmx_state);

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else if ((core->shdw_pg_mode == NESTED_PAGING) && 
               (v3_cpu_types[core->cpu_id] == V3_VMX_EPT_UG_CPU)) {
        int i = 0;
        // For now we will assume that unrestricted guest mode is assured w/ EPT

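        /* The values below approximate the real-mode BIOS entry state
         * (CS:IP = f000:fff0, RFLAGS reserved bit set); with unrestricted
         * guest mode the core runs real-mode code directly rather than
         * going through VMX Assist. */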
        core->vm_regs.rsp = 0x00;
        core->rip = 0xfff0;
        core->vm_regs.rdx = 0x00000f00;
        core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
        core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode


        core->segments.cs.selector = 0xf000;
        core->segments.cs.limit = 0xffff;
        core->segments.cs.base = 0x0000000f0000LL;

        // (raw attributes = 0xf3)
        core->segments.cs.type = 0xb;
        core->segments.cs.system = 0x1;
        core->segments.cs.dpl = 0x0;
        core->segments.cs.present = 1;



        struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                          &(core->segments.es), &(core->segments.fs), 
                                          &(core->segments.gs), NULL};

        for ( i = 0; segregs[i] != NULL; i++) {
            struct v3_segment * seg = segregs[i];

            seg->selector = 0x0000;
            //    seg->base = seg->selector << 4;
            seg->base = 0x00000000;
            seg->limit = 0xffff;


            seg->type = 0x3;
            seg->system = 0x1;
            seg->dpl = 0x0;
            seg->present = 1;
            //    seg->granularity = 1;

        }


        core->segments.gdtr.limit = 0x0000ffff;
        core->segments.gdtr.base = 0x0000000000000000LL;

        core->segments.idtr.limit = 0x0000ffff;
        core->segments.idtr.base = 0x0000000000000000LL;

        core->segments.ldtr.selector = 0x0000;
        core->segments.ldtr.limit = 0x0000ffff;
        core->segments.ldtr.base = 0x0000000000000000LL;
        core->segments.ldtr.type = 2;
        core->segments.ldtr.present = 1;

        core->segments.tr.selector = 0x0000;
        core->segments.tr.limit = 0x0000ffff;
        core->segments.tr.base = 0x0000000000000000LL;
        core->segments.tr.type = 0xb;
        core->segments.tr.present = 1;

        //      core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
        core->dbg_regs.dr7 = 0x0000000000000400LL;

        /* Enable EPT */
        vmx_state->pri_proc_ctrls.sec_ctrls = 1; // Enable secondary proc controls
        vmx_state->sec_proc_ctrls.enable_ept = 1; // enable EPT paging
        vmx_state->sec_proc_ctrls.unrstrct_guest = 1; // enable unrestricted guest operation


        /* Disable shadow paging stuff */
        vmx_state->pri_proc_ctrls.cr3_ld_exit = 0;
        vmx_state->pri_proc_ctrls.cr3_str_exit = 0;

        vmx_state->pri_proc_ctrls.invlpg_exit = 0;


        if (v3_init_ept(core, &hw_info) == -1) {
            PrintError("Error initializing EPT\n");
            return -1;
        }

    } else {
        PrintError("Invalid Virtual paging mode\n");
        return -1;
    }


    // hook vmx msrs

    // Setup SYSCALL/SYSENTER MSRs in load/store area

    // save STAR, LSTAR, FMASK, KERNEL_GS_BASE MSRs in MSR load/store area
    {
#define IA32_STAR 0xc0000081
#define IA32_LSTAR 0xc0000082
#define IA32_FMASK 0xc0000084
#define IA32_KERN_GS_BASE 0xc0000102

#define IA32_CSTAR 0xc0000083 // Compatibility mode STAR (ignored for now... hopefully it's not that important...)

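        /* These syscall-related MSRs are not part of the VMCS guest/host
         * state areas, so they are swapped at entry/exit via the explicit
         * MSR load/store lists instead. */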
        int msr_ret = 0;

        struct vmcs_msr_entry * exit_store_msrs = NULL;
        struct vmcs_msr_entry * exit_load_msrs = NULL;
        struct vmcs_msr_entry * entry_load_msrs = NULL;
        int max_msrs = (hw_info.misc_info.max_msr_cache_size + 1) * 4;

        V3_Print("Setting up MSR load/store areas (max_msr_count=%d)\n", max_msrs);

        if (max_msrs < 4) {
            PrintError("Max MSR cache size is too small (%d)\n", max_msrs);
            return -1;
        }

        vmx_state->msr_area = V3_VAddr(V3_AllocPages(1));

        if (vmx_state->msr_area == NULL) {
            PrintError("could not allocate msr load/store area\n");
            return -1;
        }

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_CNT, 4);
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_CNT, 4);


        exit_store_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area);
        exit_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 4));
        entry_load_msrs = (struct vmcs_msr_entry *)(vmx_state->msr_area + (sizeof(struct vmcs_msr_entry) * 8));


        exit_store_msrs[0].index = IA32_STAR;
        exit_store_msrs[1].index = IA32_LSTAR;
        exit_store_msrs[2].index = IA32_FMASK;
        exit_store_msrs[3].index = IA32_KERN_GS_BASE;

        // Give the load areas the same MSR index list as the store area
        memcpy(exit_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);
        memcpy(entry_load_msrs, exit_store_msrs, sizeof(struct vmcs_msr_entry) * 4);


        v3_get_msr(IA32_STAR, &(exit_load_msrs[0].hi), &(exit_load_msrs[0].lo));
        v3_get_msr(IA32_LSTAR, &(exit_load_msrs[1].hi), &(exit_load_msrs[1].lo));
        v3_get_msr(IA32_FMASK, &(exit_load_msrs[2].hi), &(exit_load_msrs[2].lo));
        v3_get_msr(IA32_KERN_GS_BASE, &(exit_load_msrs[3].hi), &(exit_load_msrs[3].lo));

        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_STORE_ADDR, (addr_t)V3_PAddr(exit_store_msrs));
        msr_ret |= check_vmcs_write(VMCS_EXIT_MSR_LOAD_ADDR, (addr_t)V3_PAddr(exit_load_msrs));
        msr_ret |= check_vmcs_write(VMCS_ENTRY_MSR_LOAD_ADDR, (addr_t)V3_PAddr(entry_load_msrs));

    }

    /* Sanity check ctrl/reg fields against hw_defaults */


    /*** Write all the info to the VMCS ***/

    /*
    {
        // IS THIS NECESSARY???
#define DEBUGCTL_MSR 0x1d9
        struct v3_msr tmp_msr;
        v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
        vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
        core->dbg_regs.dr7 = 0x400;
    }
    */

#ifdef __V3_64BIT__
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffffffffffULL);
#else
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, (addr_t)0xffffffffUL);
    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR_HIGH, (addr_t)0xffffffffUL);
#endif


    if (v3_update_vmcs_ctrl_fields(core)) {
        PrintError("Could not write control fields!\n");
        return -1;
    }

    if (v3_update_vmcs_host_state(core)) {
        PrintError("Could not write host state\n");
        return -1;
    }

    // reenable global interrupts for vm state initialization now
    // that the vm state is initialized. If another VM kicks us off, 
    // it'll update our vmx state so that we know to reload ourself
    v3_enable_ints();

    return 0;
}

int v3_init_vmx_vmcs(struct guest_info * core, v3_vm_class_t vm_class) {
    struct vmx_data * vmx_state = NULL;
    int vmx_ret = 0;

    vmx_state = (struct vmx_data *)V3_Malloc(sizeof(struct vmx_data));
    memset(vmx_state, 0, sizeof(struct vmx_data));

    PrintDebug("vmx_data pointer: %p\n", (void *)vmx_state);

    PrintDebug("Allocating VMCS\n");
    vmx_state->vmcs_ptr_phys = allocate_vmcs();

    PrintDebug("VMCS pointer: %p\n", (void *)(vmx_state->vmcs_ptr_phys));

    core->vmm_data = vmx_state;
    vmx_state->state = VMX_UNLAUNCHED;

    PrintDebug("Initializing VMCS (addr=%p)\n", core->vmm_data);

    // TODO: Fix vmcs fields so they're 32-bit

    PrintDebug("Clearing VMCS: %p\n", (void *)vmx_state->vmcs_ptr_phys);
    vmx_ret = vmcs_clear(vmx_state->vmcs_ptr_phys);

    if (vmx_ret != VMX_SUCCESS) {
        PrintError("VMCLEAR failed\n");
        return -1; 
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCS\n");
        if (init_vmcs_bios(core, vmx_state) == -1) {
            PrintError("Error initializing VMCS to BIOS state\n");
            return -1;
        }
    } else {
        PrintError("Invalid VM Class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_vmx_vmcs(struct guest_info * core) {
    struct vmx_data * vmx_state = core->vmm_data;

    V3_FreePages((void *)(vmx_state->vmcs_ptr_phys), 1);
    // msr_area holds a virtual address; free by physical address like the VMCS page
    V3_FreePages(V3_PAddr(vmx_state->msr_area), 1);

    V3_Free(vmx_state);

    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));

    if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        V3_Print("Calling v3_injecting_intr\n");
#endif
        info->intr_core_state.irq_started = 0;
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    return 0;
}

static int update_irq_entry_state(struct guest_info * info) {
    struct vmx_exit_idt_vec_info idt_vec_info;
    struct vmcs_interrupt_state intr_core_state;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value));
    check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_core_state));

    /* Check for pending exceptions to inject */
    if (v3_excp_pending(info)) {
        struct vmx_entry_int_info int_info;
        int_info.value = 0;

        // In VMX, almost every exception is hardware
        // Software exceptions are pretty much only for breakpoint or overflow
        int_info.type = 3;
        int_info.vector = v3_get_excp_number(info);

        if (info->excp_state.excp_error_code_valid) {
            check_vmcs_write(VMCS_ENTRY_EXCP_ERR, info->excp_state.excp_error_code);
            int_info.error_code = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
            V3_Print("Injecting exception %d with error code %x\n", 
                    int_info.vector, info->excp_state.excp_error_code);
#endif
        }

        int_info.valid = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
        V3_Print("Injecting exception %d (EIP=%p)\n", int_info.vector, (void *)(addr_t)info->rip);
#endif
        check_vmcs_write(VMCS_ENTRY_INT_INFO, int_info.value);

        v3_injecting_excp(info, int_info.vector);

    } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && 
               (intr_core_state.val == 0)) {

        if ((info->intr_core_state.irq_started == 1) && (idt_vec_info.valid == 1)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
            V3_Print("IRQ pending from previous injection\n");
#endif

            // Copy the IDT vectoring info over to reinject the old interrupt
            if (idt_vec_info.error_code == 1) {
                uint32_t err_code = 0;

                check_vmcs_read(VMCS_IDT_VECTOR_ERR, &err_code);
                check_vmcs_write(VMCS_ENTRY_EXCP_ERR, err_code);
            }

            idt_vec_info.undef = 0;
            check_vmcs_write(VMCS_ENTRY_INT_INFO, idt_vec_info.value);

        } else {
            struct vmx_entry_int_info ent_int;
            ent_int.value = 0;

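            /* VM-entry interruption-info 'type' encodings used below
             * (Intel SDM): 0 = external interrupt, 2 = NMI,
             * 3 = hardware exception, 4 = software interrupt. */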
            switch (v3_intr_pending(info)) {
                case V3_EXTERNAL_IRQ: {
                    info->intr_core_state.irq_vector = v3_get_intr(info); 
                    ent_int.vector = info->intr_core_state.irq_vector;
                    ent_int.type = 0;
                    ent_int.error_code = 0;
                    ent_int.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
                    V3_Print("Injecting Interrupt %d at exit %u(EIP=%p)\n", 
                               info->intr_core_state.irq_vector, 
                               (uint32_t)info->num_exits, 
                               (void *)(addr_t)info->rip);
#endif

                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);
                    info->intr_core_state.irq_started = 1;

                    break;
                }
                case V3_NMI:
                    PrintDebug("Injecting NMI\n");

                    ent_int.type = 2;
                    ent_int.vector = 2;
                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_SOFTWARE_INTR:
                    PrintDebug("Injecting software interrupt\n");
                    ent_int.type = 4;

                    ent_int.valid = 1;
                    check_vmcs_write(VMCS_ENTRY_INT_INFO, ent_int.value);

                    break;
                case V3_VIRTUAL_IRQ:
                    // Not sure what to do here, Intel doesn't have virtual IRQs
                    // May be the same as external interrupts/IRQs

                    break;
                case V3_INVALID_INTR:
                default:
                    break;
            }
        }
    } else if ((v3_intr_pending(info)) && (vmx_info->pri_proc_ctrls.int_wndw_exit == 0)) {
        // Enable INTR window exiting so we know when IF=1
        uint32_t instr_len;

        check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);

#ifdef CONFIG_DEBUG_INTERRUPTS
        V3_Print("Enabling Interrupt-Window exiting: %d\n", instr_len);
#endif

        vmx_info->pri_proc_ctrls.int_wndw_exit = 1;
        check_vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
    }


    return 0;
}


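/* Ring buffer of the 10 most recent exits; slot num_exits % 10 is the newest */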
static struct vmx_exit_info exit_log[10];

static void print_exit_log(struct guest_info * info) {
    int cnt = info->num_exits % 10;
    int i = 0;


    V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits);

    for (i = 0; i < 10; i++) {
        struct vmx_exit_info * tmp = &exit_log[cnt];

        V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason);
        V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual);
        V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info);
        V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err);
        V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info);

        cnt--;

        if (cnt == -1) {
            cnt = 9;
        }

    }

}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCS CANNOT(!!) be accessed outside of the cli/sti calls inside this function
 * When executing a symbiotic call, the VMCS WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_vmx_enter(struct guest_info * info) {
    int ret = 0;
    uint32_t tsc_offset_low, tsc_offset_high;
    struct vmx_exit_info exit_info;
    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // Update timer devices prior to entering VM.
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_disable_ints();


    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
        vmcs_load(vmx_info->vmcs_ptr_phys);
        active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
    }


    v3_vmx_restore_vmcs(info);


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    {
        addr_t guest_cr3;
        vmcs_read(VMCS_GUEST_CR3, &guest_cr3);
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
    }

    // Perform last-minute time bookkeeping prior to entering the VM
    v3_time_enter_vm(info);

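    // Write the 64-bit TSC offset as two 32-bit VMCS fields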
    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);

    if (v3_update_vmcs_host_state(info)) {
        v3_enable_ints();
        PrintError("Could not write host state\n");
        return -1;
    }


    if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
        info->vm_info->run_state = VM_RUNNING;
        ret = v3_vmx_launch(&(info->vm_regs), info, &(info->ctrl_regs));
    } else {
        V3_ASSERT(vmx_info->state != VMX_UNLAUNCHED);
        ret = v3_vmx_resume(&(info->vm_regs), info, &(info->ctrl_regs));
    }

    //  PrintDebug("VMX Exit: ret=%d\n", ret);

    if (ret != VMX_SUCCESS) {
        uint32_t error = 0;

        vmcs_read(VMCS_INSTR_ERR, &error);

        v3_enable_ints();

        PrintError("VMENTRY Error: %d\n", error);
        return -1;
    }

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    /* Update guest state */
    v3_vmx_save_vmcs(info);

    // info->cpl = info->segments.cs.selector & 0x3;

    info->mem_mode = v3_get_vm_mem_mode(info);
    info->cpu_mode = v3_get_vm_cpu_mode(info);


    check_vmcs_read(VMCS_EXIT_INSTR_LEN, &(exit_info.instr_len));
    check_vmcs_read(VMCS_EXIT_INSTR_INFO, &(exit_info.instr_info));
    check_vmcs_read(VMCS_EXIT_REASON, &(exit_info.exit_reason));
    check_vmcs_read(VMCS_EXIT_QUAL, &(exit_info.exit_qual));
    check_vmcs_read(VMCS_EXIT_INT_INFO, &(exit_info.int_info));
    check_vmcs_read(VMCS_EXIT_INT_ERR, &(exit_info.int_err));
    check_vmcs_read(VMCS_GUEST_LINEAR_ADDR, &(exit_info.guest_linear_addr));

    if (info->shdw_pg_mode == NESTED_PAGING) {
        check_vmcs_read(VMCS_GUEST_PHYS_ADDR, &(exit_info.ept_fault_addr));
    }

    //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual);

    exit_log[info->num_exits % 10] = exit_info;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
        // This is a special case whose only job is to inject an interrupt
        vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);

#ifdef CONFIG_DEBUG_INTERRUPTS
        V3_Print("Interrupts available again! (RIP=%llx)\n", info->rip);
#endif
    }

    // reenable global interrupts after vm exit
    v3_enable_ints();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_vmx_exit(info, &exit_info) == -1) {
        PrintError("Error in VMX exit handler\n");
        return -1;
    }

    return 0;
}


int v3_start_vmx_guest(struct guest_info * info) {

    PrintDebug("Starting VMX core %u\n", info->cpu_id);

    if (info->cpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {

        PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
        }

        PrintDebug("VMX core %u initialized\n", info->cpu_id);
    }


    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_vmx_enter(info) == -1) {
            v3_print_vmcs();
            print_exit_log(info);
            return -1;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("VMX Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    return 0;
}


#define VMX_FEATURE_CONTROL_MSR     0x0000003a
#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
#define CPUID_1_ECX_VTXFLAG 0x00000020

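/* CPUID.1:ECX bit 5 advertises VMX; in IA32_FEATURE_CONTROL (MSR 0x3a) the
 * lock bit (0) and the VMXON-outside-SMX enable bit (2) must both be set by
 * firmware before VMXON will succeed. */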
int v3_is_vmx_capable() {
    v3_msr_t feature_msr;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(0x1, &eax, &ebx, &ecx, &edx);

    PrintDebug("ECX: 0x%x\n", ecx);

    if (ecx & CPUID_1_ECX_VTXFLAG) {
        v3_get_msr(VMX_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));

        PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);

        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
            PrintDebug("VMX is locked -- enable in the BIOS\n");
            return 0;
        }

    } else {
        PrintDebug("VMX not supported on this cpu\n");
        return 0;
    }

    return 1;
}


void v3_init_vmx_cpu(int cpu_id) {

    if (cpu_id == 0) {
        if (v3_init_vmx_hw(&hw_info) == -1) {
            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
            return;
        }
    }

    enable_vmx();


    // Setup VMXON Region
    host_vmcs_ptrs[cpu_id] = allocate_vmcs();

    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);

    if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
        return;
    }


    {
        struct vmx_sec_proc_ctrls sec_proc_ctrls;
        sec_proc_ctrls.value = v3_vmx_get_ctrl_features(&(hw_info.sec_proc_ctrls));

        if (sec_proc_ctrls.enable_ept == 0) {
            V3_Print("VMX EPT (Nested) Paging not supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_CPU;
        } else if (sec_proc_ctrls.unrstrct_guest == 0) {
            V3_Print("VMX EPT (Nested) Paging supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
        } else {
            V3_Print("VMX EPT (Nested) Paging + Unrestricted guest supported\n");
            v3_cpu_types[cpu_id] = V3_VMX_EPT_UG_CPU;
        }
    }
}


void v3_deinit_vmx_cpu(int cpu_id) {
    extern v3_cpu_arch_t v3_cpu_types[];
    v3_cpu_types[cpu_id] = V3_INVALID_CPU;
    V3_FreePages((void *)host_vmcs_ptrs[cpu_id], 1);
}