Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Additional VMX support. Bootstrapping code added.
[palacios.git] / palacios / src / palacios / vmx.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
11  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
13  * All rights reserved.
14  *
15  * Author: Peter Dinda <pdinda@northwestern.edu>
16  *         Jack Lange <jarusl@cs.northwestern.edu>
17  *
18  * This is free software.  You are permitted to use,
19  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20  */
21
22
23 #include <palacios/vmx.h>
24 #include <palacios/vmcs.h>
25 #include <palacios/vmm.h>
26 #include <palacios/vmx_lowlevel.h>
27
28
29 // 
30 // 
31 // CRUFT
32 //
33 //
34
35
36
37 #include <palacios/vmm_util.h>
38 #include <palacios/vmm_string.h>
39 #include <palacios/vmm_ctrl_regs.h>
40
41
42
43 extern int Launch_VM(ullong_t vmcsPtr, uint_t eip);
44
45 #define NUMPORTS 65536
46
47
48 #define VMXASSIST_INFO_PORT   0x0e9
49 #define ROMBIOS_PANIC_PORT    0x400
50 #define ROMBIOS_PANIC_PORT2   0x401
51 #define ROMBIOS_INFO_PORT     0x402
52 #define ROMBIOS_DEBUG_PORT    0x403
53
54
55
56 static uint_t GetLinearIP(struct VM * vm) {
57   if (vm->state == VM_VMXASSIST_V8086_BIOS || vm->state == VM_VMXASSIST_V8086) { 
58     return vm->vmcs.guestStateArea.cs.baseAddr + vm->vmcs.guestStateArea.rip;
59   } else {
60     return vm->vmcs.guestStateArea.rip;
61   }
62 }
63
64
65
66
67 #define MAX_CODE 512
68 #define INSTR_OFFSET_START 17
69 #define NOP_SEQ_LEN        10
70 #define INSTR_OFFSET_END   (INSTR_OFFSET_START + NOP_SEQ_LEN - 1)
71 #define TEMPLATE_CODE_LEN  35
72
73 uint_t oldesp = 0;
74 uint_t myregs = 0;
75
76
77
78 static struct vmcs_data* vmxon_ptr;
79
80
81
82 extern uint_t VMCS_LAUNCH();
83 extern uint_t Init_VMCS_HostState();
84 extern uint_t Init_VMCS_GuestState();
85
86
87
88
89 extern int Get_CR2();
90 extern int vmRunning;
91
92
93
94
95
96 void DecodeCurrentInstruction(struct VM *vm, struct Instruction *inst)
97 {
98   // this is a gruesome hack
99   uint_t address = GetLinearIP(vm);
100   uint_t length = vm->vmcs.exitInfoFields.instrLength;
101   unsigned char *t = (unsigned char *) address;
102
103
104   
105   PrintTrace("DecodeCurrentInstruction: instruction is\n");
106   PrintTraceMemDump(t,length);
107   
108   if (length==3 && t[0]==0x0f && t[1]==0x22 && t[2]==0xc0) { 
109     // mov from eax to cr0
110     // usually used to signal
111     inst->type=VM_MOV_TO_CR0;
112     inst->address=address;
113     inst->size=length;
114     inst->input1=vm->registers.eax;
115     inst->input2=vm->vmcs.guestStateArea.cr0;
116     inst->output=vm->registers.eax;
117     PrintTrace("MOV FROM EAX TO CR0\n");
118   } else {
119     inst->type=VM_UNKNOWN_INST;
120   }
121 }
122
123
124 static void setup_v8086_mode_for_boot(struct guest_info* vm_info)
125 {
126
127     ((struct vmx_data*)vm_info->vmm_data)->state = VMXASSIST_V8086_BIOS;
128     ((struct rflags)info->ctrl_regs.rflags).vm = 1;
129     ((struct rflags)info->ctrl_regs.rflags).iopl = 3;
130
131
132     vm_info->rip = 0xfff0;
133
134     vm_info->segments.cs.selector = 0xf000;
135     vm_info->segments.cs.base = 0xf000<<4;
136     vm_info->segments.cs.limit = 0xffff;
137     vm_info->segments.cs.type = 3;
138     vm_info->segments.cs.system = 1;
139     vm_info->segments.cs.dpl = 3;
140     vm_info->segments.cs.present = 1;
141     vm_info->segments.cs.granularity = 0;
142
143     vm_info->segments.ss.selector = 0x0000;
144     vm_info->segments.ss.base = 0x0000<<4;
145     vm_info->segments.ss.limit = 0xffff;
146     vm_info->segments.ss.type = 3;
147     vm_info->segments.ss.system = 1;
148     vm_info->segments.ss.dpl = 3;
149     vm_info->segments.ss.present = 1;
150     vm_info->segments.ss.granularity = 0;
151
152     vm_info->segments.es.selector = 0x0000;
153     vm_info->segments.es.base = 0x0000<<4;
154     vm_info->segments.es.limit = 0xffff;
155     vm_info->segments.es.type = 3;
156     vm_info->segments.es.system = 1;
157     vm_info->segments.es.dpl = 3;
158     vm_info->segments.es.present = 1;
159     vm_info->segments.es.granularity = 0;
160
161     vm_info->segments.fs.selector = 0x0000;
162     vm_info->segments.fs.base = 0x0000<<4;
163     vm_info->segments.fs.limit = 0xffff;
164     vm_info->segments.fs.type = 3;
165     vm_info->segments.fs.system = 1;
166     vm_info->segments.fs.dpl = 3;
167     vm_info->segments.fs.present = 1;
168     vm_info->segments.fs.granularity = 0;
169
170     vm_info->segments.gs.selector = 0x0000;
171     vm_info->segments.gs.base = 0x0000<<4;
172     vm_info->segments.gs.limit = 0xffff;
173     vm_info->segments.gs.type = 3;
174     vm_info->segments.gs.system = 1;
175     vm_info->segments.gs.dpl = 3;
176     vm_info->segments.gs.present = 1;
177     vm_info->segments.gs.granularity = 0;
178 }
179
180 static void ConfigureExits(struct VM *vm)
181 {
182   CopyOutVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));
183
184   vm->vmcs.execCtrlFields.pinCtrls |= 0 
185     // EXTERNAL_INTERRUPT_EXITING 
186     | NMI_EXITING;
187   vm->vmcs.execCtrlFields.procCtrls |= 0
188       // INTERRUPT_WINDOWS_EXIT 
189       | USE_TSC_OFFSETTING
190       | HLT_EXITING  
191       | INVLPG_EXITING           
192       | MWAIT_EXITING            
193       | RDPMC_EXITING           
194       | RDTSC_EXITING         
195       | MOVDR_EXITING         
196       | UNCONDITION_IO_EXITING
197       | MONITOR_EXITING       
198       | PAUSE_EXITING         ;
199
200   CopyInVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));
201   
202   CopyOutVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));
203
204   vm->vmcs.exitCtrlFields.exitCtrls |= ACK_IRQ_ON_EXIT;
205   
206   CopyInVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));
207
208
209 /*   VMCS_READ(VM_EXIT_CTRLS, &flags); */
210 /*   flags |= ACK_IRQ_ON_EXIT; */
211 /*   VMCS_WRITE(VM_EXIT_CTRLS, &flags); */
212 }
213
214
215 extern int RunVMM();
216 extern int SAFE_VM_LAUNCH();
217
218 int MyLaunch(struct VM *vm)
219 {
220   ullong_t vmcs = (ullong_t)((uint_t) (vm->vmcsregion));
221   uint_t entry_eip = vm->descriptor.entry_ip;
222   uint_t exit_eip = vm->descriptor.exit_eip;
223   uint_t guest_esp = vm->descriptor.guest_esp;
224   uint_t f = 0xffffffff;
225   uint_t tmpReg = 0;
226   int ret;
227   int vmm_ret = 0;
228
229   PrintTrace("Guest ESP: 0x%x (%u)\n", guest_esp, guest_esp);
230
231   exit_eip = (uint_t)RunVMM;
232
233   PrintTrace("Clear\n");
234   VMCS_CLEAR(vmcs);
235   PrintTrace("Load\n");
236   VMCS_LOAD(vmcs);
237
238
239   PrintTrace("VMCS_LINK_PTR\n");
240   VMCS_WRITE(VMCS_LINK_PTR, &f);
241   PrintTrace("VMCS_LINK_PTR_HIGH\n");
242   VMCS_WRITE(VMCS_LINK_PTR_HIGH, &f);
243
244  
245   SetCtrlBitsCorrectly(IA32_VMX_PINBASED_CTLS_MSR, PIN_VM_EXEC_CTRLS);
246   SetCtrlBitsCorrectly(IA32_VMX_PROCBASED_CTLS_MSR, PROC_VM_EXEC_CTRLS);
247   SetCtrlBitsCorrectly(IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CTRLS);
248   SetCtrlBitsCorrectly(IA32_VMX_ENTRY_CTLS_MSR, VM_ENTRY_CTRLS);
249
250   //
251   //
252   //SetCtrlBitsCorrectly(IA32_something,GUEST_IA32_DEBUGCTL);
253   //SetCtrlBitsCorrectly(IA32_something,GUEST_IA32_DEBUGCTL_HIGH);
254
255
256   /* Host state */
257   PrintTrace("Setting up host state\n");
258   SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, HOST_CR0);
259   SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, HOST_CR4);
260   ret = Init_VMCS_HostState();
261
262   if (ret != VMX_SUCCESS) {
263     if (ret == VMX_FAIL_VALID) {
264       PrintTrace("Init Host state: VMCS FAILED WITH ERROR\n");
265     } else {
266       PrintTrace("Init Host state: Invalid VMCS\n");
267     }
268     return ret;
269   }
270
271   //  PrintTrace("HOST_RIP: %x (%u)\n", exit_eip, exit_eip);
272   VMCS_WRITE(HOST_RIP, &exit_eip);
273
274   /* Guest state */
275   PrintTrace("Setting up guest state\n");
276   PrintTrace("GUEST_RIP: %x (%u)\n", entry_eip, entry_eip);
277   VMCS_WRITE(GUEST_RIP, &entry_eip);
278
279   SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, GUEST_CR0);
280   SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, GUEST_CR4);
281   ret = Init_VMCS_GuestState();
282
283   PrintTrace("InitGuestState returned\n");
284
285   if (ret != VMX_SUCCESS) {
286     if (ret == VMX_FAIL_VALID) {
287       PrintTrace("Init Guest state: VMCS FAILED WITH ERROR\n");
288     } else {
289       PrintTrace("Init Guest state: Invalid VMCS\n");
290     }
291     return ret;
292   }
293   PrintTrace("GUEST_RSP: %x (%u)\n", guest_esp, (uint_t)guest_esp);
294   VMCS_WRITE(GUEST_RSP, &guest_esp);
295
296   //  tmpReg = 0x4100;
297   tmpReg = 0xffffffff;
298   if (VMCS_WRITE(EXCEPTION_BITMAP, &tmpReg) != VMX_SUCCESS) {
299     PrintInfo("Bitmap error\n");
300   }
301
302   ConfigureExits(vm);
303
304   PrintTrace("VMCS_LAUNCH\n");
305
306   vm->state=VM_VMXASSIST_STARTUP;
307
308   vmm_ret = SAFE_VM_LAUNCH();
309
310   PrintTrace("VMM error %d\n", vmm_ret);
311
312   return vmm_ret;
313 }
314
315
316
317   
318 int VMLaunch(struct VMDescriptor *vm) 
319 {
320   VMCS * vmcs = CreateVMCS();
321   int rc;
322
323   ullong_t vmcs_ptr = (ullong_t)((uint_t)vmcs);
324   uint_t top = (vmcs_ptr >> 32) & 0xffffffff;
325   uint_t bottom = (vmcs_ptr) & 0xffffffff;
326
327   theVM.vmcsregion = vmcs;
328   theVM.descriptor = *vm;
329
330   PrintTrace("vmcs_ptr_top=%x vmcs_ptr_bottom=%x, eip=%x\n", top, bottom, vm->entry_ip);
331   rc = MyLaunch(&theVM); // vmcs_ptr, vm->entry_ip, vm->exit_eip, vm->guest_esp);
332   PrintTrace("Returned from MyLaunch();\n");
333   return rc;
334 }
335
336
337
338
339 //
340 //
341 //  END CRUFT
342 //
343 //
344
345
346 // For the 32 bit reserved bit fields 
347 // MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
348 static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
349     v3_msr_t mask_msr;
350
351     PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
352
353     v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
354
355     PrintDebug("MSR %x = %x : %x \n", msr_num, msr.hi, msr.lo);
356
357     val &= mask_msr.lo;
358     val &= mask_msr.hi;
359   
360     return val;
361 }
362
363
364 static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
365     v3_msr_t msr0, msr1;
366     addr_t msr0_val, msr1_val;
367
368     PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);
369
370     v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
371     v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
372   
373     // This generates a mask that is the natural bit width of the CPU
374     msr0_val = msr0.value;
375     msr1_val = msr1.value;
376
377     PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, msr0_val, msr_num1, msr1_val);
378
379     val &= msr0_val;
380     val &= msr1_val;
381
382     return val;
383 }
384
385
386
387 static vmcs_data* allocate_vmcs() {
388     reg_ex_t msr;
389     vmcs_data* vmcs_page = (vmcs_data*)V3_VAddr(V3_AllocPages(1));
390
391     memset(vmcs_page, 0, 4096);
392
393     v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
394     
395     vmcs_page->revision = ((struct vmx_basic_msr)msr).revision;
396
397     return vmcs_page;
398 }
399
400
401
/*
 * Placeholder for the BIOS-boot VMCS initialisation of a guest.
 * The body has not been written yet, so calling this has no effect.
 */
static void init_vmcs_bios(struct guest_info * vm_info) 
{
    /* intentionally empty */
}
407
408
409
410 static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
411     v3_pre_config_guest(info, config_ptr);
412
413     struct vmx_data* data;
414
415     PrintDebug("Allocating vmx_data\n");
416     data = (struct vmx_data*)V3_Malloc(sizeof(struct vmx_data));
417     PrintDebug("Allocating VMCS\n");
418     data->vmcs = allocate_vmcs();
419
420     info->vmm_data = (void*)data;
421
422     PrintDebug("Initializing VMCS (addr=%p)\n", info->vmm_data);
423     init_vmcs_bios((struct vmx_data*)(info->vmm_data), info);
424
425     v3_post_config_guest(info, config_ptr);
426
427     return 0;
428 }
429
430
431
432
433 static int start_vmx_guest(struct guest_info *info) {
434     struct vmx_data* vmx_data = (struct vmx_data*)info->vmm_data;
435     int vmx_ret;
436
437     // Have to do a whole lot of flag setting here
438     vmx_ret = vmcs_clear(vmx_data->vmcs);
439     if(vmx_ret != VMX_SUCCESS) {
440         PrintDebug("VMCS Clear failed\n");
441         return -1;
442     }
443     vmx_ret = vmcs_load(vmx_data->vmcs);
444     if(vmx_ret != VMX_SUCCESS) {
445         PrintDebug("Executing VMPTRLD\n");
446         return -1;
447     }
448
449     // Setup guest state
450     return -1;
451 }
452
453
454
455
456
457
458 int v3_is_vmx_capable() {
459     uint_t ret;
460     v3_msr_t feature_msr;
461     addr_t eax = 0, ebx = 0, ecx = 0, edx = 0;
462
463     v3_cpuid(CPUID_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
464
465     if (ecx & CPUID_1_ECX_VTXFLAG) {
466         v3_get_msr(IA32_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
467         
468         PrintTrace("MSRREGlow: 0x%.8x\n", feature_msr.lo);
469
470         if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
471             PrintDebug("VMX is locked -- enable in the BIOS\n");
472             return 0;
473         }
474
475     } else {
476         PrintDebug("VMX not supported on this cpu\n");
477         return 0;
478     }
479
480     return 1;
481 }
482
/*
 * Report whether nested paging is available under VMX.
 * No detection is performed: the answer is always 0 (no).
 */
static int has_vmx_nested_paging(void) {
    const int nested_paging_available = 0;

    return nested_paging_available;
}
486
487
488
// We set up the global host state that is unlikely to change across processes here
// Segment Descriptors mainly

/* Placeholder: no fields have been defined for the descriptor layout yet. */
struct seg_descriptor {

};


/*
 * Set up the host state that is shared by all guests.
 *
 * Not implemented yet: nothing is written to the VMCS.  The function
 * returns 0 so that it has a defined result; falling off the end of a
 * non-void function leaves the caller reading an indeterminate value.
 */
static int setup_base_host_state() {
    // TODO: read the host GDT (a 10-byte scratch buffer was reserved for
    //       this) and write the host descriptor-table fields, e.g.
    //   vmwrite(HOST_IDTR_BASE, 

    return 0;
}
506
507
508
509 void v3_init_vmx(struct v3_ctrl_ops * vm_ops) {
510     v3_msr_t basic_msr;
511
512     
513     __asm__ __volatile__ (
514                           "movl %%cr4, %%ebx; "
515                           "orl  %%ebx, 0x00002000; "
516                           "movl %%ebx, %%cr4"
517                           );
518
519
520
521     // Should check and return Error here.... 
522     __asm__ __volatile__ (
523                           "movl %%cr0, %%ebx; "
524                           "orl  %%ebx, 0x00000020; "
525                           "movl %%ebx, %%cr0"
526                           );
527
528     // Setup VMXON Region
529     vmxon_ptr = allocate_vmcs();
530     PrintDebug("VMX revision: 0x%p\n", (void*)vmxon_ptr);
531
532     if (v3_enable_vmx(vmxon_ptr) == 0) {
533         PrintDebug("VMX Enabled\n");
534     } else {
535         PrintError("VMX initialization failure\n");
536         return;
537     }
538         
539
540     if (has_vmx_nested_paging() == 1) {
541         v3_cpu_type = V3_VMX_EPT_CPU;
542     } else {
543         v3_cpu_type = V3_VMX_CPU;
544     }
545
546     // Setup the VMX specific vmm operations
547     vmm_ops->init_guest = &init_vmx_guest;
548     vmm_ops->start_guest = &start_vmx_guest;
549     vmm_ops->has_nested_paging = &has_vmx_nested_paging;
550
551 }