Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches can be checked out the same way, substituting the branch name for "devel".


vmx refactoring
[palacios.git] / palacios / src / palacios / vmx.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Peter Dinda <pdinda@northwestern.edu> 
11  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
12  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
13  * All rights reserved.
14  *
15  * Author: Peter Dinda <pdinda@northwestern.edu>
16  *         Jack Lange <jarusl@cs.northwestern.edu>
17  *
18  * This is free software.  You are permitted to use,
19  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20  */
21
22
23 #include <palacios/vmx.h>
24 #include <palacios/vmcs.h>
25 #include <palacios/vmm.h>
26 #include <palacios/vmx_lowlevel.h>
27
28
29 // 
30 // 
31 // CRUFT
32 //
33 //
34
35
36
37 #include <palacios/vmm_util.h>
38 #include <palacios/vmm_string.h>
39 #include <palacios/vmm_ctrl_regs.h>
40
41
42
43 extern int Launch_VM(ullong_t vmcsPtr, uint_t eip);
44
45 #define NUMPORTS 65536
46
47
48 #define VMXASSIST_INFO_PORT   0x0e9
49 #define ROMBIOS_PANIC_PORT    0x400
50 #define ROMBIOS_PANIC_PORT2   0x401
51 #define ROMBIOS_INFO_PORT     0x402
52 #define ROMBIOS_DEBUG_PORT    0x403
53
54
55
56 static uint_t GetLinearIP(struct VM * vm) {
57   if (vm->state == VM_VMXASSIST_V8086_BIOS || vm->state == VM_VMXASSIST_V8086) { 
58     return vm->vmcs.guestStateArea.cs.baseAddr + vm->vmcs.guestStateArea.rip;
59   } else {
60     return vm->vmcs.guestStateArea.rip;
61   }
62 }
63
64
65
66
67 #define MAX_CODE 512
68 #define INSTR_OFFSET_START 17
69 #define NOP_SEQ_LEN        10
70 #define INSTR_OFFSET_END   (INSTR_OFFSET_START + NOP_SEQ_LEN - 1)
71 #define TEMPLATE_CODE_LEN  35
72
73 uint_t oldesp = 0;
74 uint_t myregs = 0;
75
76
77
78
79
80
81 extern uint_t VMCS_LAUNCH();
82 extern uint_t Init_VMCS_HostState();
83 extern uint_t Init_VMCS_GuestState();
84
85
86
87
88 extern int Get_CR2();
89 extern int vmRunning;
90
91
92
93
94
95 void DecodeCurrentInstruction(struct VM *vm, struct Instruction *inst)
96 {
97   // this is a gruesome hack
98   uint_t address = GetLinearIP(vm);
99   uint_t length = vm->vmcs.exitInfoFields.instrLength;
100   unsigned char *t = (unsigned char *) address;
101
102
103   
104   PrintTrace("DecodeCurrentInstruction: instruction is\n");
105   PrintTraceMemDump(t,length);
106   
107   if (length==3 && t[0]==0x0f && t[1]==0x22 && t[2]==0xc0) { 
108     // mov from eax to cr0
109     // usually used to signal
110     inst->type=VM_MOV_TO_CR0;
111     inst->address=address;
112     inst->size=length;
113     inst->input1=vm->registers.eax;
114     inst->input2=vm->vmcs.guestStateArea.cr0;
115     inst->output=vm->registers.eax;
116     PrintTrace("MOV FROM EAX TO CR0\n");
117   } else {
118     inst->type=VM_UNKNOWN_INST;
119   }
120 }
121
122
123 static void setup_v8086_mode_for_boot(struct guest_info* vm_info)
124 {
125
126     ((struct vmx_data*)vm_info->vmm_data)->state = VMXASSIST_V8086_BIOS;
127     ((struct rflags)info->ctrl_regs.rflags).vm = 1;
128     ((struct rflags)info->ctrl_regs.rflags).iopl = 3;
129
130
131     vm_info->rip = 0xfff0;
132
133     vm_info->segments.cs.selector = 0xf000;
134     vm_info->segments.cs.base = 0xf000<<4;
135     vm_info->segments.cs.limit = 0xffff;
136     vm_info->segments.cs.type = 3;
137     vm_info->segments.cs.system = 1;
138     vm_info->segments.cs.dpl = 3;
139     vm_info->segments.cs.present = 1;
140     vm_info->segments.cs.granularity = 0;
141
142     vm_info->segments.ss.selector = 0x0000;
143     vm_info->segments.ss.base = 0x0000<<4;
144     vm_info->segments.ss.limit = 0xffff;
145     vm_info->segments.ss.type = 3;
146     vm_info->segments.ss.system = 1;
147     vm_info->segments.ss.dpl = 3;
148     vm_info->segments.ss.present = 1;
149     vm_info->segments.ss.granularity = 0;
150
151     vm_info->segments.es.selector = 0x0000;
152     vm_info->segments.es.base = 0x0000<<4;
153     vm_info->segments.es.limit = 0xffff;
154     vm_info->segments.es.type = 3;
155     vm_info->segments.es.system = 1;
156     vm_info->segments.es.dpl = 3;
157     vm_info->segments.es.present = 1;
158     vm_info->segments.es.granularity = 0;
159
160     vm_info->segments.fs.selector = 0x0000;
161     vm_info->segments.fs.base = 0x0000<<4;
162     vm_info->segments.fs.limit = 0xffff;
163     vm_info->segments.fs.type = 3;
164     vm_info->segments.fs.system = 1;
165     vm_info->segments.fs.dpl = 3;
166     vm_info->segments.fs.present = 1;
167     vm_info->segments.fs.granularity = 0;
168
169     vm_info->segments.gs.selector = 0x0000;
170     vm_info->segments.gs.base = 0x0000<<4;
171     vm_info->segments.gs.limit = 0xffff;
172     vm_info->segments.gs.type = 3;
173     vm_info->segments.gs.system = 1;
174     vm_info->segments.gs.dpl = 3;
175     vm_info->segments.gs.present = 1;
176     vm_info->segments.gs.granularity = 0;
177 }
178
179 static void ConfigureExits(struct VM *vm)
180 {
181   CopyOutVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));
182
183   vm->vmcs.execCtrlFields.pinCtrls |= 0 
184     // EXTERNAL_INTERRUPT_EXITING 
185     | NMI_EXITING;
186   vm->vmcs.execCtrlFields.procCtrls |= 0
187       // INTERRUPT_WINDOWS_EXIT 
188       | USE_TSC_OFFSETTING
189       | HLT_EXITING  
190       | INVLPG_EXITING           
191       | MWAIT_EXITING            
192       | RDPMC_EXITING           
193       | RDTSC_EXITING         
194       | MOVDR_EXITING         
195       | UNCONDITION_IO_EXITING
196       | MONITOR_EXITING       
197       | PAUSE_EXITING         ;
198
199   CopyInVMCSExecCtrlFields(&(vm->vmcs.execCtrlFields));
200   
201   CopyOutVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));
202
203   vm->vmcs.exitCtrlFields.exitCtrls |= ACK_IRQ_ON_EXIT;
204   
205   CopyInVMCSExitCtrlFields(&(vm->vmcs.exitCtrlFields));
206
207
208 /*   VMCS_READ(VM_EXIT_CTRLS, &flags); */
209 /*   flags |= ACK_IRQ_ON_EXIT; */
210 /*   VMCS_WRITE(VM_EXIT_CTRLS, &flags); */
211 }
212
213
214 extern int RunVMM();
215 extern int SAFE_VM_LAUNCH();
216
// Launch the VM described by *vm: load its VMCS, initialize host and guest
// state, configure exit conditions, and enter the guest via SAFE_VM_LAUNCH.
// Returns a VMX status code on setup failure, otherwise the value returned
// from the launch/VMM loop.
int MyLaunch(struct VM *vm)
{
  // Physical address of the VMCS region (pointer is truncated through
  // uint_t first -- 32-bit build assumption).
  ullong_t vmcs = (ullong_t)((uint_t) (vm->vmcsregion));
  uint_t entry_eip = vm->descriptor.entry_ip;
  uint_t exit_eip = vm->descriptor.exit_eip;
  uint_t guest_esp = vm->descriptor.guest_esp;
  // All-ones pattern used to mark the VMCS link pointer as unused.
  uint_t f = 0xffffffff;
  uint_t tmpReg = 0;
  int ret;
  int vmm_ret = 0;

  PrintTrace("Guest ESP: 0x%x (%u)\n", guest_esp, guest_esp);

  // The descriptor-supplied exit EIP is overridden: every VM exit enters RunVMM.
  exit_eip = (uint_t)RunVMM;

  PrintTrace("Clear\n");
  VMCS_CLEAR(vmcs);
  PrintTrace("Load\n");
  VMCS_LOAD(vmcs);


  // No shadow VMCS is used, so the link pointer must be all 1s.
  PrintTrace("VMCS_LINK_PTR\n");
  VMCS_WRITE(VMCS_LINK_PTR, &f);
  PrintTrace("VMCS_LINK_PTR_HIGH\n");
  VMCS_WRITE(VMCS_LINK_PTR_HIGH, &f);

 
  // Force each control field to honor the fixed-0/fixed-1 bits the CPU
  // reports in the corresponding capability MSR.
  SetCtrlBitsCorrectly(IA32_VMX_PINBASED_CTLS_MSR, PIN_VM_EXEC_CTRLS);
  SetCtrlBitsCorrectly(IA32_VMX_PROCBASED_CTLS_MSR, PROC_VM_EXEC_CTRLS);
  SetCtrlBitsCorrectly(IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CTRLS);
  SetCtrlBitsCorrectly(IA32_VMX_ENTRY_CTLS_MSR, VM_ENTRY_CTRLS);

  //
  //
  //SetCtrlBitsCorrectly(IA32_something,GUEST_IA32_DEBUGCTL);
  //SetCtrlBitsCorrectly(IA32_something,GUEST_IA32_DEBUGCTL_HIGH);


  /* Host state */
  PrintTrace("Setting up host state\n");
  SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, HOST_CR0);
  SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, HOST_CR4);
  ret = Init_VMCS_HostState();

  if (ret != VMX_SUCCESS) {
    if (ret == VMX_FAIL_VALID) {
      PrintTrace("Init Host state: VMCS FAILED WITH ERROR\n");
    } else {
      PrintTrace("Init Host state: Invalid VMCS\n");
    }
    return ret;
  }

  //  PrintTrace("HOST_RIP: %x (%u)\n", exit_eip, exit_eip);
  VMCS_WRITE(HOST_RIP, &exit_eip);

  /* Guest state */
  PrintTrace("Setting up guest state\n");
  PrintTrace("GUEST_RIP: %x (%u)\n", entry_eip, entry_eip);
  VMCS_WRITE(GUEST_RIP, &entry_eip);

  SetCRBitsCorrectly(IA32_VMX_CR0_FIXED0_MSR, IA32_VMX_CR0_FIXED1_MSR, GUEST_CR0);
  SetCRBitsCorrectly(IA32_VMX_CR4_FIXED0_MSR, IA32_VMX_CR4_FIXED1_MSR, GUEST_CR4);
  ret = Init_VMCS_GuestState();

  PrintTrace("InitGuestState returned\n");

  if (ret != VMX_SUCCESS) {
    if (ret == VMX_FAIL_VALID) {
      PrintTrace("Init Guest state: VMCS FAILED WITH ERROR\n");
    } else {
      PrintTrace("Init Guest state: Invalid VMCS\n");
    }
    return ret;
  }
  PrintTrace("GUEST_RSP: %x (%u)\n", guest_esp, (uint_t)guest_esp);
  VMCS_WRITE(GUEST_RSP, &guest_esp);

  // Trap on every exception (all bitmap bits set).
  //  tmpReg = 0x4100;
  tmpReg = 0xffffffff;
  if (VMCS_WRITE(EXCEPTION_BITMAP, &tmpReg) != VMX_SUCCESS) {
    PrintInfo("Bitmap error\n");
  }

  ConfigureExits(vm);

  PrintTrace("VMCS_LAUNCH\n");

  vm->state=VM_VMXASSIST_STARTUP;

  vmm_ret = SAFE_VM_LAUNCH();

  PrintTrace("VMM error %d\n", vmm_ret);

  return vmm_ret;
}
313
314
315
316   
317 int VMLaunch(struct VMDescriptor *vm) 
318 {
319   VMCS * vmcs = CreateVMCS();
320   int rc;
321
322   ullong_t vmcs_ptr = (ullong_t)((uint_t)vmcs);
323   uint_t top = (vmcs_ptr >> 32) & 0xffffffff;
324   uint_t bottom = (vmcs_ptr) & 0xffffffff;
325
326   theVM.vmcsregion = vmcs;
327   theVM.descriptor = *vm;
328
329   PrintTrace("vmcs_ptr_top=%x vmcs_ptr_bottom=%x, eip=%x\n", top, bottom, vm->entry_ip);
330   rc = MyLaunch(&theVM); // vmcs_ptr, vm->entry_ip, vm->exit_eip, vm->guest_esp);
331   PrintTrace("Returned from MyLaunch();\n");
332   return rc;
333 }
334
335
336
337
338 //
339 //
340 //  END CRUFT
341 //
342 //
343
344
345 // For the 32 bit reserved bit fields 
346 // MB1s are in the low 32 bits, MBZs are in the high 32 bits of the MSR
347 static uint32_t sanitize_bits1(uint32_t msr_num, uint32_t val) {
348     v3_msr_t mask_msr;
349
350     PrintDebug("sanitize_bits1 (MSR:%x)\n", msr_num);
351
352     v3_get_msr(msr_num, &mask_msr.hi, &mask_msr.lo);
353
354     PrintDebug("MSR %x = %x : %x \n", msr_num, msr.hi, msr.lo);
355
356     val &= mask_msr.lo;
357     val &= mask_msr.hi;
358   
359     return val;
360 }
361
362
363 static addr_t sanitize_bits2(uint32_t msr_num0, uint32_t msr_num1, addr_t val) {
364     v3_msr_t msr0, msr1;
365     addr_t msr0_val, msr1_val;
366
367     PrintDebug("sanitize_bits2 (MSR0=%x, MSR1=%x)\n", msr_num0, msr_num1);
368
369     v3_get_msr(msr_num0, &msr0.hi, &msr0.lo);
370     v3_get_msr(msr_num1, &msr1.hi, &msr1.lo);
371   
372     // This generates a mask that is the natural bit width of the CPU
373     msr0_val = msr0.value;
374     msr1_val = msr1.value;
375
376     PrintDebug("MSR %x = %p, %x = %p \n", msr_num0, msr0_val, msr_num1, msr1_val);
377
378     val &= msr0_val;
379     val &= msr1_val;
380
381     return val;
382 }
383
384
385
386 static vmcs_data* allocate_vmcs() {
387     reg_ex_t msr;
388     vmcs_data* vmcs_page = (vmcs_data*)V3_VAddr(V3_AllocPages(1));
389
390     memset(vmcs_page, 0, 4096);
391
392     v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
393     
394     vmcs_page->revision = ((struct vmx_basic_msr)msr).revision;
395
396     return vmcs_page;
397 }
398
399
400
// Populate the VMCS with BIOS-bootable guest state.
// NOTE(review): unimplemented stub -- intentionally empty for now.
static void init_vmcs_bios(vmcs_t * vmcs, struct guest_info * vm_info) {

}
404
405
406
407 static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
408     v3_pre_config_guest(info, config_ptr);
409
410     struct vmx_data* data;
411
412     PrintDebug("Allocating vmx_data\n");
413     data = (struct vmx_data*)V3_Malloc(sizeof(vmx_data));
414     PrintDebug("Allocating VMCS\n");
415     data->vmcs = allocate_vmcs();
416
417     info->vmm_data = (void*)data;
418
419     PrintDebug("Initializing VMCS (addr=%p)\n", (void *)info->vmm_data);
420     init_vmcs_bios((vmcs_t *)(info->vmm_data), info);
421
422     v3_post_config_guest(info, config_ptr);
423
424     return 0;
425 }
426
427
428
429
// Start a VMX guest.  Not implemented yet -- always fails.
// fix: renamed from 'start_svm_guest'; v3_init_vmx registers
// '&start_vmx_guest', which otherwise has no definition in this file.
static int start_vmx_guest(struct guest_info *info) {
    return -1;
}
433
434
435
436
437
438
439 int v3_is_vmx_capable() {
440     uint_t ret;
441     v3_msr_t feature_msr;
442     addr_t eax = 0, ebx = 0, ecx = 0, edx = 0;
443
444     v3_cpuid(CPUID_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
445
446     if (ecx & CPUID_1_ECX_VTXFLAG) {
447         v3_get_msr(IA32_FEATURE_CONTROL_MSR, &(feature_msr.hi), &(feature_msr.lo));
448         
449         PrintTrace("MSRREGlow: 0x%.8x\n", feature_msr.lo);
450
451         if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
452             PrintDebug("VMX is locked -- enable in the BIOS\n");
453             return 0;
454         }
455
456     } else {
457         PrintDebug("VMX not supported on this cpu\n");
458         return 0;
459     }
460
461     return 1;
462 }
463
// VMX nested paging (EPT) is not implemented yet, so always report "no".
static int has_vmx_nested_paging() {
    return 0;
}
467
468
469
470 // We set up the global host state that is unlikely to change across processes here
471 // Segment Descriptors mainly
472
// NOTE(review): empty placeholder for a host segment descriptor layout.
// An empty struct is a GNU extension, not standard C -- fill in or remove.
struct seg_descriptor {

};
476
477
// Set up the global host state that is unlikely to change across guests
// (mainly segment descriptors).
// NOTE(review): unfinished stub -- 'gdt' is never used and the function
// falls off the end without returning a value (UB if a caller ever uses
// the result).  Nothing in this file calls it yet.
static int setup_base_host_state() {
    uint8_t gdt[10];
    


    //   vmwrite(HOST_IDTR_BASE, 


}
487
488
489
490 void v3_init_vmx(struct v3_ctrl_ops * vm_ops) {
491     v3_msr_t basic_msr;
492
493     // Setup the host state save area
494     void * host_state = V3_AllocPages(1);
495
496     v3_get_msr(VMX_BASIC_MSR, &(basic_msr.hi), &(basic_msr.lo));
497     
498     *(uint32_t *)host_state = ((struct vmx_basic_msr *)basic_msr.value)->revision;
499     
500     PrintDebug("VMX revision: 0x%p\n", host_state);
501
502     __asm__ __volatile__ (
503                           "movl %%cr4, %%ebx; "
504                           "orl  %%ebx, 0x00002000; "
505                           "movl %%ebx, %%cr4"
506                           );
507
508
509
510     // Should check and return Error here.... 
511     __asm__ __volatile__ (
512                           "movl %%cr0, %%ebx; "
513                           "orl  %%ebx, 0x00000020; "
514                           "movl %%ebx, %%cr0"
515                           );
516
517
518     if (v3_enable_vmx(host_state) == 0) {
519         PrintDebug("VMX Enabled\n");
520     } else {
521         PrintError("VMX initialization failure\n");
522         return;
523     }
524         
525
526     if (has_vmx_nested_paging() == 1) {
527         v3_cpu_type = V3_VMX_EPT_CPU;
528     } else {
529         v3_cpu_type = V3_VMX_CPU;
530     }
531
532     // Setup the VMX specific vmm operations
533     vmm_ops->init_guest = &init_vmx_guest;
534     vmm_ops->start_guest = &start_vmx_guest;
535     vmm_ops->has_nested_paging = &has_vmx_nested_paging;
536
537 }