2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_mem.h>
21 #include <palacios/vmm.h>
22 #include <palacios/vmcb.h>
23 #include <palacios/vmm_decoder.h>
24 #include <palacios/vm_guest_mem.h>
25 #include <palacios/vmm_ctrl_regs.h>
26 #include <palacios/vmm_direct_paging.h>
27 #include <palacios/svm.h>
29 #ifndef CONFIG_DEBUG_CTRL_REGS
31 #define PrintDebug(fmt, args...)
35 static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr);
36 static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr);
37 static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr);
40 // First Attempt = 494 lines
41 // current = 106 lines
// Emulate a guest write to CR0 (VMEXIT handler).
// Fetches the faulting instruction from guest memory, decodes it, and
// dispatches on opcode: LMSW, MOV-to-CR0, or CLTS. On success the guest
// RIP is advanced past the emulated instruction.
// NOTE(review): this excerpt is missing lines (declarations of `ret` and the
// `instr` fetch buffer, else-branches, returns, closing braces) — the full
// function in the original file returns 0 on success and -1 on failure.
42 int v3_handle_cr0_write(struct guest_info * info) {
45     struct x86_instr dec_instr;
    // Fetch up to 15 bytes (the max x86 instruction length) at the guest RIP,
    // using a physical- or virtual-address read depending on guest paging state.
47     if (info->mem_mode == PHYSICAL_MEM) {
48 ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
50 ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    // Decode the raw bytes into a structured x86 instruction.
53     if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
54 PrintError("Could not decode instruction\n");
    // Dispatch on the decoded opcode; each helper returns -1 on failure.
59     if (dec_instr.op_type == V3_OP_LMSW) {
60 if (handle_lmsw(info, &dec_instr) == -1) {
63     } else if (dec_instr.op_type == V3_OP_MOV2CR) {
64 if (handle_mov_to_cr0(info, &dec_instr) == -1) {
67     } else if (dec_instr.op_type == V3_OP_CLTS) {
68 if (handle_clts(info, &dec_instr) == -1) {
72 PrintError("Unhandled opcode in handle_cr0_write\n");
    // Skip the guest past the instruction we just emulated.
76     info->rip += dec_instr.instr_length;
84 // The CR0 register only has flags in the low 32 bits
85 // The hardware does a format check to make sure the high bits are zero
86 // Because of this we can ignore the high 32 bits here
// Emulate MOV to CR0.
// Maintains two CR0 views: the value the guest believes it wrote
// (shdw_pg_state.guest_cr0) and the value actually loaded into hardware
// (ctrl_regs.cr0). If the write toggles CR0.PG, the page tables are
// switched (shadow or passthrough) and, for shadow paging with EFER.LME
// set, long mode is activated by setting LMA/LME in the shadow EFER.
// NOTE(review): several lines are hidden in this excerpt (the forced-1 bit
// write mentioned at the "must always be set to 1" comment, else-branches,
// returns) — treat the visible flow as partial.
87 static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr) {
    // shadow_cr0 = the CR0 the hardware sees; new_cr0 = the value the guest
    // is writing (from the decoded source operand); guest_cr0 = virtualized copy.
89     struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
90     struct cr0_32 * new_cr0 = (struct cr0_32 *)(dec_instr->src_operand.operand);
91     struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
92     uint_t paging_transition = 0;
94     PrintDebug("MOV2CR0 (MODE=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
96     PrintDebug("OperandVal = %x, length=%d\n", *(uint_t *)new_cr0, dec_instr->src_operand.size);
98     PrintDebug("Old CR0=%x\n", *(uint_t *)shadow_cr0);
99     PrintDebug("Old Guest CR0=%x\n", *(uint_t *)guest_cr0);
    // A change to the PG bit means the guest is turning paging on or off,
    // which requires rebuilding the page tables below.
102     // We detect if this is a paging transition
103     if (guest_cr0->pg != new_cr0->pg) {
104 paging_transition = 1;
107     // Guest always sees the value they wrote
108     *guest_cr0 = *new_cr0;
110     // This value must always be set to 1
113     // Set the shadow register to catch non-virtualized flags
114     *shadow_cr0 = *guest_cr0;
116     // Paging is always enabled
119     // Was there a paging transition
120     // Meaning we need to change the page tables
121     if (paging_transition) {
122 if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
124     struct efer_64 * guest_efer = (struct efer_64 *)&(info->shdw_pg_state.guest_efer);
125     struct efer_64 * shadow_efer = (struct efer_64 *)&(info->ctrl_regs.efer);
    // Enabling paging with EFER.LME set completes the transition to long
    // mode, so LMA must be reflected in the hardware EFER.
127     // Check long mode LME to set LME
128     if (guest_efer->lme == 1) {
129 PrintDebug("Enabing Long Mode\n");
132 shadow_efer->lma = 1;
133 shadow_efer->lme = 1;
135 PrintDebug("New EFER %p\n", (void *)*(addr_t *)(shadow_efer));
138     PrintDebug("Activating Shadow Page Tables\n");
    // Guest turned paging ON: build/activate shadow page tables.
140     if (v3_activate_shadow_pt(info) == -1) {
141 PrintError("Failed to activate shadow page tables\n");
    // (Hidden else-branch) Guest turned paging OFF: fall back to the
    // identity-mapped passthrough tables.
148 if (v3_activate_passthrough_pt(info) == -1) {
149     PrintError("Failed to activate passthrough page tables\n");
156     PrintDebug("New Guest CR0=%x\n",*(uint_t *)guest_cr0);
157     PrintDebug("New CR0=%x\n", *(uint_t *)shadow_cr0);
// Emulate CLTS (clear the Task-Switched flag, CR0.TS).
// Clears TS in the hardware CR0 and, under shadow paging, also in the
// virtualized guest copy so the guest observes the change.
// NOTE(review): the actual `ts = 0` assignments and the return statement
// are in lines not visible in this excerpt.
165 static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr) {
167     struct cr0_32 * real_cr0 = (struct cr0_32*)&(info->ctrl_regs.cr0);
171     if (info->shdw_pg_mode == SHADOW_PAGING) {
172 struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
// Emulate LMSW (Load Machine Status Word).
// LMSW can only modify the low 4 bits of CR0 (PE/MP/EM/TS), so the new
// value is masked with 0x0f and merged into the hardware CR0. Under shadow
// paging the same merge is applied to the virtualized guest CR0 copy.
179 static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr) {
180     struct cr0_real * real_cr0 = (struct cr0_real *)&(info->ctrl_regs.cr0);
    // The decoder (XED) reports LMSW's source in the *destination* operand
    // slot, hence reading dst_operand here.
181     // XED is a mess, and basically reverses the operand order for an LMSW
182     struct cr0_real * new_cr0 = (struct cr0_real *)(dec_instr->dst_operand.operand);
185     PrintDebug("LMSW\n");
    // Only the low nibble of the operand is architecturally significant.
187     new_cr0_val = (*(char*)(new_cr0)) & 0x0f;
189     PrintDebug("OperandVal = %x\n", new_cr0_val);
191     // We can just copy the new value through
192     // we don't need to virtualize the lower 4 bits
193     PrintDebug("Old CR0=%x\n", *(uint_t *)real_cr0);
    // Clear then set the low 4 bits of the hardware CR0.
194     *(uchar_t*)real_cr0 &= 0xf0;
195     *(uchar_t*)real_cr0 |= new_cr0_val;
196     PrintDebug("New CR0=%x\n", *(uint_t *)real_cr0);
199     // If Shadow paging is enabled we push the changes to the virtualized copy of cr0
200     if (info->shdw_pg_mode == SHADOW_PAGING) {
201 struct cr0_real * guest_cr0 = (struct cr0_real*)&(info->shdw_pg_state.guest_cr0);
203 PrintDebug("Old Guest CR0=%x\n", *(uint_t *)guest_cr0);
    // Mirror the same low-nibble merge into the guest-visible CR0.
204 *(uchar_t*)guest_cr0 &= 0xf0;
205 *(uchar_t*)guest_cr0 |= new_cr0_val;
206 PrintDebug("New Guest CR0=%x\n", *(uint_t *)guest_cr0);
215 // First attempt = 253 lines
216 // current = 51 lines
// Emulate a guest read of CR0 (MOV from CR0, or SMSW).
// Fetches and decodes the faulting instruction, then writes the appropriate
// CR0 view into the decoded destination operand: under shadow paging the
// guest sees its virtualized CR0 copy, otherwise the hardware CR0. Operand
// width (64 vs 32 bit) follows the guest CPU mode. SMSW only transfers the
// low 4 bits. RIP is advanced past the instruction on success.
// NOTE(review): declarations of `ret`/`instr`, else-branches, and returns
// are in lines not visible in this excerpt.
217 int v3_handle_cr0_read(struct guest_info * info) {
220     struct x86_instr dec_instr;
    // Fetch up to 15 bytes at the guest RIP (physical vs virtual read
    // depending on guest paging state).
222     if (info->mem_mode == PHYSICAL_MEM) {
223 ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
225 ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
229     if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
230 PrintError("Could not decode instruction\n");
234     if (dec_instr.op_type == V3_OP_MOVCR2) {
235 PrintDebug("MOVCR2 (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
    // 64-bit destination for long mode / compat mode.
237 if ((v3_get_vm_cpu_mode(info) == LONG) ||
238     (v3_get_vm_cpu_mode(info) == LONG_32_COMPAT)) {
239     struct cr0_64 * dst_reg = (struct cr0_64 *)(dec_instr.dst_operand.operand);
    // Shadow paging: return the virtualized copy, not the hardware value.
241     if (info->shdw_pg_mode == SHADOW_PAGING) {
242 struct cr0_64 * guest_cr0 = (struct cr0_64 *)&(info->shdw_pg_state.guest_cr0);
243 *dst_reg = *guest_cr0;
245 struct cr0_64 * shadow_cr0 = (struct cr0_64 *)&(info->ctrl_regs.cr0);
246 *dst_reg = *shadow_cr0;
249     PrintDebug("returned CR0: %p\n", (void *)*(addr_t *)dst_reg);
    // (Hidden else) 32-bit destination for non-long modes.
251     struct cr0_32 * dst_reg = (struct cr0_32 *)(dec_instr.dst_operand.operand);
253     if (info->shdw_pg_mode == SHADOW_PAGING) {
254 struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
255 *dst_reg = *guest_cr0;
257 struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
258 *dst_reg = *shadow_cr0;
261     PrintDebug("returned CR0: %x\n", *(uint_t*)dst_reg);
264     } else if (dec_instr.op_type == V3_OP_SMSW) {
265 struct cr0_real * shadow_cr0 = (struct cr0_real *)&(info->ctrl_regs.cr0);
266 struct cr0_real * dst_reg = (struct cr0_real *)(dec_instr.dst_operand.operand);
    // SMSW returns only the low 4 CR0 bits (PE/MP/EM/TS).
267 char cr0_val = *(char*)shadow_cr0 & 0x0f;
269 PrintDebug("SMSW\n");
271 // The lower 4 bits of the guest/shadow CR0 are mapped through
272 // We can treat nested and shadow paging the same here
273 *(char *)dst_reg &= 0xf0;
274 *(char *)dst_reg |= cr0_val;
277 PrintError("Unhandled opcode in handle_cr0_read\n");
281     info->rip += dec_instr.instr_length;
289 // First Attempt = 256 lines
290 // current = 65 lines
// Emulate a guest write to CR3 (MOV to CR3).
// Under shadow paging: record the new value in the virtualized guest CR3
// and, if guest paging is active, rebuild/activate the shadow page tables.
// Under nested paging: pass the value straight through to the hardware CR3.
// RIP is advanced past the instruction on success.
// NOTE(review): declarations of `ret`/`instr`, else keywords, returns and
// closing braces are in lines not visible in this excerpt.
291 int v3_handle_cr3_write(struct guest_info * info) {
294     struct x86_instr dec_instr;
    // Fetch up to 15 instruction bytes at the guest RIP.
296     if (info->mem_mode == PHYSICAL_MEM) {
297 ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
299 ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
302     if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
303 PrintError("Could not decode instruction\n");
307     if (dec_instr.op_type == V3_OP_MOV2CR) {
308 PrintDebug("MOV2CR3 (cpu_mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
310 if (info->shdw_pg_mode == SHADOW_PAGING) {
311     PrintDebug("Old Shadow CR3=%p; Old Guest CR3=%p\n",
312        (void *)(addr_t)(info->ctrl_regs.cr3),
313        (void*)(addr_t)(info->shdw_pg_state.guest_cr3));
    // Record the guest's new CR3 in the virtualized copy; the operand
    // width depends on the CPU mode (64-bit in long mode, else 32-bit).
316     // We update the guest CR3
317     if (info->cpu_mode == LONG) {
318 struct cr3_64 * new_cr3 = (struct cr3_64 *)(dec_instr.src_operand.operand);
319 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->shdw_pg_state.guest_cr3);
320 *guest_cr3 = *new_cr3;
322 struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr.src_operand.operand);
323 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
324 *guest_cr3 = *new_cr3;
    // A CR3 load flushes the TLB; with shadow paging that means the
    // shadow tables must be rebuilt for the new guest address space.
328     // If Paging is enabled in the guest then we need to change the shadow page tables
329     if (info->mem_mode == VIRTUAL_MEM) {
330 if (v3_activate_shadow_pt(info) == -1) {
331     PrintError("Failed to activate 32 bit shadow page table\n");
336     PrintDebug("New Shadow CR3=%p; New Guest CR3=%p\n",
337        (void *)(addr_t)(info->ctrl_regs.cr3),
338        (void*)(addr_t)(info->shdw_pg_state.guest_cr3));
340 } else if (info->shdw_pg_mode == NESTED_PAGING) {
    // Nested paging: hardware walks the guest's own tables, so just copy
    // the value into the real CR3 register image.
342     // This is just a passthrough operation which we probably don't need here
343     if (info->cpu_mode == LONG) {
344 struct cr3_64 * new_cr3 = (struct cr3_64 *)(dec_instr.src_operand.operand);
345 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->ctrl_regs.cr3);
346 *guest_cr3 = *new_cr3;
348 struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr.src_operand.operand);
349 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3);
350 *guest_cr3 = *new_cr3;
355 PrintError("Unhandled opcode in handle_cr3_write\n");
359     info->rip += dec_instr.instr_length;
366 // first attempt = 156 lines
367 // current = 36 lines
// Emulate a guest read of CR3 (MOV from CR3).
// Under shadow paging the guest receives its virtualized CR3 copy
// (shdw_pg_state.guest_cr3); under nested paging it receives the hardware
// CR3 (ctrl_regs.cr3). Operand width follows the guest CPU mode.
// RIP is advanced past the instruction on success.
// NOTE(review): declarations of `ret`/`instr`, else keywords, returns and
// closing braces are in lines not visible in this excerpt.
368 int v3_handle_cr3_read(struct guest_info * info) {
371     struct x86_instr dec_instr;
    // Fetch up to 15 instruction bytes at the guest RIP.
373     if (info->mem_mode == PHYSICAL_MEM) {
374 ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
376 ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
379     if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
380 PrintError("Could not decode instruction\n");
384     if (dec_instr.op_type == V3_OP_MOVCR2) {
385 PrintDebug("MOVCR32 (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
387 if (info->shdw_pg_mode == SHADOW_PAGING) {
    // Shadow paging: the guest must never see the shadow CR3 — return
    // the virtualized copy, 64- or 32-bit depending on CPU mode.
389     if ((v3_get_vm_cpu_mode(info) == LONG) ||
390 (v3_get_vm_cpu_mode(info) == LONG_32_COMPAT)) {
391 struct cr3_64 * dst_reg = (struct cr3_64 *)(dec_instr.dst_operand.operand);
392 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->shdw_pg_state.guest_cr3);
393 *dst_reg = *guest_cr3;
395 struct cr3_32 * dst_reg = (struct cr3_32 *)(dec_instr.dst_operand.operand);
396 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
397 *dst_reg = *guest_cr3;
400 } else if (info->shdw_pg_mode == NESTED_PAGING) {
    // Nested paging: hardware CR3 is the guest's CR3, pass it through.
402     // This is just a passthrough operation which we probably don't need here
403     if ((v3_get_vm_cpu_mode(info) == LONG) ||
404 (v3_get_vm_cpu_mode(info) == LONG_32_COMPAT)) {
405 struct cr3_64 * dst_reg = (struct cr3_64 *)(dec_instr.dst_operand.operand);
406 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->ctrl_regs.cr3);
407 *dst_reg = *guest_cr3;
409 struct cr3_32 * dst_reg = (struct cr3_32 *)(dec_instr.dst_operand.operand);
410 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3);
411 *dst_reg = *guest_cr3;
416 PrintError("Unhandled opcode in handle_cr3_read\n");
420     info->rip += dec_instr.instr_length;
426 // We don't need to virtualize CR4, all we need is to detect the activation of PAE
// CR4 reads are not intercepted/virtualized — the guest sees CR4 directly
// (see the comment at line 426: only PAE activation needs detecting, which
// is handled in the write path below). This handler is effectively a no-op.
// NOTE(review): the return statement is in a line not visible in this excerpt.
427 int v3_handle_cr4_read(struct guest_info * info) {
428     // PrintError("CR4 Read not handled\n");
// Emulate a guest write to CR4 (MOV to CR4 only).
// Two concerns: (1) under paging, a change to PSE/PGE/PAE implies a TLB
// flush, so the shadow page tables must be re-activated; (2) under shadow
// paging with physical-mode guests, toggling PAE requires rebuilding the
// passthrough page tables in the matching format. Long-mode guests may not
// clear PAE. RIP is advanced past the instruction on success.
// NOTE(review): the `flush_tlb` flag declaration/assignments, else
// branches, returns and closing braces are in lines not visible here.
433 int v3_handle_cr4_write(struct guest_info * info) {
437     struct x86_instr dec_instr;
438     v3_cpu_mode_t cpu_mode = v3_get_vm_cpu_mode(info);
    // Fetch up to 15 instruction bytes at the guest RIP.
440     if (info->mem_mode == PHYSICAL_MEM) {
441 ret = v3_read_gpa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
443 ret = v3_read_gva_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
446     if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
447 PrintError("Could not decode instruction\n");
    // Only MOV-to-CR4 is legal here; anything else is a decode/guest error.
451     if (dec_instr.op_type != V3_OP_MOV2CR) {
452 PrintError("Invalid opcode in write to CR4\n");
456     // Check to see if we need to flush the tlb
458     if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
459 struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand);
460 struct cr4_32 * cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4);
462 // if pse, pge, or pae have changed while PG (in any mode) is on
463 // the side effect is a TLB flush, which means we need to
464 // toss the current shadow page tables too
467 // TODO - PAE FLAG needs to be special cased
468 if ((cr4->pse != new_cr4->pse) ||
469     (cr4->pge != new_cr4->pge) ||
470     (cr4->pae != new_cr4->pae)) {
471     PrintDebug("Handling PSE/PGE/PAE -> TLBFlush case, flag set\n");
    // 32-bit CR4 handling for protected / protected-PAE guests.
478     if ((cpu_mode == PROTECTED) || (cpu_mode == PROTECTED_PAE)) {
479 struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand);
480 struct cr4_32 * cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4);
482 PrintDebug("OperandVal = %x, length = %d\n", *(uint_t *)new_cr4, dec_instr.src_operand.size);
483 PrintDebug("Old CR4=%x\n", *(uint_t *)cr4);
485 if ((info->shdw_pg_mode == SHADOW_PAGING)) {
486     if (v3_get_vm_mem_mode(info) == PHYSICAL_MEM) {
    // Guest enabling PAE while still unpaged: the passthrough (identity)
    // tables must be rebuilt in PAE format before the guest turns on PG.
488 if ((cr4->pae == 0) && (new_cr4->pae == 1)) {
489     PrintDebug("Creating PAE passthrough tables\n");
491     // create 32 bit PAE direct map page table
492     if (v3_reset_passthrough_pts(info) == -1) {
493 PrintError("Could not create 32 bit PAE passthrough pages tables\n");
497     // reset cr3 to new page tables
498     info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
500 } else if ((cr4->pae == 1) && (new_cr4->pae == 0)) {
    // PAE -> non-PAE downgrade is unimplemented; the error below fires.
501     // Create passthrough standard 32bit pagetables
502     PrintError("Switching From PAE to Protected mode not supported\n");
509 PrintDebug("New CR4=%x\n", *(uint_t *)cr4);
    // 64-bit CR4 handling for long-mode / compat-mode guests.
511     } else if ((cpu_mode == LONG) || (cpu_mode == LONG_32_COMPAT)) {
512 struct cr4_64 * new_cr4 = (struct cr4_64 *)(dec_instr.src_operand.operand);
513 struct cr4_64 * cr4 = (struct cr4_64 *)&(info->ctrl_regs.cr4);
515 PrintDebug("Old CR4=%p\n", (void *)*(addr_t *)cr4);
516 PrintDebug("New CR4=%p\n", (void *)*(addr_t *)new_cr4);
    // Architecturally, clearing CR4.PAE while in long mode must #GP.
518 if (new_cr4->pae == 0) {
519     // cannot turn off PAE in long mode GPF the guest
520     PrintError("Cannot disable PAE in long mode, should send GPF\n");
527 PrintError("CR4 write not supported in CPU_MODE: %s\n", v3_cpu_mode_to_str(cpu_mode));
    // Deferred TLB-flush emulation: rebuild the shadow tables now that the
    // new CR4 value has been applied.
533 PrintDebug("Handling PSE/PGE/PAE -> TLBFlush (doing flush now!)\n");
534 if (v3_activate_shadow_pt(info) == -1) {
535     PrintError("Failed to activate shadow page tables when emulating TLB flush in handling cr4 write\n");
541     info->rip += dec_instr.instr_length;
// MSR-read handler for EFER (rdmsr intercept).
// Returns the guest's virtualized EFER copy, not the hardware value —
// hiding host-controlled bits (see the write handler's SVME/LMA comment).
// `msr` and `priv_data` are part of the generic MSR-hook signature and
// are unused here.
546 int v3_handle_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data) {
547     PrintDebug("EFER Read HI=%x LO=%x\n", core->shdw_pg_state.guest_efer.hi, core->shdw_pg_state.guest_efer.lo);
549     dst->value = core->shdw_pg_state.guest_efer.value;
556 // TODO: this is a disaster we need to clean this up...
// MSR-write handler for EFER (wrmsr intercept).
// Stores the guest's full written value into the virtualized EFER copy
// (so later reads see exactly what the guest wrote), while propagating
// only the SCE (syscall-enable) bit into the hardware EFER image —
// SVME and LMA stay under VMM control.
// `msr` and `priv_data` are part of the generic MSR-hook signature and
// are unused here.
556 // TODO: this is a disaster we need to clean this up...
557 int v3_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
558     //struct efer_64 * new_efer = (struct efer_64 *)&(src.value);
559     struct efer_64 * shadow_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
560     struct v3_msr * guest_efer = &(core->shdw_pg_state.guest_efer);
562     PrintDebug("EFER Write\n");
563     PrintDebug("EFER Write Values: HI=%x LO=%x\n", src.hi, src.lo);
564     //PrintDebug("Old EFER=%p\n", (void *)*(addr_t*)(shadow_efer));
566     // We virtualize the guests efer to hide the SVME and LMA bits
567     guest_efer->value = src.value;
    // EFER.SCE is bit 0; mask it out of the written value and mirror it
    // into the hardware EFER so guest SYSCALL/SYSRET work as requested.
570     // Enable/Disable Syscall
571     shadow_efer->sce = src.value & 0x1;
// MSR-read handler for the SVM VM_CR MSR.
// Always reports LOCK|SVMDIS so the guest concludes nested SVM is disabled
// by firmware (and locked), keeping it from attempting to use SVM that this
// VMM does not virtualize. `msr` and `priv_data` are unused hook parameters.
576 int v3_handle_vm_cr_read(struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data) {
577     /* tell the guest that the BIOS disabled SVM, that way it doesn't get
578      * confused by the fact that CPUID reports SVM as available but it still
581     dst->value = SVM_VM_CR_MSR_lock | SVM_VM_CR_MSR_svmdis;
582     PrintDebug("VM_CR Read HI=%x LO=%x\n", dst->hi, dst->lo);
// MSR-write handler for the SVM VM_CR MSR.
// Writes limited to the LOCK/SVMDIS bits are silently ignored (matching
// AMD's documented behavior for locked VM_CR); any other set bit means the
// guest wants an unvirtualized SVM feature, which is logged below.
// NOTE(review): the consequence of the final check (return/injection) is in
// lines not visible in this excerpt. `msr`/`priv_data` are unused.
586 int v3_handle_vm_cr_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
587     PrintDebug("VM_CR Write\n");
588     PrintDebug("VM_CR Write Values: HI=%x LO=%x\n", src.hi, src.lo);
590     /* writes to LOCK and SVMDIS are silently ignored (according to the spec),
591      * other writes indicate the guest wants to use some feature we haven't
    // Any bit outside LOCK|SVMDIS is an unsupported-feature request.
594     if (src.value & ~(SVM_VM_CR_MSR_lock | SVM_VM_CR_MSR_svmdis)) {
595 PrintDebug("VM_CR write sets unsupported bits: HI=%x LO=%x\n", src.hi, src.lo);