/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_mem.h>
#include <palacios/vmm.h>
#include <palacios/vmcb.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
/* Segmentation is a problem here...
 *
 * When we get a memory operand, presumably we use the default segment (which is?)
 * unless an alternate segment was specified in the prefix...
 */
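/* For reference (general x86 behavior, not specific to this code): the default
 * segment for most memory operands is DS, accesses based on rSP/rBP default to
 * SS, and string destinations use ES; a segment-override prefix replaces that
 * default. So the decoder would need to carry any override prefix through to
 * get_addr_linear() to compute the right linear address for the operand. */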
#ifndef DEBUG_CTRL_REGS
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr);

static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_mov_to_cr0_32(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_mov_to_cr0_32pae(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_mov_to_cr0_64(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_mov_to_cr0_64compat(struct guest_info * info, struct x86_instr * dec_instr);

static int handle_mov_to_cr3_32(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_mov_to_cr3_32pae(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_mov_to_cr3_64(struct guest_info * info, struct x86_instr * dec_instr);
static int handle_mov_to_cr3_64compat(struct guest_info * info, struct x86_instr * dec_instr);
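
/*
 * Common pattern for the CR access handlers below: the faulting instruction is
 * re-read from guest memory (physical or virtual, depending on whether guest
 * paging is on), decoded with v3_decode(), emulated against the virtualized
 * control register state in info->ctrl_regs / info->shdw_pg_state, and then
 * the guest RIP is advanced past the instruction.
 */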
// First Attempt = 494 lines
// current = 106 lines
int v3_handle_cr0_write(struct guest_info * info) {
    uchar_t instr[15];
    int ret;
    struct x86_instr dec_instr;
    if (info->mem_mode == PHYSICAL_MEM) {
        ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    } else {
        ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    }
    /* The IFetch will already have faulted in the necessary bytes for the full instruction
    if (ret != 15) {
        // I think we should inject a GPF into the guest
        PrintError("Could not read instruction (ret=%d)\n", ret);
        return -1;
    }
    */
    if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
        PrintError("Could not decode instruction\n");
        return -1;
    }
    if (v3_opcode_cmp(V3_OPCODE_LMSW, (const uchar_t *)(dec_instr.opcode)) == 0) {
        if (handle_lmsw(info, &dec_instr) == -1) {
            return -1;
        }
    } else if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) {
        if (handle_mov_to_cr0(info, &dec_instr) == -1) {
            return -1;
        }
    } else if (v3_opcode_cmp(V3_OPCODE_CLTS, (const uchar_t *)(dec_instr.opcode)) == 0) {
        if (handle_clts(info, &dec_instr) == -1) {
            return -1;
        }
    } else {
        PrintError("Unhandled opcode in handle_cr0_write\n");
        return -1;
    }

    info->rip += dec_instr.instr_length;

    return 0;
}
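
/*
 * MOV-to-CR0 is dispatched on the current CPU operating mode. Only the 32 bit
 * path is actually implemented below; the PAE, long mode, and compatibility
 * mode variants are stubs that simply report an error.
 */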
static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr) {
    PrintDebug("MOV2CR0\n");
    switch (info->cpu_mode) {
        case REAL:
        case PROTECTED:
            return handle_mov_to_cr0_32(info, dec_instr);
        case PROTECTED_PAE:
            return handle_mov_to_cr0_32pae(info, dec_instr);
        case LONG:
            return handle_mov_to_cr0_64(info, dec_instr);
        case LONG_32_COMPAT:
            return handle_mov_to_cr0_64compat(info, dec_instr);
        default:
            PrintError("Invalid CPU Operating Mode: %d\n", info->cpu_mode);
            return -1;
    }
}
static int handle_mov_to_cr0_32pae(struct guest_info * info, struct x86_instr * dec_instr) {
    PrintError("32 bit PAE mov to CR0 not implemented\n");
    return -1;
}
static int handle_mov_to_cr0_64(struct guest_info * info, struct x86_instr * dec_instr) {
    PrintError("64 bit mov to CR0 not implemented\n");
    return -1;
}
static int handle_mov_to_cr0_64compat(struct guest_info * info, struct x86_instr * dec_instr) {
    PrintError("64 bit compatibility mode move to CR0 not implemented\n");
    return -1;
}
static int handle_mov_to_cr0_32(struct guest_info * info, struct x86_instr * dec_instr) {

    struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
    struct cr0_32 * new_cr0 = (struct cr0_32 *)(dec_instr->src_operand.operand);
    struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);

    PrintDebug("OperandVal = %x, length=%d\n", *(uint_t *)new_cr0, dec_instr->src_operand.size);

    PrintDebug("Old CR0=%x\n", *(uint_t *)shadow_cr0);
    PrintDebug("Old Guest CR0=%x\n", *(uint_t *)guest_cr0);
    // Guest always sees the value they wrote
    *guest_cr0 = *new_cr0;

    // This value must always be set to 1
    guest_cr0->et = 1;

    // Set the shadow register to catch non-virtualized flags
    *shadow_cr0 = *guest_cr0;
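
    /*
     * Two copies of CR0 are kept: shdw_pg_state.guest_cr0 is what the guest
     * observes on reads, while ctrl_regs.cr0 (the "shadow" copy) is the value
     * the hardware/VMCB actually runs with. Mirroring the guest value into the
     * shadow copy lets non-virtualized flag changes take effect, and the
     * paging check below keeps CR3 consistent with the new memory mode.
     */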
    if (v3_get_mem_mode(info) == VIRTUAL_MEM) {
        struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);

        PrintDebug("Setting up Guest Page Table\n");
        info->ctrl_regs.cr3 = *(addr_t*)guest_cr3;
    } else {
        info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
    }

    PrintDebug("New Guest CR0=%x\n", *(uint_t *)guest_cr0);
    PrintDebug("New CR0=%x\n", *(uint_t *)shadow_cr0);

    return 0;
}
static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr) {
    // CLTS: clear the Task Switched flag in CR0
    struct cr0_32 * real_cr0 = (struct cr0_32*)&(info->ctrl_regs.cr0);

    real_cr0->ts = 0;

    if (info->shdw_pg_mode == SHADOW_PAGING) {
        struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
        guest_cr0->ts = 0;
    }

    return 0;
}
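
/*
 * LMSW only loads the low 16 bits of CR0, and architecturally only the PE, MP,
 * EM, and TS flags (bits 0-3) can be modified through it, which is why the
 * handler below masks the operand with 0x0f and leaves the rest of CR0 alone.
 * Since none of those low bits need to be virtualized, the value is written
 * straight through to both the shadow copy and the guest-visible copy.
 */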
static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr) {
    struct cr0_real * real_cr0 = (struct cr0_real*)&(info->ctrl_regs.cr0);
    struct cr0_real * new_cr0 = (struct cr0_real *)(dec_instr->src_operand.operand);
    uchar_t new_cr0_val;

    PrintDebug("LMSW\n");
    new_cr0_val = (*(char*)(new_cr0)) & 0x0f;

    PrintDebug("OperandVal = %x\n", new_cr0_val);
    // We can just copy the new value through
    // we don't need to virtualize the lower 4 bits
    PrintDebug("Old CR0=%x\n", *(uint_t *)real_cr0);
    *(uchar_t*)real_cr0 &= 0xf0;
    *(uchar_t*)real_cr0 |= new_cr0_val;
    PrintDebug("New CR0=%x\n", *(uint_t *)real_cr0);
    // If Shadow paging is enabled we push the changes to the virtualized copy of cr0
    if (info->shdw_pg_mode == SHADOW_PAGING) {
        struct cr0_real * guest_cr0 = (struct cr0_real*)&(info->shdw_pg_state.guest_cr0);

        PrintDebug("Old Guest CR0=%x\n", *(uint_t *)guest_cr0);
        *(uchar_t*)guest_cr0 &= 0xf0;
        *(uchar_t*)guest_cr0 |= new_cr0_val;
        PrintDebug("New Guest CR0=%x\n", *(uint_t *)guest_cr0);
    }

    return 0;
}
// First attempt = 253 lines
// current = 51 lines
int v3_handle_cr0_read(struct guest_info * info) {
    uchar_t instr[15];
    int ret;
    struct x86_instr dec_instr;
    if (info->mem_mode == PHYSICAL_MEM) {
        ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    } else {
        ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    }
    /* The IFetch will already have faulted in the necessary bytes for the full instruction
    if (ret != 15) {
        // I think we should inject a GPF into the guest
        PrintError("Could not read instruction (ret=%d)\n", ret);
        return -1;
    }
    */
    if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
        PrintError("Could not decode instruction\n");
        return -1;
    }
    if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) {
        struct cr0_32 * dst_reg = (struct cr0_32 *)(dec_instr.dst_operand.operand);
        struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);

        PrintDebug("MOVCR2\n");

        if (info->shdw_pg_mode == SHADOW_PAGING) {
            struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
            *dst_reg = *guest_cr0;
        } else {
            *dst_reg = *shadow_cr0;
        }

        PrintDebug("Shadow CR0: %x\n", *(uint_t*)shadow_cr0);
        PrintDebug("returned CR0: %x\n", *(uint_t*)dst_reg);
    } else if (v3_opcode_cmp(V3_OPCODE_SMSW, (const uchar_t *)(dec_instr.opcode)) == 0) {
        struct cr0_real * shadow_cr0 = (struct cr0_real *)&(info->ctrl_regs.cr0);
        struct cr0_real * dst_reg = (struct cr0_real *)(dec_instr.dst_operand.operand);
        char cr0_val = *(char*)shadow_cr0 & 0x0f;

        PrintDebug("SMSW\n");

        // The lower 4 bits of the guest/shadow CR0 are mapped through
        // We can treat nested and shadow paging the same here
        *(char *)dst_reg &= 0xf0;
        *(char *)dst_reg |= cr0_val;
295 PrintError("Unhandled opcode in handle_cr0_read\n");
299 info->rip += dec_instr.instr_length;
// First Attempt = 256 lines
// current = 65 lines
int v3_handle_cr3_write(struct guest_info * info) {
    uchar_t instr[15];
    int ret;
    struct x86_instr dec_instr;
    if (info->mem_mode == PHYSICAL_MEM) {
        ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    } else {
        ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    }
    if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
        PrintError("Could not decode instruction\n");
        return -1;
    }
    if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) {
        PrintDebug("MOV2CR3\n");
        if (info->mem_mode == PHYSICAL_MEM) {
            // All we do is update the guest CR3

            if (info->cpu_mode == LONG) {
                struct cr3_64 * new_cr3 = (struct cr3_64 *)(dec_instr.src_operand.operand);
                struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->shdw_pg_state.guest_cr3);
                *guest_cr3 = *new_cr3;
            } else {
                struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr.src_operand.operand);
                struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
                *guest_cr3 = *new_cr3;
            }

        } else {
            switch (info->cpu_mode) {
                case PROTECTED:
                    if (handle_mov_to_cr3_32(info, &dec_instr) == -1) {
                        return -1;
                    }
                    break;
                case PROTECTED_PAE:
                    if (handle_mov_to_cr3_32pae(info, &dec_instr) == -1) {
                        return -1;
                    }
                    break;
                case LONG:
                    if (handle_mov_to_cr3_64(info, &dec_instr) == -1) {
                        return -1;
                    }
                    break;
                case LONG_32_COMPAT:
                    if (handle_mov_to_cr3_64compat(info, &dec_instr) == -1) {
                        return -1;
                    }
                    break;
                default:
                    PrintError("Unhandled CPU mode: %d\n", info->cpu_mode);
                    return -1;
            }
        }
366 PrintError("Unhandled opcode in handle_cr3_write\n");
370 info->rip += dec_instr.instr_length;
static int handle_mov_to_cr3_32(struct guest_info * info, struct x86_instr * dec_instr) {

    PrintDebug("CR3 at 0x%p\n", &(info->ctrl_regs.cr3));

    if (info->shdw_pg_mode == SHADOW_PAGING) {
        struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr->src_operand.operand);
        struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
        // shadow_cr3 aliases the hardware CR3 so the debug output below can show it
        struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3);
390 PrintDebug("Old Shadow CR3=%x; Old Guest CR3=%x\n",
391 *(uint_t*)shadow_cr3, *(uint_t*)guest_cr3);
394 // Store the write value to virtualize CR3
395 *guest_cr3 = *new_cr3;
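
        /*
         * v3_activate_shadow_pt() is expected to build (or look up) the shadow
         * page table hierarchy backing the new guest CR3 and load the hardware
         * CR3 with it, which is why the shadow CR3 printed below can differ
         * from the value the guest just wrote.
         */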
        if (v3_activate_shadow_pt(info) == -1) {
            PrintError("Failed to activate 32 bit shadow page table\n");
            return -1;
        }

        PrintDebug("New Shadow CR3=%x; New Guest CR3=%x\n",
                   *(uint_t*)shadow_cr3, *(uint_t*)guest_cr3);
    }

    return 0;
}
static int handle_mov_to_cr3_32pae(struct guest_info * info, struct x86_instr * dec_instr) {
    PrintError("32 Bit PAE mode Mov to CR3 not implemented\n");
    return -1;
}
static int handle_mov_to_cr3_64(struct guest_info * info, struct x86_instr * dec_instr) {
    PrintError("Long mode Mov to CR3 not implemented\n");
    return -1;
}
static int handle_mov_to_cr3_64compat(struct guest_info * info, struct x86_instr * dec_instr) {
    PrintError("Long compatibility mode move to CR3 not implemented\n");
    return -1;
}
// first attempt = 156 lines
// current = 36 lines
int v3_handle_cr3_read(struct guest_info * info) {
    uchar_t instr[15];
    int ret;
    struct x86_instr dec_instr;
    if (info->mem_mode == PHYSICAL_MEM) {
        ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    } else {
        ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    }
    /* The IFetch will already have faulted in the necessary bytes for the full instruction
    if (ret != 15) {
        // I think we should inject a GPF into the guest
        PrintError("Could not read instruction (ret=%d)\n", ret);
        return -1;
    }
    */
    if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
        PrintError("Could not decode instruction\n");
        return -1;
    }
    if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) {
        PrintDebug("MOVCR32\n");
        struct cr3_32 * dst_reg = (struct cr3_32 *)(dec_instr.dst_operand.operand);

        PrintDebug("CR3 at 0x%p\n", &(info->ctrl_regs.cr3));

        if (info->shdw_pg_mode == SHADOW_PAGING) {
            *dst_reg = *(struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
        } else {
            *dst_reg = *(struct cr3_32 *)&(info->ctrl_regs.cr3);
        }
464 PrintError("Unhandled opcode in handle_cr3_read\n");
468 info->rip += dec_instr.instr_length;
int v3_handle_cr4_read(struct guest_info * info) {
    PrintError("CR4 Read not handled\n");
    return -1;
}
int v3_handle_cr4_write(struct guest_info * info) {
    uchar_t instr[15];
    int ret;
    struct x86_instr dec_instr;
    if (info->mem_mode == PHYSICAL_MEM) {
        ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    } else {
        ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
    }
    if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
        PrintError("Could not decode instruction\n");
        return -1;
    }
    if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) != 0) {
        PrintError("Invalid opcode in write to CR4\n");
        return -1;
    }
    if ((info->cpu_mode == PROTECTED) || (info->cpu_mode == PROTECTED_PAE)) {
        struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand);
        struct cr4_32 * cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4);

        PrintDebug("OperandVal = %x, length = %d\n", *(uint_t *)new_cr4, dec_instr.src_operand.size);
        PrintDebug("Old CR4=%x\n", *(uint_t *)cr4);
        if ((info->shdw_pg_mode == SHADOW_PAGING) &&
            (v3_get_mem_mode(info) == PHYSICAL_MEM)) {

            if ((cr4->pae == 0) && (new_cr4->pae == 1)) {
                PrintDebug("Creating PAE passthrough tables\n");

                // Delete the old 32 bit direct map page tables
                delete_page_tables_32((pde32_t *)V3_VAddr((void *)(info->direct_map_pt)));

                // create 32 bit PAE direct map page table
                info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_32PAE(info));

                // reset cr3 to new page tables
                info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
            } else if ((cr4->pae == 1) && (new_cr4->pae == 0)) {
                // Create passthrough standard 32bit pagetables (not supported yet)
                return -1;
            }
        }

        *cr4 = *new_cr4;
        PrintDebug("New CR4=%x\n", *(uint_t *)cr4);
531 PrintError("CR4 write not supported in CPU_MODE: %d\n", info->cpu_mode);
535 info->rip += dec_instr.instr_length;
int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data) {
    struct guest_info * info = (struct guest_info *)(priv_data);
    PrintDebug("EFER Read\n");

    dst->value = info->guest_efer.value;

    info->rip += 2; // WRMSR/RDMSR are two byte opcodes

    return 0;
}
int v3_handle_efer_write(uint_t msr, struct v3_msr src, void * priv_data) {
    struct guest_info * info = (struct guest_info *)(priv_data);
    struct efer_64 * new_efer = (struct efer_64 *)&(src.value);
    struct efer_64 * shadow_efer = (struct efer_64 *)&(info->ctrl_regs.efer);
    struct v3_msr * guest_efer = &(info->guest_efer);

    PrintDebug("EFER Write\n");
    PrintDebug("Old EFER=%p\n", (void *)*(addr_t*)(shadow_efer));
    // We virtualize the guest's EFER to hide the SVME and LMA bits
    guest_efer->value = src.value;
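
    /*
     * guest_efer is the copy returned by v3_handle_efer_read(), so the guest
     * always sees exactly what it wrote. The shadow copy in ctrl_regs.efer is
     * what the hardware runs with, which is where SVME must stay set and where
     * LMA is managed as part of the long mode transition handled below.
     */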
    if ((info->shdw_pg_mode == SHADOW_PAGING) &&
        (v3_get_mem_mode(info) == PHYSICAL_MEM)) {

        if ((shadow_efer->lme == 0) && (new_efer->lme == 1)) {
            PrintDebug("Transition to longmode\n");
            PrintDebug("Creating Passthrough 64 bit page tables\n");
            // Delete the old 32 bit direct map page tables
            /*
             * Will these page tables always be in PAE format??
             */
            PrintDebug("Deleting old PAE Page tables\n");
            PrintError("JRL BUG?: Will the old page tables always be in PAE format??\n");
            delete_page_tables_32PAE((pdpe32pae_t *)V3_VAddr((void *)(info->direct_map_pt)));
            // create 64 bit direct map page table
            info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(info));

            // reset cr3 to new page tables
            info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);

            // We mark the Long Mode active because we have paging enabled
            // We do this in new_efer because we copy the msr in full below
            new_efer->lma = 1;
        } else if ((shadow_efer->lme == 1) && (new_efer->lme == 0)) {
            // transition out of long mode is not handled
            //((struct efer_64 *)&(info->guest_efer.value))->lme = 0;
            //((struct efer_64 *)&(info->guest_efer.value))->lma = 0;

            return -1;
        }
        // accept all changes to the efer, but make sure that the SVME bit is set... (SVM specific)
        *shadow_efer = *new_efer;
        shadow_efer->svme = 1;

        PrintDebug("New EFER=%p\n", (void *)*(addr_t *)(shadow_efer));
606 PrintError("Write to EFER in NESTED_PAGING or VIRTUAL_MEM mode not supported\n");
607 // Should probably just check for a long mode transition, and bomb out if it is
611 info->rip += 2; // WRMSR/RDMSR are two byte operands