2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_mem.h>
21 #include <palacios/vmm.h>
22 #include <palacios/vmcb.h>
23 #include <palacios/vmm_decoder.h>
24 #include <palacios/vm_guest_mem.h>
25 #include <palacios/vmm_ctrl_regs.h>
29 /* Segmentation is a problem here...
31 * When we get a memory operand, presumably we use the default segment (which is?)
32 * unless an alternate segment was specified in the prefix...
36 #ifndef DEBUG_CTRL_REGS
38 #define PrintDebug(fmt, args...)
42 static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr);
43 static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr);
44 static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr);
47 // First Attempt = 494 lines
48 // current = 106 lines
/*
 * Intercept handler for guest writes to CR0.
 * Fetches up to 15 bytes of the faulting instruction from guest memory
 * (via physical or virtual addressing depending on the guest's current
 * memory mode), decodes it, and dispatches on opcode:
 * LMSW, MOV-to-CR0, or CLTS. On success the guest RIP is advanced past
 * the emulated instruction.
 * NOTE(review): declarations of `instr`/`ret`, the else branch of the
 * fetch, and the error-return paths are elided in this view — confirm
 * against the full source.
 */
49 int v3_handle_cr0_write(struct guest_info * info) {
52 struct x86_instr dec_instr;
// Instruction fetch must honor the guest's paging state: physical
// addressing before paging is enabled, virtual afterwards.
54 if (info->mem_mode == PHYSICAL_MEM) {
55 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
57 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
60 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
61 PrintError("Could not decode instruction\n");
// Dispatch on the decoded opcode; each helper emulates one CR0-writing
// instruction form.
65 if (v3_opcode_cmp(V3_OPCODE_LMSW, (const uchar_t *)(dec_instr.opcode)) == 0) {
67 if (handle_lmsw(info, &dec_instr) == -1) {
71 } else if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) {
73 if (handle_mov_to_cr0(info, &dec_instr) == -1) {
77 } else if (v3_opcode_cmp(V3_OPCODE_CLTS, (const uchar_t *)(dec_instr.opcode)) == 0) {
79 if (handle_clts(info, &dec_instr) == -1) {
84 PrintError("Unhandled opcode in handle_cr0_write\n");
// Skip the guest past the instruction we just emulated.
88 info->rip += dec_instr.instr_length;
96 // The CR0 register only has flags in the low 32 bits
97 // The hardware does a format check to make sure the high bits are zero
98 // Because of this we can ignore the high 32 bits here
/*
 * Emulates MOV to CR0. Maintains two copies of CR0:
 *  - guest_cr0 (shdw_pg_state.guest_cr0): the value the guest believes
 *    it wrote (the guest always reads this back),
 *  - shadow_cr0 (ctrl_regs.cr0): the value actually loaded into hardware.
 * If the write toggles CR0.PG, the page tables are switched between
 * shadow and passthrough modes.
 * NOTE(review): lines that force the virtualized bits in shadow_cr0
 * (e.g. keeping paging enabled) are elided in this view — confirm.
 */
99 static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr) {
101 struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
102 struct cr0_32 * new_cr0 = (struct cr0_32 *)(dec_instr->src_operand.operand);
103 struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
104 uint_t paging_transition = 0;
106 PrintDebug("MOV2CR0 (MODE=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
108 PrintDebug("OperandVal = %x, length=%d\n", *(uint_t *)new_cr0, dec_instr->src_operand.size);
110 PrintDebug("Old CR0=%x\n", *(uint_t *)shadow_cr0);
111 PrintDebug("Old Guest CR0=%x\n", *(uint_t *)guest_cr0);
114 // We detect if this is a paging transition
115 if (guest_cr0->pg != new_cr0->pg) {
116 paging_transition = 1;
119 // Guest always sees the value they wrote
120 *guest_cr0 = *new_cr0;
122 // This value must always be set to 1
125 // Set the shadow register to catch non-virtualized flags
126 *shadow_cr0 = *guest_cr0;
128 // Paging is always enabled
131 // Was there a paging transition
132 // Meaning we need to change the page tables
133 if (paging_transition) {
// Guest turned paging on -> install shadow page tables; otherwise
// fall back to passthrough (identity) page tables.
134 if (v3_get_mem_mode(info) == VIRTUAL_MEM) {
136 PrintDebug("Activating Shadow Page Tables\n");
138 if (v3_activate_shadow_pt(info) == -1) {
139 PrintError("Failed to activate shadow page tables\n");
144 if (v3_activate_passthrough_pt(info) == -1) {
145 PrintError("Failed to activate passthrough page tables\n");
151 PrintDebug("New Guest CR0=%x\n",*(uint_t *)guest_cr0);
152 PrintDebug("New CR0=%x\n", *(uint_t *)shadow_cr0);
/*
 * Emulates CLTS (clear CR0.TS). Operates on the hardware (shadow) CR0
 * and, under shadow paging, also on the guest's virtualized CR0 copy.
 * NOTE(review): the actual TS-clearing statements are elided in this
 * view — confirm against the full source.
 */
160 static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr) {
162 struct cr0_32 * real_cr0 = (struct cr0_32*)&(info->ctrl_regs.cr0);
166 if (info->shdw_pg_mode == SHADOW_PAGING) {
167 struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
/*
 * Emulates LMSW (load machine status word). Only the low 4 bits of the
 * source operand (PE/MP/EM/TS) are written; the rest of CR0 is
 * preserved. The change is applied to the hardware (shadow) CR0 and,
 * under shadow paging, mirrored into the guest's virtualized CR0 copy.
 */
174 static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr) {
175 struct cr0_real * real_cr0 = (struct cr0_real*)&(info->ctrl_regs.cr0);
176 struct cr0_real * new_cr0 = (struct cr0_real *)(dec_instr->src_operand.operand);
179 PrintDebug("LMSW\n");
// LMSW only affects the low 4 bits of CR0; mask everything else off.
181 new_cr0_val = (*(char*)(new_cr0)) & 0x0f;
183 PrintDebug("OperandVal = %x\n", new_cr0_val);
185 // We can just copy the new value through
186 // we don't need to virtualize the lower 4 bits
187 PrintDebug("Old CR0=%x\n", *(uint_t *)real_cr0);
188 *(uchar_t*)real_cr0 &= 0xf0;
189 *(uchar_t*)real_cr0 |= new_cr0_val;
190 PrintDebug("New CR0=%x\n", *(uint_t *)real_cr0);
193 // If Shadow paging is enabled we push the changes to the virtualized copy of cr0
194 if (info->shdw_pg_mode == SHADOW_PAGING) {
195 struct cr0_real * guest_cr0 = (struct cr0_real*)&(info->shdw_pg_state.guest_cr0);
197 PrintDebug("Old Guest CR0=%x\n", *(uint_t *)guest_cr0);
198 *(uchar_t*)guest_cr0 &= 0xf0;
199 *(uchar_t*)guest_cr0 |= new_cr0_val;
200 PrintDebug("New Guest CR0=%x\n", *(uint_t *)guest_cr0);
209 // First attempt = 253 lines
210 // current = 51 lines
/*
 * Intercept handler for guest reads of CR0 (MOV from CR0 and SMSW).
 * Under shadow paging, MOV-from-CR0 returns the guest's virtualized
 * copy rather than the hardware value; SMSW passes the low 4 bits of
 * the shadow CR0 straight through. Advances RIP on success.
 * NOTE(review): instr/ret declarations and error-return paths are
 * elided in this view — confirm against the full source.
 */
211 int v3_handle_cr0_read(struct guest_info * info) {
214 struct x86_instr dec_instr;
// Fetch the faulting instruction using the guest's current addressing.
216 if (info->mem_mode == PHYSICAL_MEM) {
217 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
219 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
223 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
224 PrintError("Could not decode instruction\n");
228 if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) {
229 struct cr0_32 * dst_reg = (struct cr0_32 *)(dec_instr.dst_operand.operand);
230 struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
232 PrintDebug("MOVCR2 (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
// Shadow paging: the guest must see the value it last wrote, not the
// hardware CR0 we actually loaded.
234 if (info->shdw_pg_mode == SHADOW_PAGING) {
235 struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
236 *dst_reg = *guest_cr0;
238 *dst_reg = *shadow_cr0;
241 PrintDebug("Shadow CR0: %x\n", *(uint_t*)shadow_cr0);
242 PrintDebug("returned CR0: %x\n", *(uint_t*)dst_reg);
243 } else if (v3_opcode_cmp(V3_OPCODE_SMSW, (const uchar_t *)(dec_instr.opcode)) == 0) {
244 struct cr0_real * shadow_cr0 = (struct cr0_real *)&(info->ctrl_regs.cr0);
245 struct cr0_real * dst_reg = (struct cr0_real *)(dec_instr.dst_operand.operand);
246 char cr0_val = *(char*)shadow_cr0 & 0x0f;
248 PrintDebug("SMSW\n");
250 // The lower 4 bits of the guest/shadow CR0 are mapped through
251 // We can treat nested and shadow paging the same here
252 *(char *)dst_reg &= 0xf0;
253 *(char *)dst_reg |= cr0_val;
256 PrintError("Unhandled opcode in handle_cr0_read\n");
260 info->rip += dec_instr.instr_length;
268 // First Attempt = 256 lines
269 // current = 65 lines
/*
 * Intercept handler for MOV to CR3. Under shadow paging the new value
 * is stored in the guest's virtualized CR3 and, if guest paging is
 * active, the shadow page tables are re-activated (the hardware CR3
 * keeps pointing at the shadow tables). Under nested paging the value
 * is written straight through to the hardware CR3. The 64-bit vs
 * 32-bit CR3 layout is chosen by the guest's CPU mode.
 * NOTE(review): instr/ret declarations and error-return paths are
 * elided in this view — confirm against the full source.
 */
270 int v3_handle_cr3_write(struct guest_info * info) {
273 struct x86_instr dec_instr;
275 if (info->mem_mode == PHYSICAL_MEM) {
276 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
278 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
281 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
282 PrintError("Could not decode instruction\n");
286 if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) {
287 PrintDebug("MOV2CR3 (cpu_mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
289 if (info->shdw_pg_mode == SHADOW_PAGING) {
290 PrintDebug("Old Shadow CR3=%p; Old Guest CR3=%p\n",
291 (void *)(addr_t)(info->ctrl_regs.cr3),
292 (void*)(addr_t)(info->shdw_pg_state.guest_cr3));
295 // We update the guest CR3
296 if (info->cpu_mode == LONG) {
297 struct cr3_64 * new_cr3 = (struct cr3_64 *)(dec_instr.src_operand.operand);
298 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->shdw_pg_state.guest_cr3);
299 *guest_cr3 = *new_cr3;
301 struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr.src_operand.operand);
302 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
303 *guest_cr3 = *new_cr3;
306 // If Paging is enabled in the guest then we need to change the shadow page tables
307 if (info->mem_mode == VIRTUAL_MEM) {
308 if (v3_activate_shadow_pt(info) == -1) {
309 PrintError("Failed to activate 32 bit shadow page table\n");
314 PrintDebug("New Shadow CR3=%p; New Guest CR3=%p\n",
315 (void *)(addr_t)(info->ctrl_regs.cr3),
316 (void*)(addr_t)(info->shdw_pg_state.guest_cr3));
318 } else if (info->shdw_pg_mode == NESTED_PAGING) {
320 // This is just a passthrough operation which we probably don't need here
// With nested paging the hardware walks the guest's own tables, so the
// new CR3 value goes directly into the hardware register.
321 if (info->cpu_mode == LONG) {
322 struct cr3_64 * new_cr3 = (struct cr3_64 *)(dec_instr.src_operand.operand);
323 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->ctrl_regs.cr3);
324 *guest_cr3 = *new_cr3;
326 struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr.src_operand.operand);
327 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3);
328 *guest_cr3 = *new_cr3;
333 PrintError("Unhandled opcode in handle_cr3_write\n");
337 info->rip += dec_instr.instr_length;
344 // first attempt = 156 lines
345 // current = 36 lines
/*
 * Intercept handler for MOV from CR3. Under shadow paging the guest
 * gets back its virtualized CR3 (shdw_pg_state.guest_cr3); under
 * nested paging the hardware CR3 is returned directly. The 64-bit vs
 * 32-bit layout follows the guest's CPU mode. Advances RIP on success.
 * NOTE(review): instr/ret declarations and error-return paths are
 * elided in this view — confirm against the full source.
 */
346 int v3_handle_cr3_read(struct guest_info * info) {
349 struct x86_instr dec_instr;
351 if (info->mem_mode == PHYSICAL_MEM) {
352 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
354 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
357 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
358 PrintError("Could not decode instruction\n");
362 if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) {
363 PrintDebug("MOVCR32 (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode));
365 if (info->shdw_pg_mode == SHADOW_PAGING) {
// Return the guest's view of CR3, never the shadow table pointer.
367 if (info->cpu_mode == LONG) {
368 struct cr3_64 * dst_reg = (struct cr3_64 *)(dec_instr.dst_operand.operand);
369 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->shdw_pg_state.guest_cr3);
370 *dst_reg = *guest_cr3;
372 struct cr3_32 * dst_reg = (struct cr3_32 *)(dec_instr.dst_operand.operand);
373 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
374 *dst_reg = *guest_cr3;
377 } else if (info->shdw_pg_mode == NESTED_PAGING) {
379 // This is just a passthrough operation which we probably don't need here
380 if (info->cpu_mode == LONG) {
381 struct cr3_64 * dst_reg = (struct cr3_64 *)(dec_instr.dst_operand.operand);
382 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->ctrl_regs.cr3);
383 *dst_reg = *guest_cr3;
385 struct cr3_32 * dst_reg = (struct cr3_32 *)(dec_instr.dst_operand.operand);
386 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->ctrl_regs.cr3);
387 *dst_reg = *guest_cr3;
392 PrintError("Unhandled opcode in handle_cr3_read\n");
396 info->rip += dec_instr.instr_length;
402 // We don't need to virtualize CR4, all we need is to detect the activation of PAE
/*
 * CR4 reads are not emulated — intentionally a stub (see comment
 * above: only PAE activation on write matters to the VMM).
 * NOTE(review): the return statement is elided in this view.
 */
403 int v3_handle_cr4_read(struct guest_info * info) {
404 // PrintError("CR4 Read not handled\n");
/*
 * Intercept handler for MOV to CR4. Only protected / protected-PAE
 * CPU modes are supported. When shadow paging is active and the guest
 * has not yet enabled paging, a PAE toggle requires rebuilding the
 * passthrough page tables in the matching format (32-bit <-> 32-bit
 * PAE) and reloading CR3 to point at them.
 * NOTE(review): instr/ret declarations, error-return paths, and the
 * PAE-disable branch body are elided in this view — confirm against
 * the full source.
 */
409 int v3_handle_cr4_write(struct guest_info * info) {
412 struct x86_instr dec_instr;
414 if (info->mem_mode == PHYSICAL_MEM) {
415 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
417 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
420 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
421 PrintError("Could not decode instruction\n");
// Only MOV-to-CR is legal here; anything else is an emulation error.
425 if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) != 0) {
426 PrintError("Invalid opcode in write to CR4\n");
430 if ((info->cpu_mode == PROTECTED) || (info->cpu_mode == PROTECTED_PAE)) {
431 struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand);
432 struct cr4_32 * cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4);
434 PrintDebug("OperandVal = %x, length = %d\n", *(uint_t *)new_cr4, dec_instr.src_operand.size);
435 PrintDebug("Old CR4=%x\n", *(uint_t *)cr4);
// PAE transitions only matter while the guest still runs with paging
// off (PHYSICAL_MEM): the passthrough tables must match the PAE mode.
437 if ((info->shdw_pg_mode == SHADOW_PAGING) &&
438 (v3_get_mem_mode(info) == PHYSICAL_MEM)) {
440 if ((cr4->pae == 0) && (new_cr4->pae == 1)) {
441 PrintDebug("Creating PAE passthrough tables\n");
443 // Delete the old 32 bit direct map page tables
444 delete_page_tables_32((pde32_t *)V3_VAddr((void *)(info->direct_map_pt)));
446 // create 32 bit PAE direct map page table
447 info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_32PAE(info));
449 // reset cr3 to new page tables
450 info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
452 } else if ((cr4->pae == 1) && (new_cr4->pae == 0)) {
453 // Create passthrough standard 32bit pagetables
459 PrintDebug("New CR4=%x\n", *(uint_t *)cr4);
462 PrintError("CR4 write not supported in CPU_MODE: %d\n", info->cpu_mode);
466 info->rip += dec_instr.instr_length;
/*
 * RDMSR handler for the EFER MSR. Returns the guest's virtualized
 * EFER copy (guest_efer), not the hardware value, and advances RIP
 * past the 2-byte RDMSR instruction.
 */
471 int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data) {
472 struct guest_info * info = (struct guest_info *)(priv_data);
473 PrintDebug("EFER Read\n");
475 dst->value = info->guest_efer.value;
477 info->rip += 2; // WRMSR/RDMSR are two byte operands
/*
 * WRMSR handler for the EFER MSR. Keeps a virtualized guest copy
 * (guest_efer) so the guest never sees the SVME/LMA bits the VMM
 * controls, and applies the write to the hardware (shadow) EFER with
 * SVME forced on. Under shadow paging with guest paging still off, an
 * LME 0->1 transition rebuilds the passthrough tables in 64-bit format
 * and reloads CR3. Advances RIP past the 2-byte WRMSR instruction.
 * NOTE(review): the LME 1->0 branch body, the new_efer LMA adjustment,
 * and the return are elided in this view — confirm against the full
 * source.
 */
482 int v3_handle_efer_write(uint_t msr, struct v3_msr src, void * priv_data) {
483 struct guest_info * info = (struct guest_info *)(priv_data);
484 struct efer_64 * new_efer = (struct efer_64 *)&(src.value);
485 struct efer_64 * shadow_efer = (struct efer_64 *)&(info->ctrl_regs.efer);
486 struct v3_msr * guest_efer = &(info->guest_efer);
488 PrintDebug("EFER Write\n");
489 PrintDebug("Old EFER=%p\n", (void *)*(addr_t*)(shadow_efer));
491 // We virtualize the guests efer to hide the SVME and LMA bits
492 guest_efer->value = src.value;
495 if ((info->shdw_pg_mode == SHADOW_PAGING) &&
496 (v3_get_mem_mode(info) == PHYSICAL_MEM)) {
498 if ((shadow_efer->lme == 0) && (new_efer->lme == 1)) {
499 PrintDebug("Transition to longmode\n");
500 PrintDebug("Creating Passthrough 64 bit page tables\n");
502 // Delete the old 32 bit direct map page tables
505 * Will these page tables always be in PAE format??
507 PrintDebug("Deleting old PAE Page tables\n");
508 PrintError("JRL BUG?: Will the old page tables always be in PAE format??\n");
509 delete_page_tables_32PAE((pdpe32pae_t *)V3_VAddr((void *)(info->direct_map_pt)));
511 // create 64 bit direct map page table
512 info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(info));
514 // reset cr3 to new page tables
515 info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
517 // We mark the Long Mode active because we have paging enabled
518 // We do this in new_efer because we copy the msr in full below
521 } else if ((shadow_efer->lme == 1) && (new_efer->lme == 0)) {
522 // transition out of long mode
523 //((struct efer_64 *)&(info->guest_efer.value))->lme = 0;
524 //((struct efer_64 *)&(info->guest_efer.value))->lma = 0;
529 // accept all changes to the efer, but make sure that the SVME bit is set... (SVM specific)
530 *shadow_efer = *new_efer;
531 shadow_efer->svme = 1;
535 PrintDebug("New EFER=%p\n", (void *)*(addr_t *)(shadow_efer));
537 PrintError("Write to EFER in NESTED_PAGING or VIRTUAL_MEM mode not supported\n");
538 // Should probably just check for a long mode transition, and bomb out if it is
542 info->rip += 2; // WRMSR/RDMSR are two byte operands