2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_mem.h>
21 #include <palacios/vmm.h>
22 #include <palacios/vmcb.h>
23 #include <palacios/vmm_decoder.h>
24 #include <palacios/vm_guest_mem.h>
25 #include <palacios/vmm_ctrl_regs.h>
29 /* Segmentation is a problem here...
31 * When we get a memory operand, presumably we use the default segment (which is?)
32 * unless an alternate segment was specified in the prefix...
36 #ifndef DEBUG_CTRL_REGS
38 #define PrintDebug(fmt, args...)
/*
 * Forward declarations for the per-instruction CR0/CR3 helpers invoked by
 * the v3_handle_cr*_write() exit handlers below.  The MOV-to-CR variants
 * are split by guest CPU operating mode (32-bit, 32-bit PAE, 64-bit long
 * mode, and 64-bit compatibility mode).
 */
42 static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr);
43 static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr);
45 static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr);
46 static int handle_mov_to_cr0_32(struct guest_info * info, struct x86_instr * dec_instr);
47 static int handle_mov_to_cr0_32pae(struct guest_info * info, struct x86_instr * dec_instr);
48 static int handle_mov_to_cr0_64(struct guest_info * info, struct x86_instr * dec_instr);
49 static int handle_mov_to_cr0_64compat(struct guest_info * info, struct x86_instr * dec_instr);
51 static int handle_mov_to_cr3_32(struct guest_info * info, struct x86_instr * dec_instr);
52 static int handle_mov_to_cr3_32pae(struct guest_info * info, struct x86_instr * dec_instr);
53 static int handle_mov_to_cr3_64(struct guest_info * info, struct x86_instr * dec_instr);
54 static int handle_mov_to_cr3_64compat(struct guest_info * info, struct x86_instr * dec_instr);
58 // First Attempt = 494 lines
59 // current = 106 lines
/*
 * Exit handler for a guest write to CR0.
 *
 * Fetches up to 15 bytes (the x86 maximum instruction length) at the guest
 * RIP, decodes them, and dispatches on the opcode: LMSW, MOV-to-CR0, or
 * CLTS.  On success the guest RIP is advanced past the emulated
 * instruction.
 *
 * NOTE(review): this listing has gaps (see the embedded original line
 * numbers), so some declarations (e.g. 'ret', 'instr') and the error-path
 * returns are not visible here.
 */
60 int v3_handle_cr0_write(struct guest_info * info) {
63 struct x86_instr dec_instr;
// Fetch the instruction bytes: while the guest runs without paging, RIP is
// effectively a guest-physical address; with guest paging enabled it must
// be read through the guest's own page tables.
65 if (info->mem_mode == PHYSICAL_MEM) {
66 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
68 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
71 /* The IFetch will already have faulted in the necessary bytes for the full instruction
73 // I think we should inject a GPF into the guest
74 PrintError("Could not read instruction (ret=%d)\n", ret);
79 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
80 PrintError("Could not decode instruction\n");
// Dispatch on the decoded opcode; each helper returns -1 on failure.
84 if (v3_opcode_cmp(V3_OPCODE_LMSW, (const uchar_t *)(dec_instr.opcode)) == 0) {
86 if (handle_lmsw(info, &dec_instr) == -1) {
90 } else if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) {
92 if (handle_mov_to_cr0(info, &dec_instr) == -1) {
96 } else if (v3_opcode_cmp(V3_OPCODE_CLTS, (const uchar_t *)(dec_instr.opcode)) == 0) {
98 if (handle_clts(info, &dec_instr) == -1) {
103 PrintError("Unhandled opcode in handle_cr0_write\n");
// Skip the guest past the instruction we just emulated.
107 info->rip += dec_instr.instr_length;
/*
 * Dispatch a MOV-to-CR0 to the handler for the guest's current CPU
 * operating mode.  (The switch case labels are not visible in this
 * listing; each return below corresponds to one mode.)
 */
115 static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_instr) {
116 PrintDebug("MOV2CR0\n");
118 switch (info->cpu_mode) {
121 return handle_mov_to_cr0_32(info, dec_instr);
123 return handle_mov_to_cr0_32pae(info, dec_instr);
125 return handle_mov_to_cr0_64(info, dec_instr);
127 return handle_mov_to_cr0_64compat(info, dec_instr);
129 PrintError("Invalid CPU Operating Mode: %d\n", info->cpu_mode);
// Unimplemented stub: MOV to CR0 from a 32-bit PAE guest.  Logs an error;
// the return (presumably -1) is not visible in this listing.
135 static int handle_mov_to_cr0_32pae(struct guest_info * info, struct x86_instr * dec_instr) {
136 PrintError("32 bit PAE mov to CR0 not implemented\n");
// Unimplemented stub: MOV to CR0 from a 64-bit (long mode) guest.  Logs an
// error; the return (presumably -1) is not visible in this listing.
140 static int handle_mov_to_cr0_64(struct guest_info * info, struct x86_instr * dec_instr) {
141 PrintError("64 bit mov to CR0 not implemented\n");
// Unimplemented stub: MOV to CR0 from a 64-bit compatibility-mode guest.
// Logs an error; the return (presumably -1) is not visible in this listing.
145 static int handle_mov_to_cr0_64compat(struct guest_info * info, struct x86_instr * dec_instr) {
146 PrintError("64 bit compatibility mode move to CR0 not implemented\n");
/*
 * Emulate MOV to CR0 for a 32-bit guest.
 *
 * Copies the decoded source operand into the machine CR0.  Under shadow
 * paging it also updates the guest-visible copy (shdw_pg_state.guest_cr0)
 * and reloads the machine CR3: with the shadow CR3 if the guest now runs
 * with paging (VIRTUAL_MEM), otherwise with the identity-mapped
 * direct_map_pt.
 */
151 static int handle_mov_to_cr0_32(struct guest_info * info, struct x86_instr * dec_instr) {
// real_cr0 aliases the machine CR0; new_cr0 aliases the decoded operand.
154 struct cr0_32 *real_cr0 = (struct cr0_32*)&(info->ctrl_regs.cr0);
155 struct cr0_32 *new_cr0= (struct cr0_32 *)(dec_instr->src_operand.operand);
157 PrintDebug("OperandVal = %x, length=%d\n", *(uint_t *)new_cr0, dec_instr->src_operand.size);
160 PrintDebug("Old CR0=%x\n", *(uint_t *)real_cr0);
161 *real_cr0 = *new_cr0;
164 if (info->shdw_pg_mode == SHADOW_PAGING) {
165 struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
167 PrintDebug("Old Shadow CR0=%x\n", *(uint_t *)shadow_cr0);
// Keep the guest-visible CR0 in sync with what the guest wrote.
171 *shadow_cr0 = *new_cr0;
// Select the hardware CR3 to match the guest's (possibly new) paging state.
174 if (v3_get_mem_mode(info) == VIRTUAL_MEM) {
175 struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.shadow_cr3);
176 PrintDebug("Setting up Shadow Page Table\n");
177 info->ctrl_regs.cr3 = *(addr_t*)shadow_cr3;
179 info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
183 PrintDebug("New Shadow CR0=%x\n",*(uint_t *)shadow_cr0);
185 PrintDebug("New CR0=%x\n", *(uint_t *)real_cr0);
/*
 * Emulate CLTS (clear the task-switched flag, CR0.TS).
 * Under shadow paging the guest-visible CR0 copy is updated as well.
 * NOTE(review): the lines that actually clear TS in real_cr0/shadow_cr0
 * are missing from this listing.
 */
193 static int handle_clts(struct guest_info * info, struct x86_instr * dec_instr) {
195 struct cr0_32 *real_cr0 = (struct cr0_32*)&(info->ctrl_regs.cr0);
199 if (info->shdw_pg_mode == SHADOW_PAGING) {
200 struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
/*
 * Emulate LMSW (load machine status word).
 *
 * LMSW only writes the low 4 bits of CR0 (PE, MP, EM, TS), so the operand
 * is masked with 0x0f and merged into the low nibble of the machine CR0.
 * Under shadow paging the same merge is applied to the guest-visible CR0.
 * NOTE(review): the declaration of 'new_cr0_val' is missing from this
 * listing.
 */
207 static int handle_lmsw(struct guest_info * info, struct x86_instr * dec_instr) {
208 struct cr0_real *real_cr0 = (struct cr0_real*)&(info->ctrl_regs.cr0);
209 struct cr0_real *new_cr0 = (struct cr0_real *)(dec_instr->src_operand.operand);
212 PrintDebug("LMSW\n");
// Only the low 4 bits of the operand are architecturally significant.
214 new_cr0_val = (*(char*)(new_cr0)) & 0x0f;
216 PrintDebug("OperandVal = %x\n", new_cr0_val);
218 PrintDebug("Old CR0=%x\n", *(uint_t *)real_cr0);
// Merge: keep CR0's upper bits, replace the low nibble.
219 *(uchar_t*)real_cr0 &= 0xf0;
220 *(uchar_t*)real_cr0 |= new_cr0_val;
221 PrintDebug("New CR0=%x\n", *(uint_t *)real_cr0);
// Mirror the update into the guest-visible CR0 under shadow paging.
224 if (info->shdw_pg_mode == SHADOW_PAGING) {
225 struct cr0_real * shadow_cr0 = (struct cr0_real*)&(info->shdw_pg_state.guest_cr0);
227 PrintDebug(" Old Shadow CR0=%x\n", *(uint_t *)shadow_cr0);
228 *(uchar_t*)shadow_cr0 &= 0xf0;
229 *(uchar_t*)shadow_cr0 |= new_cr0_val;
230 PrintDebug("New Shadow CR0=%x\n", *(uint_t *)shadow_cr0);
244 // First attempt = 253 lines
245 // current = 51 lines
/*
 * Exit handler for a guest read of CR0.
 *
 * Fetches and decodes the faulting instruction, then emulates either
 * MOV-from-CR0 (full 32-bit read) or SMSW (low 4 bits only).  Under
 * shadow paging the guest sees its own virtualized CR0
 * (shdw_pg_state.guest_cr0) rather than the machine value.  On success
 * the guest RIP is advanced past the instruction.
 *
 * NOTE(review): this listing has gaps; some declarations (e.g. 'ret',
 * 'instr') and error-path returns are not visible.
 */
246 int v3_handle_cr0_read(struct guest_info * info) {
249 struct x86_instr dec_instr;
// Instruction fetch: guest-physical vs. guest-virtual read depending on
// whether the guest has paging enabled.
251 if (info->mem_mode == PHYSICAL_MEM) {
252 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
254 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
257 /* The IFetch will already have faulted in the necessary bytes for the full instruction
259 // I think we should inject a GPF into the guest
260 PrintError("Could not read instruction (ret=%d)\n", ret);
265 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
266 PrintError("Could not decode instruction\n");
270 if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) {
271 struct cr0_32 * virt_cr0 = (struct cr0_32 *)(dec_instr.dst_operand.operand);
272 struct cr0_32 * real_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
274 PrintDebug("MOVCR2\n");
275 PrintDebug("CR0 at 0x%p\n", (void *)real_cr0);
// Under shadow paging, return the virtualized guest CR0, not the real one.
277 if (info->shdw_pg_mode == SHADOW_PAGING) {
278 *virt_cr0 = *(struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
280 *virt_cr0 = *real_cr0;
283 PrintDebug("real CR0: %x\n", *(uint_t*)real_cr0);
284 PrintDebug("returned CR0: %x\n", *(uint_t*)virt_cr0);
285 } else if (v3_opcode_cmp(V3_OPCODE_SMSW, (const uchar_t *)(dec_instr.opcode)) == 0) {
286 struct cr0_real *real_cr0= (struct cr0_real*)&(info->ctrl_regs.cr0);
287 struct cr0_real *virt_cr0 = (struct cr0_real *)(dec_instr.dst_operand.operand);
// SMSW only exposes the low 4 bits (PE, MP, EM, TS) of CR0.
288 char cr0_val = *(char*)real_cr0 & 0x0f;
290 PrintDebug("SMSW\n");
292 PrintDebug("CR0 at 0x%p\n", real_cr0);
// Merge into the destination, preserving its upper bits.
294 *(char *)virt_cr0 &= 0xf0;
295 *(char *)virt_cr0 |= cr0_val;
298 PrintError("Unhandled opcode in handle_cr0_read\n");
302 info->rip += dec_instr.instr_length;
310 // First Attempt = 256 lines
311 // current = 65 lines
/*
 * Exit handler for a guest write to CR3.
 *
 * Fetches and decodes the faulting instruction.  For MOV-to-CR3:
 *  - if the guest runs without paging (PHYSICAL_MEM), only the recorded
 *    guest CR3 is updated (32- or 64-bit wide depending on cpu_mode);
 *  - otherwise the write is dispatched to the mode-specific handler
 *    (the switch case labels are not visible in this listing).
 * On success the guest RIP is advanced past the instruction.
 *
 * NOTE(review): this listing has gaps; some declarations and error-path
 * returns are not visible.
 */
312 int v3_handle_cr3_write(struct guest_info * info) {
315 struct x86_instr dec_instr;
317 if (info->mem_mode == PHYSICAL_MEM) {
318 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
320 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
323 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
324 PrintError("Could not decode instruction\n");
328 if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) {
329 PrintDebug("MOV2CR3\n");
331 if (info->mem_mode == PHYSICAL_MEM) {
332 // All we do is update the guest CR3
// Record the guest's CR3 at the width matching its CPU mode.
334 if (info->cpu_mode == LONG) {
335 struct cr3_64 * new_cr3 = (struct cr3_64 *)(dec_instr.src_operand.operand);
336 struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->shdw_pg_state.guest_cr3);
337 *guest_cr3 = *new_cr3;
339 struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr.src_operand.operand);
340 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
341 *guest_cr3 = *new_cr3;
// Guest paging is active: dispatch by CPU mode.
346 switch (info->cpu_mode) {
348 if (handle_mov_to_cr3_32(info, &dec_instr) == -1) {
352 if (handle_mov_to_cr3_32pae(info, &dec_instr) == -1) {
356 if (handle_mov_to_cr3_64(info, &dec_instr) == -1) {
360 if (handle_mov_to_cr3_64compat(info, &dec_instr) == -1) {
364 PrintError("Unhandled CPU mode: %d\n", info->cpu_mode);
369 PrintError("Unhandled opcode in handle_cr3_write\n");
373 info->rip += dec_instr.instr_length;
/*
 * Emulate MOV to CR3 for a 32-bit guest under shadow paging.
 *
 * Consults the shadow page-table cache for the new guest CR3; on a cache
 * miss the old shadow page table is torn down (if the guest is in paged
 * mode) and a fresh, empty shadow page table is created to be filled
 * lazily by shadow page faults.  The pwt/pcd cache-control flags are
 * propagated from the guest's value, the guest CR3 is recorded, and the
 * hardware CR3 is reloaded with the shadow value -- but only when the
 * guest actually has paging enabled (otherwise the identity-mapped CR3
 * must stay in place).
 *
 * NOTE(review): this listing has gaps; declarations of 'cached' and
 * 'shadow_pt' and some error-path returns are not visible.
 */
384 static int handle_mov_to_cr3_32(struct guest_info * info, struct x86_instr * dec_instr) {
385 PrintDebug("CR3 at 0x%p\n", &(info->ctrl_regs.cr3));
387 if (info->shdw_pg_mode == SHADOW_PAGING) {
388 struct cr3_32 * new_cr3 = (struct cr3_32 *)(dec_instr->src_operand.operand);
389 struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
390 struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(info->shdw_pg_state.shadow_cr3);
394 PrintDebug("Old Shadow CR3=%x; Old Guest CR3=%x\n",
395 *(uint_t*)shadow_cr3, *(uint_t*)guest_cr3);
// Check whether we already have cached shadow tables for this guest CR3.
399 cached = v3_cache_page_tables32(info, (addr_t)V3_PAddr((void *)(addr_t)CR3_TO_PDE32((void *)*(addr_t *)new_cr3)));
402 PrintError("CR3 Cache failed\n");
404 } else if (cached == 0) {
// Cache miss: discard the old shadow tables and start a fresh set.
407 if(info->mem_mode == VIRTUAL_MEM) {
408 PrintDebug("New CR3 is different - flushing shadow page table %p\n", shadow_cr3 );
409 delete_page_tables_32((pde32_t *)CR3_TO_PDE32(*(uint_t*)shadow_cr3));
412 shadow_pt = v3_create_new_shadow_pt();
414 shadow_cr3->pdt_base_addr = (addr_t)V3_PAddr((void *)(addr_t)PD32_BASE_ADDR(shadow_pt));
415 PrintDebug( "Created new shadow page table %p\n", (void *)(addr_t)shadow_cr3->pdt_base_addr );
416 //PrintDebugPageTables( (pde32_t *)CR3_TO_PDE32(*(uint_t*)shadow_cr3) );
420 PrintDebug("Reusing cached shadow Page table\n");
// Propagate the guest's cache-control bits to the shadow CR3.
424 shadow_cr3->pwt = new_cr3->pwt;
425 shadow_cr3->pcd = new_cr3->pcd;
428 *guest_cr3 = *new_cr3;
430 PrintDebug("New Shadow CR3=%x; New Guest CR3=%x\n",
431 *(uint_t*)shadow_cr3, *(uint_t*)guest_cr3);
433 if (info->mem_mode == VIRTUAL_MEM) {
434 // If we aren't in paged mode then we have to preserve the identity mapped CR3
435 info->ctrl_regs.cr3 = *(addr_t*)shadow_cr3;
// Unimplemented stub: MOV to CR3 from a 32-bit PAE guest.  Logs an error;
// the return (presumably -1) is not visible in this listing.
442 static int handle_mov_to_cr3_32pae(struct guest_info * info, struct x86_instr * dec_instr) {
443 PrintError("32 Bit PAE mode Mov to CR3 not implemented\n");
// Unimplemented stub: MOV to CR3 from a 64-bit (long mode) guest.  Logs an
// error; the return (presumably -1) is not visible in this listing.
447 static int handle_mov_to_cr3_64(struct guest_info * info, struct x86_instr * dec_instr) {
448 PrintError("Long mode Mov to CR3 not implemented\n");
// Unimplemented stub: MOV to CR3 from a 64-bit compatibility-mode guest.
// Logs an error; the return (presumably -1) is not visible in this listing.
452 static int handle_mov_to_cr3_64compat(struct guest_info * info, struct x86_instr * dec_instr) {
453 PrintError("Long compatiblity mode move to CR3 not implemented\n");
459 // first attempt = 156 lines
460 // current = 36 lines
/*
 * Exit handler for a guest read of CR3 (MOV from CR3).
 *
 * Under shadow paging the guest must see its own recorded CR3
 * (shdw_pg_state.guest_cr3), never the shadow/machine CR3; without shadow
 * paging the machine CR3 is returned directly.  On success the guest RIP
 * is advanced past the instruction.
 *
 * NOTE(review): this listing has gaps; some declarations (e.g. 'ret',
 * 'instr') and error-path returns are not visible.
 */
461 int v3_handle_cr3_read(struct guest_info * info) {
464 struct x86_instr dec_instr;
466 if (info->mem_mode == PHYSICAL_MEM) {
467 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
469 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
472 /* The IFetch will already have faulted in the necessary bytes for the full instruction
474 // I think we should inject a GPF into the guest
475 PrintError("Could not read instruction (ret=%d)\n", ret);
480 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
481 PrintError("Could not decode instruction\n");
485 if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) {
486 PrintDebug("MOVCR32\n");
487 struct cr3_32 * virt_cr3 = (struct cr3_32 *)(dec_instr.dst_operand.operand);
489 PrintDebug("CR3 at 0x%p\n", &(info->ctrl_regs.cr3));
// Return the virtualized guest CR3 under shadow paging.
491 if (info->shdw_pg_mode == SHADOW_PAGING) {
492 *virt_cr3 = *(struct cr3_32 *)&(info->shdw_pg_state.guest_cr3);
494 *virt_cr3 = *(struct cr3_32 *)&(info->ctrl_regs.cr3);
497 PrintError("Unhandled opcode in handle_cr3_read\n");
501 info->rip += dec_instr.instr_length;
// Unimplemented stub: guest reads of CR4 are not handled.  Logs an error;
// the return value is not visible in this listing.
506 int v3_handle_cr4_read(struct guest_info * info) {
507 PrintError("CR4 Read not handled\n");
/*
 * Exit handler for a guest write to CR4 (only MOV-to-CR4 is accepted).
 *
 * For 32-bit protected / protected-PAE guests the new value is examined
 * for a PAE toggle: under shadow paging while the guest still runs with
 * paging disabled (PHYSICAL_MEM), enabling PAE requires replacing the
 * 32-bit identity-mapped passthrough page tables with 32-bit PAE ones
 * (and reloading the machine CR3 to point at them).  On success the
 * guest RIP is advanced past the instruction.
 *
 * NOTE(review): this listing has gaps; declarations (e.g. 'ret',
 * 'instr'), the PAE->non-PAE rebuild, and the assignment of new_cr4 into
 * the machine CR4 are not visible here.
 */
511 int v3_handle_cr4_write(struct guest_info * info) {
514 struct x86_instr dec_instr;
516 if (info->mem_mode == PHYSICAL_MEM) {
517 ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
519 ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr);
522 if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) {
523 PrintError("Could not decode instruction\n");
// Only MOV-to-CR is a legal way to reach this handler.
527 if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) != 0) {
528 PrintError("Invalid opcode in write to CR4\n");
532 if ((info->cpu_mode == PROTECTED) || (info->cpu_mode == PROTECTED_PAE)) {
533 struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand);
534 struct cr4_32 * old_cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4);
536 PrintDebug("OperandVal = %x, length = %d\n", *(uint_t *)new_cr4, dec_instr.src_operand.size);
537 PrintDebug("Old CR4=%x\n", *(uint_t *)old_cr4);
// The passthrough tables only need rebuilding while the guest still runs
// unpaged; once paging is on, shadow paging handles translation.
542 if ((info->shdw_pg_mode == SHADOW_PAGING) &&
543 (v3_get_mem_mode(info) == PHYSICAL_MEM)) {
545 if ((old_cr4->pae == 0) && (new_cr4->pae == 1)) {
546 PrintDebug("Creating PAE passthrough tables\n");
548 // Delete the old 32 bit direct map page tables
549 delete_page_tables_32((pde32_t *)V3_VAddr((void *)(info->direct_map_pt)));
551 // create 32 bit PAE direct map page table
552 info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_32PAE(info));
554 // reset cr3 to new page tables
555 info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
557 } else if ((old_cr4->pae == 1) && (new_cr4->pae == 0)) {
558 // Create passthrough standard 32bit pagetables
564 PrintDebug("New CR4=%x\n", *(uint_t *)old_cr4);
570 info->rip += dec_instr.instr_length;
/*
 * MSR-read hook for EFER.  Returns the *virtualized* EFER
 * (info->guest_efer) rather than the machine value, then advances RIP
 * past the 2-byte RDMSR instruction.
 */
575 int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data) {
576 struct guest_info * info = (struct guest_info *)(priv_data);
577 PrintDebug("EFER Read\n");
579 dst->value = info->guest_efer.value;
581 info->rip += 2; // WRMSR/RDMSR are two byte operands
/*
 * MSR-write hook for EFER.
 *
 * The guest's EFER is virtualized (stored in info->guest_efer) so the VMM
 * can hide bits such as SVME and LMA from it.  On an LME 0->1 transition
 * (guest entering long mode) under shadow paging while the guest still
 * runs unpaged, the passthrough page tables are rebuilt in 64-bit format
 * and the machine CR3 is reloaded.  Finally the new value is copied into
 * the machine EFER and RIP is advanced past the 2-byte WRMSR.
 *
 * NOTE(review): this listing has gaps; the body of the LME 1->0 branch
 * and the return statement are not visible here.
 */
586 int v3_handle_efer_write(uint_t msr, struct v3_msr src, void * priv_data) {
587 struct guest_info * info = (struct guest_info *)(priv_data);
588 struct efer_64 * new_efer = (struct efer_64 *)&(src.value);
589 struct efer_64 * old_efer = (struct efer_64 *)&(info->ctrl_regs.efer);
591 PrintDebug("EFER Write\n");
592 PrintDebug("Old EFER=%p\n", (void *)*(addr_t*)(old_efer));
594 // We virtualize the guests efer to hide the SVME and LMA bits
595 info->guest_efer.value = src.value;
597 if ((info->shdw_pg_mode == SHADOW_PAGING) &&
598 (v3_get_mem_mode(info) == PHYSICAL_MEM)) {
600 if ((old_efer->lme == 0) && (new_efer->lme == 1)) {
601 PrintDebug("Transition to longmode\n");
602 PrintDebug("Creating Passthrough 64 bit page tables\n");
604 // Delete the old 32 bit direct map page tables
607 * Will these page tables always be in PAE format??
609 PrintDebug("Deleting old PAE Page tables\n");
610 PrintError("JRL BUG?: Will the old page tables always be in PAE format??\n");
611 delete_page_tables_32PAE((pdpe32pae_t *)V3_VAddr((void *)(info->direct_map_pt)));
613 // create 64 bit direct map page table
614 info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(info));
616 // reset cr3 to new page tables
617 info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt);
620 // Does this mean we will have to fully virtualize a shadow EFER?? (yes it does)
623 } else if ((old_efer->lme == 1) && (new_efer->lme == 0)) {
624 // transition out of long mode
625 //((struct efer_64 *)&(info->guest_efer.value))->lme = 0;
626 //((struct efer_64 *)&(info->guest_efer.value))->lma = 0;
// Commit the (unvirtualized) value to the machine EFER.
631 *old_efer = *new_efer;
632 PrintDebug("New EFER=%p\n", (void *)*(addr_t *)(old_efer));
637 info->rip += 2; // WRMSR/RDMSR are two byte operands