From: Jack Lange Date: Thu, 5 Feb 2009 20:42:49 +0000 (-0600) Subject: 64 bit guests initially supported X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=3574c981404a1c812d369132db09ec2a27d59a5d 64 bit guests initially supported --- diff --git a/palacios/build/Makefile b/palacios/build/Makefile index 1b2cf5d..aabd894 100644 --- a/palacios/build/Makefile +++ b/palacios/build/Makefile @@ -296,6 +296,7 @@ DEVICES_OBJS := \ devices/ramdisk.o \ devices/cdrom.o \ devices/bochs_debug.o \ + devices/os_debug.o \ $(DEVICES_OBJS) :: EXTRA_CFLAGS = \ $(JRLDEBUG) \ diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 74e986e..bb3bad9 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -45,6 +45,16 @@ struct v3_gprs { v3_reg_t rdx; v3_reg_t rcx; v3_reg_t rax; + + v3_reg_t r8; + v3_reg_t r9; + v3_reg_t r10; + v3_reg_t r11; + v3_reg_t r12; + v3_reg_t r13; + v3_reg_t r14; + v3_reg_t r15; + }; diff --git a/palacios/include/palacios/vmm_paging.h b/palacios/include/palacios/vmm_paging.h index 22f956d..dd32c64 100644 --- a/palacios/include/palacios/vmm_paging.h +++ b/palacios/include/palacios/vmm_paging.h @@ -189,7 +189,7 @@ typedef enum {PAGE_4KB, PAGE_2MB, PAGE_4MB, PAGE_1GB, /* Page Table Flag Values */ #define PT32_HOOK 0x1 -#define PT32_GUEST_PT 0x2 +#define V3_LARGE_PG 0x2 @@ -483,6 +483,8 @@ int v3_find_guest_pt_64_page(struct guest_info * info, v3_reg_t guest_cr3, page_type_t type, addr_t vaddr, addr_t * page_ptr, addr_t * page_pa); + + pt_access_status_t inline v3_can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type); pt_access_status_t inline v3_can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type); @@ -511,6 +513,13 @@ int v3_check_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t va +page_type_t v3_get_guest_data_page_type_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr); +page_type_t v3_get_guest_data_page_type_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr); +page_type_t v3_get_guest_data_page_type_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr); +page_type_t v3_get_host_data_page_type_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr); +page_type_t v3_get_host_data_page_type_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr); +page_type_t v3_get_host_data_page_type_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr); + int v3_drill_host_pt_32(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr, int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data), @@ -534,6 +543,7 @@ int v3_drill_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t va + int v3_walk_host_pt_32(struct guest_info * info, v3_reg_t host_cr3, int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data), void * private_data); diff --git a/palacios/src/palacios/svm_handler.c b/palacios/src/palacios/svm_handler.c index b13b761..a4b0597 100644 --- a/palacios/src/palacios/svm_handler.c +++ b/palacios/src/palacios/svm_handler.c @@ -79,13 +79,17 @@ int v3_handle_svm_exit(struct guest_info * info) { // PrintDebug("SVM Returned: Exit Code: 0x%x \t\t(tsc=%ul)\n",exit_code, (uint_t)info->time_state.guest_tsc); - if ((0) && (exit_code < 0x4f)) { + if ((0) && (exit_code <= VMEXIT_EXCP14)) { uchar_t instr[32]; int ret; // Dump out the instr stream //PrintDebug("RIP: %x\n", guest_state->rip); - PrintDebug("RIP Linear: %p\n", (void *)get_addr_linear(info, info->rip, &(info->segments.cs))); + PrintDebug("\n\n\nRIP Linear: %p\n", (void *)get_addr_linear(info, info->rip, &(info->segments.cs))); + + v3_print_GPRs(info); + v3_print_ctrl_regs(info); + // OK, now we will read the instruction // The only difference between PROTECTED and PROTECTED_PG is whether we read @@ -97,6 +101,8 @@ int v3_handle_svm_exit(struct guest_info * info) { ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 32, instr); } + + if (ret != 32) { // I think we should inject a GPF into the guest PrintDebug("Could not read instruction (ret=%d)\n", ret); @@ -384,7 +390,12 @@ int v3_handle_svm_exit(struct guest_info * info) { PrintError("io_info2 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info2)); PrintError("io_info2 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4)); - + + if (info->shdw_pg_mode == SHADOW_PAGING) { + PrintHostPageTables(info, info->ctrl_regs.cr3); + //PrintGuestPageTables(info, info->shdw_pg_state.guest_cr3); + } + return -1; } diff --git a/palacios/src/palacios/svm_lowlevel.S b/palacios/src/palacios/svm_lowlevel.S index 1e36bc9..5616b7f 100644 --- a/palacios/src/palacios/svm_lowlevel.S +++ b/palacios/src/palacios/svm_lowlevel.S @@ -83,20 +83,29 @@ v3_svm_launch: #elif __V3_64BIT__ #define Save_SVM_Registers(location) \ - push %rax; \ - mov location, %rax; \ - mov %rdi, (%rax); \ - mov %rsi, 8(%rax); \ - mov %rbp, 16(%rax); \ + pushq %rax; \ + movq location, %rax; \ + movq %rdi, (%rax); \ + movq %rsi, 8(%rax); \ + movq %rbp, 16(%rax); \ movq $0, 24(%rax); \ - mov %rbx, 32(%rax); \ - mov %rdx, 40(%rax); \ - mov %rcx, 48(%rax); \ - push %rbx; \ - mov 16(%rsp), %rbx; \ - mov %rbx, 56(%rax); \ - pop %rbx; \ - pop %rax; + movq %rbx, 32(%rax); \ + movq %rdx, 40(%rax); \ + movq %rcx, 48(%rax); \ + pushq %rbx; \ + movq 16(%rsp), %rbx; \ + movq %rbx, 56(%rax); \ + popq %rbx; \ + \ + movq %r8, 64(%rax); \ + movq %r9, 72(%rax); \ + movq %r10, 80(%rax); \ + movq %r11, 88(%rax); \ + movq %r12, 96(%rax); \ + movq %r13, 104(%rax); \ + movq %r14, 112(%rax); \ + movq %r15, 120(%rax); \ + popq %rax; #define Restore_SVM_Registers(location) \ @@ -108,6 +117,15 @@ v3_svm_launch: mov 32(%rax), %rbx; \ mov 40(%rax), %rdx; \ mov 48(%rax), %rcx; \ + \ + mov 64(%rax), %r8; \ + mov 72(%rax), %r9; \ + mov 80(%rax), %r10; \ + mov 88(%rax), %r11; \ + mov 96(%rax), %r12; \ + mov 104(%rax), %r13; \ + mov 112(%rax), %r14; \ + mov 120(%rax), %r15; \ pop %rax; @@ -116,6 +134,10 @@ v3_svm_launch: #define PUSHA \ pushq %rbp; \ pushq %rbx; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + pushq %r11; \ pushq %r12; \ pushq %r13; \ pushq %r14; \ @@ -127,6 +149,10 @@ v3_svm_launch: popq %r14; \ popq %r13; \ popq %r12; \ + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ popq %rbx; \ popq %rbp; diff --git a/palacios/src/palacios/vm_guest.c b/palacios/src/palacios/vm_guest.c index 614c6a5..a0a0889 100644 --- a/palacios/src/palacios/vm_guest.c +++ b/palacios/src/palacios/vm_guest.c @@ -29,7 +29,7 @@ v3_vm_cpu_mode_t v3_get_cpu_mode(struct guest_info * info) { struct cr0_32 * cr0; struct cr4_32 * cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4); - struct efer_64 * efer = (struct efer_64 *)&(info->ctrl_regs.efer); + struct efer_64 * efer = (struct efer_64 *)&(info->guest_efer); struct v3_segment * cs = &(info->segments.cs); if (info->shdw_pg_mode == SHADOW_PAGING) { @@ -130,8 +130,9 @@ void v3_print_segments(struct guest_info * info) { for (i = 0; seg_names[i] != NULL; i++) { - PrintDebug("\t%s: Sel=%x, base=%p, limit=%x\n", seg_names[i], seg_ptr[i].selector, - (void *)(addr_t)seg_ptr[i].base, seg_ptr[i].limit); + PrintDebug("\t%s: Sel=%x, base=%p, limit=%x (long_mode=%d, db=%d)\n", seg_names[i], seg_ptr[i].selector, + (void *)(addr_t)seg_ptr[i].base, seg_ptr[i].limit, + seg_ptr[i].long_mode, seg_ptr[i].db); } @@ -158,6 +159,7 @@ void v3_print_ctrl_regs(struct guest_info * info) { } +#ifdef __V3_32BIT__ void v3_print_GPRs(struct guest_info * info) { struct v3_gprs * regs = &(info->vm_regs); int i = 0; @@ -172,3 +174,23 @@ void v3_print_GPRs(struct guest_info * info) { PrintDebug("\t%s=0x%p\n", reg_names[i], (void *)(addr_t)reg_ptr[i]); } } +#elif __V3_64BIT__ +void v3_print_GPRs(struct guest_info * info) { + struct v3_gprs * regs = &(info->vm_regs); + int i = 0; + v3_reg_t * reg_ptr; + char * reg_names[] = { "RDI", "RSI", "RBP", "RSP", "RBX", "RDX", "RCX", "RAX", \ + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", NULL}; + + reg_ptr= (v3_reg_t *)regs; + + PrintDebug("64 bit GPRs:\n"); + + for (i = 0; reg_names[i] != NULL; i++) { + PrintDebug("\t%s=0x%p\n", reg_names[i], (void *)(addr_t)reg_ptr[i]); + } +} + + + +#endif diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c index 2a93ce1..9362504 100644 --- a/palacios/src/palacios/vmm_config.c +++ b/palacios/src/palacios/vmm_config.c @@ -34,6 +34,7 @@ #include #include #include +#include @@ -195,7 +196,7 @@ static int setup_memory_map(struct guest_info * info, struct v3_vm_config * conf v3_add_shadow_mem(info, 0x1000000, 0x8000000, (addr_t)V3_AllocPages(32768)); // test - give linux accesss to PCI space - PAD - v3_add_shadow_mem(info, 0xc0000000,0xffffffff,0xc0000000); + // v3_add_shadow_mem(info, 0xc0000000,0xffffffff,0xc0000000); print_shadow_map(info); @@ -214,6 +215,7 @@ static int setup_devices(struct guest_info * info, struct v3_vm_config * config_ struct vm_device * keyboard = v3_create_keyboard(); struct vm_device * pit = v3_create_pit(); struct vm_device * bochs_debug = v3_create_bochs_debug(); + struct vm_device * os_debug = v3_create_os_debug(); //struct vm_device * serial = v3_create_serial(); struct vm_device * generic = NULL; @@ -241,6 +243,7 @@ static int setup_devices(struct guest_info * info, struct v3_vm_config * config_ v3_attach_device(info, keyboard); // v3_attach_device(info, serial); v3_attach_device(info, bochs_debug); + v3_attach_device(info, os_debug); if (use_ramdisk) { v3_attach_device(info, ramdisk); diff --git a/palacios/src/palacios/vmm_ctrl_regs.c b/palacios/src/palacios/vmm_ctrl_regs.c index 7b637ed..95b36d5 100644 --- a/palacios/src/palacios/vmm_ctrl_regs.c +++ b/palacios/src/palacios/vmm_ctrl_regs.c @@ -25,14 +25,6 @@ #include - -/* Segmentation is a problem here... - * - * When we get a memory operand, presumably we use the default segment (which is?) - * unless an alternate segment was specfied in the prefix... - */ - - #ifndef DEBUG_CTRL_REGS #undef PrintDebug #define PrintDebug(fmt, args...) @@ -62,26 +54,19 @@ int v3_handle_cr0_write(struct guest_info * info) { return -1; } - if (dec_instr.op_type == V3_OP_LMSW) { - // if (v3_opcode_cmp(V3_OPCODE_LMSW, (const uchar_t *)(dec_instr.opcode)) == 0) { + if (dec_instr.op_type == V3_OP_LMSW) { if (handle_lmsw(info, &dec_instr) == -1) { return -1; } - - // } else if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) { } else if (dec_instr.op_type == V3_OP_MOV2CR) { if (handle_mov_to_cr0(info, &dec_instr) == -1) { return -1; } - - // } else if (v3_opcode_cmp(V3_OPCODE_CLTS, (const uchar_t *)(dec_instr.opcode)) == 0) { } else if (dec_instr.op_type == V3_OP_CLTS) { - if (handle_clts(info, &dec_instr) == -1) { return -1; } - } else { PrintError("Unhandled opcode in handle_cr0_write\n"); return -1; @@ -135,6 +120,22 @@ static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_in if (paging_transition) { if (v3_get_mem_mode(info) == VIRTUAL_MEM) { + struct efer_64 * guest_efer = (struct efer_64 *)&(info->guest_efer); + struct efer_64 * shadow_efer = (struct efer_64 *)&(info->ctrl_regs.efer); + + // Check long mode LME to set LME + if (guest_efer->lme == 1) { + PrintDebug("Enabing Long Mode\n"); + guest_efer->lma = 1; + + shadow_efer->lma = 1; + shadow_efer->lme = 1; + + v3_print_segments(info); + + PrintDebug("New EFER %p\n", (void *)*(addr_t *)(shadow_efer)); + } + PrintDebug("Activating Shadow Page Tables\n"); if (v3_activate_shadow_pt(info) == -1) { @@ -150,6 +151,7 @@ static int handle_mov_to_cr0(struct guest_info * info, struct x86_instr * dec_in } } + PrintDebug("New Guest CR0=%x\n",*(uint_t *)guest_cr0); PrintDebug("New CR0=%x\n", *(uint_t *)shadow_cr0); @@ -227,7 +229,6 @@ int v3_handle_cr0_read(struct guest_info * info) { return -1; } - // if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) { if (dec_instr.op_type == V3_OP_MOVCR2) { struct cr0_32 * dst_reg = (struct cr0_32 *)(dec_instr.dst_operand.operand); struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0); @@ -243,7 +244,6 @@ int v3_handle_cr0_read(struct guest_info * info) { PrintDebug("Shadow CR0: %x\n", *(uint_t*)shadow_cr0); PrintDebug("returned CR0: %x\n", *(uint_t*)dst_reg); - // } else if (v3_opcode_cmp(V3_OPCODE_SMSW, (const uchar_t *)(dec_instr.opcode)) == 0) { } else if (dec_instr.op_type == V3_OP_SMSW) { struct cr0_real * shadow_cr0 = (struct cr0_real *)&(info->ctrl_regs.cr0); struct cr0_real * dst_reg = (struct cr0_real *)(dec_instr.dst_operand.operand); @@ -287,7 +287,6 @@ int v3_handle_cr3_write(struct guest_info * info) { return -1; } - // if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) == 0) { if (dec_instr.op_type == V3_OP_MOV2CR) { PrintDebug("MOV2CR3 (cpu_mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode)); @@ -364,13 +363,13 @@ int v3_handle_cr3_read(struct guest_info * info) { return -1; } - // if (v3_opcode_cmp(V3_OPCODE_MOVCR2, (const uchar_t *)(dec_instr.opcode)) == 0) { if (dec_instr.op_type == V3_OP_MOVCR2) { PrintDebug("MOVCR32 (mode=%s)\n", v3_cpu_mode_to_str(info->cpu_mode)); if (info->shdw_pg_mode == SHADOW_PAGING) { - if (info->cpu_mode == LONG) { + if ((v3_get_cpu_mode(info) == LONG) || + (v3_get_cpu_mode(info) == LONG_32_COMPAT)) { struct cr3_64 * dst_reg = (struct cr3_64 *)(dec_instr.dst_operand.operand); struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->shdw_pg_state.guest_cr3); *dst_reg = *guest_cr3; @@ -383,7 +382,8 @@ int v3_handle_cr3_read(struct guest_info * info) { } else if (info->shdw_pg_mode == NESTED_PAGING) { // This is just a passthrough operation which we probably don't need here - if (info->cpu_mode == LONG) { + if ((v3_get_cpu_mode(info) == LONG) || + (v3_get_cpu_mode(info) == LONG_32_COMPAT)) { struct cr3_64 * dst_reg = (struct cr3_64 *)(dec_instr.dst_operand.operand); struct cr3_64 * guest_cr3 = (struct cr3_64 *)&(info->ctrl_regs.cr3); *dst_reg = *guest_cr3; @@ -416,6 +416,7 @@ int v3_handle_cr4_write(struct guest_info * info) { uchar_t instr[15]; int ret; struct x86_instr dec_instr; + v3_vm_cpu_mode_t cpu_mode = v3_get_cpu_mode(info); if (info->mem_mode == PHYSICAL_MEM) { ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); @@ -428,13 +429,12 @@ int v3_handle_cr4_write(struct guest_info * info) { return -1; } - // if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) != 0) { if (dec_instr.op_type != V3_OP_MOV2CR) { PrintError("Invalid opcode in write to CR4\n"); return -1; } - if ((info->cpu_mode == PROTECTED) || (info->cpu_mode == PROTECTED_PAE)) { + if ((cpu_mode == PROTECTED) || (cpu_mode == PROTECTED_PAE)) { struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand); struct cr4_32 * cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4); @@ -465,8 +465,23 @@ int v3_handle_cr4_write(struct guest_info * info) { *cr4 = *new_cr4; PrintDebug("New CR4=%x\n", *(uint_t *)cr4); + } else if ((cpu_mode == LONG) || (cpu_mode == LONG_32_COMPAT)) { + struct cr4_64 * new_cr4 = (struct cr4_64 *)(dec_instr.src_operand.operand); + struct cr4_64 * cr4 = (struct cr4_64 *)&(info->ctrl_regs.cr4); + + PrintDebug("Old CR4=%p\n", (void *)*(addr_t *)cr4); + PrintDebug("New CR4=%p\n", (void *)*(addr_t *)new_cr4); + + if (new_cr4->pae == 0) { + // cannot turn off PAE in long mode GPF the guest + PrintError("Cannot disable PAE in long mode, sending GPF\n"); + return -1; + } + + *cr4 = *new_cr4; + } else { - PrintError("CR4 write not supported in CPU_MODE: %d\n", info->cpu_mode); + PrintError("CR4 write not supported in CPU_MODE: %s\n", v3_cpu_mode_to_str(cpu_mode)); return -1; } @@ -477,7 +492,7 @@ int v3_handle_cr4_write(struct guest_info * info) { int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data) { struct guest_info * info = (struct guest_info *)(priv_data); - PrintDebug("EFER Read\n"); + PrintDebug("EFER Read HI=%x LO=%x\n", info->guest_efer.hi, info->guest_efer.lo); dst->value = info->guest_efer.value; @@ -488,64 +503,66 @@ int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data) { int v3_handle_efer_write(uint_t msr, struct v3_msr src, void * priv_data) { struct guest_info * info = (struct guest_info *)(priv_data); - struct efer_64 * new_efer = (struct efer_64 *)&(src.value); + //struct efer_64 * new_efer = (struct efer_64 *)&(src.value); struct efer_64 * shadow_efer = (struct efer_64 *)&(info->ctrl_regs.efer); struct v3_msr * guest_efer = &(info->guest_efer); PrintDebug("EFER Write\n"); + PrintDebug("EFER Write Values: HI=%x LO=%x\n", src.hi, src.lo); PrintDebug("Old EFER=%p\n", (void *)*(addr_t*)(shadow_efer)); // We virtualize the guests efer to hide the SVME and LMA bits guest_efer->value = src.value; - - if ((info->shdw_pg_mode == SHADOW_PAGING) && - (v3_get_mem_mode(info) == PHYSICAL_MEM)) { - - if ((shadow_efer->lme == 0) && (new_efer->lme == 1)) { - PrintDebug("Transition to longmode\n"); - PrintDebug("Creating Passthrough 64 bit page tables\n"); - - // Delete the old 32 bit direct map page tables - /* - * JRL BUG? - * Will these page tables always be in PAE format?? - */ - PrintDebug("Deleting old PAE Page tables\n"); - PrintError("JRL BUG?: Will the old page tables always be in PAE format??\n"); - delete_page_tables_32PAE((pdpe32pae_t *)V3_VAddr((void *)(info->direct_map_pt))); - - // create 64 bit direct map page table - info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(info)); - - // reset cr3 to new page tables - info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt); - - // We mark the Long Mode active because we have paging enabled - // We do this in new_efer because we copy the msr in full below - new_efer->lma = 1; - - } else if ((shadow_efer->lme == 1) && (new_efer->lme == 0)) { - // transition out of long mode - //((struct efer_64 *)&(info->guest_efer.value))->lme = 0; - //((struct efer_64 *)&(info->guest_efer.value))->lma = 0; - - return -1; - } - - // accept all changes to the efer, but make sure that the SVME bit is set... (SVM specific) - *shadow_efer = *new_efer; - shadow_efer->svme = 1; - - - - PrintDebug("New EFER=%p\n", (void *)*(addr_t *)(shadow_efer)); - } else { - PrintError("Write to EFER in NESTED_PAGING or VIRTUAL_MEM mode not supported\n"); - // Should probably just check for a long mode transition, and bomb out if it is - return -1; - } - + + v3_print_segments(info); + // We have to handle long mode writes.... + + /* + if ((info->shdw_pg_mode == SHADOW_PAGING) && + (v3_get_mem_mode(info) == PHYSICAL_MEM)) { + + if ((shadow_efer->lme == 0) && (new_efer->lme == 1)) { + PrintDebug("Transition to longmode\n"); + PrintDebug("Creating Passthrough 64 bit page tables\n"); + + // Delete the old 32 bit direct map page tables + + PrintDebug("Deleting old PAE Page tables\n"); + PrintError("JRL BUG?: Will the old page tables always be in PAE format??\n"); + delete_page_tables_32PAE((pdpe32pae_t *)V3_VAddr((void *)(info->direct_map_pt))); + + // create 64 bit direct map page table + info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(info)); + + // reset cr3 to new page tables + info->ctrl_regs.cr3 = *(addr_t*)&(info->direct_map_pt); + + // We mark the Long Mode active because we have paging enabled + // We do this in new_efer because we copy the msr in full below + // new_efer->lma = 1; + + } else if ((shadow_efer->lme == 1) && (new_efer->lme == 0)) { + // transition out of long mode + //((struct efer_64 *)&(info->guest_efer.value))->lme = 0; + //((struct efer_64 *)&(info->guest_efer.value))->lma = 0; + + return -1; + } + + // accept all changes to the efer, but make sure that the SVME bit is set... (SVM specific) + *shadow_efer = *new_efer; + shadow_efer->svme = 1; + + + + PrintDebug("New EFER=%p\n", (void *)*(addr_t *)(shadow_efer)); + } else { + PrintError("Write to EFER in NESTED_PAGING or VIRTUAL_MEM mode not supported\n"); + // Should probably just check for a long mode transition, and bomb out if it is + return -1; + } + */ info->rip += 2; // WRMSR/RDMSR are two byte operands return 0; diff --git a/palacios/src/palacios/vmm_paging.c b/palacios/src/palacios/vmm_paging.c index 5089c5c..b492d5b 100644 --- a/palacios/src/palacios/vmm_paging.c +++ b/palacios/src/palacios/vmm_paging.c @@ -59,19 +59,20 @@ void delete_page_tables_32(pde32_t * pde) { if (pde == NULL) { return; } + PrintDebug("Deleting Page Tables -- PDE (%p)\n", pde); for (i = 0; (i < MAX_PDE32_ENTRIES); i++) { if (pde[i].present) { // We double cast, first to an addr_t to handle 64 bit issues, then to the pointer - PrintDebug("PTE base addr %x \n", pde[i].pt_base_addr); + pte32_t * pte = (pte32_t *)((addr_t)(uint_t)(pde[i].pt_base_addr << PAGE_POWER)); - PrintDebug("Deleting PTE %d (%p)\n", i, pte); + V3_FreePage(pte); } } - PrintDebug("Deleting PDE (%p)\n", pde); + V3_FreePage(V3_PAddr(pde)); } @@ -434,6 +435,40 @@ int v3_check_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t va } +static int get_data_page_type_cb(struct guest_info * info, page_type_t type, + addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data) { + switch (type) { + case PAGE_4KB: + case PAGE_2MB: + case PAGE_4MB: + case PAGE_1GB: + return 1; + default: + return 0; + } +} + + + +page_type_t v3_get_guest_data_page_type_32(struct guest_info * info, v3_reg_t cr3, addr_t vaddr) { + return v3_drill_guest_pt_32(info, cr3, vaddr, get_data_page_type_cb, NULL); +} +page_type_t v3_get_guest_data_page_type_32pae(struct guest_info * info, v3_reg_t cr3, addr_t vaddr) { + return v3_drill_guest_pt_32pae(info, cr3, vaddr, get_data_page_type_cb, NULL); +} +page_type_t v3_get_guest_data_page_type_64(struct guest_info * info, v3_reg_t cr3, addr_t vaddr) { + return v3_drill_guest_pt_64(info, cr3, vaddr, get_data_page_type_cb, NULL); +} +page_type_t v3_get_host_data_page_type_32(struct guest_info * info, v3_reg_t cr3, addr_t vaddr) { + return v3_drill_host_pt_32(info, cr3, vaddr, get_data_page_type_cb, NULL); +} +page_type_t v3_get_host_data_page_type_32pae(struct guest_info * info, v3_reg_t cr3, addr_t vaddr) { + return v3_drill_host_pt_32pae(info, cr3, vaddr, get_data_page_type_cb, NULL); +} +page_type_t v3_get_host_data_page_type_64(struct guest_info * info, v3_reg_t cr3, addr_t vaddr) { + return v3_drill_host_pt_64(info, cr3, vaddr, get_data_page_type_cb, NULL); +} + /* * PAGE TABLE LOOKUP FUNCTIONS diff --git a/palacios/src/palacios/vmm_shadow_paging.c b/palacios/src/palacios/vmm_shadow_paging.c index 67b5a77..968a840 100644 --- a/palacios/src/palacios/vmm_shadow_paging.c +++ b/palacios/src/palacios/vmm_shadow_paging.c @@ -92,7 +92,7 @@ int v3_init_shadow_page_state(struct guest_info * info) { // creates new shadow page tables // updates the shadow CR3 register to point to the new pts int v3_activate_shadow_pt(struct guest_info * info) { - switch (info->cpu_mode) { + switch (v3_get_cpu_mode(info)) { case PROTECTED: return activate_shadow_pt_32(info); @@ -103,7 +103,7 @@ int v3_activate_shadow_pt(struct guest_info * info) { case LONG_16_COMPAT: return activate_shadow_pt_64(info); default: - PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(info->cpu_mode)); + PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_cpu_mode(info))); return -1; } @@ -125,12 +125,12 @@ int v3_activate_passthrough_pt(struct guest_info * info) { int v3_handle_shadow_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code) { - if (info->mem_mode == PHYSICAL_MEM) { + if (v3_get_mem_mode(info) == PHYSICAL_MEM) { // If paging is not turned on we need to handle the special cases return handle_special_page_fault(info, fault_addr, fault_addr, error_code); - } else if (info->mem_mode == VIRTUAL_MEM) { + } else if (v3_get_mem_mode(info) == VIRTUAL_MEM) { - switch (info->cpu_mode) { + switch (v3_get_cpu_mode(info)) { case PROTECTED: return handle_shadow_pagefault_32(info, fault_addr, error_code); break; @@ -142,7 +142,7 @@ int v3_handle_shadow_pagefault(struct guest_info * info, addr_t fault_addr, pf_e return handle_shadow_pagefault_64(info, fault_addr, error_code); break; default: - PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(info->cpu_mode)); + PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_cpu_mode(info))); return -1; } } else { @@ -158,14 +158,14 @@ int v3_handle_shadow_invlpg(struct guest_info * info) { int ret = 0; addr_t vaddr = 0; - if (info->mem_mode != VIRTUAL_MEM) { + if (v3_get_mem_mode(info) != VIRTUAL_MEM) { // Paging must be turned on... // should handle with some sort of fault I think PrintError("ERROR: INVLPG called in non paged mode\n"); return -1; } - if (info->mem_mode == PHYSICAL_MEM) { + if (v3_get_mem_mode(info) == PHYSICAL_MEM) { ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); } else { ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); @@ -192,7 +192,7 @@ int v3_handle_shadow_invlpg(struct guest_info * info) { info->rip += dec_instr.instr_length; - switch (info->cpu_mode) { + switch (v3_get_cpu_mode(info)) { case PROTECTED: return handle_shadow_invlpg_32(info, vaddr); case PROTECTED_PAE: @@ -202,7 +202,7 @@ int v3_handle_shadow_invlpg(struct guest_info * info) { case LONG_16_COMPAT: return handle_shadow_invlpg_64(info, vaddr); default: - PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(info->cpu_mode)); + PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_cpu_mode(info))); return -1; } } diff --git a/palacios/src/palacios/vmm_shadow_paging_64.h b/palacios/src/palacios/vmm_shadow_paging_64.h index 0d45ca7..dcb7faf 100644 --- a/palacios/src/palacios/vmm_shadow_paging_64.h +++ b/palacios/src/palacios/vmm_shadow_paging_64.h @@ -300,6 +300,9 @@ static int handle_pde_shadow_pagefault_64(struct guest_info * info, addr_t fault if (guest_pde->large_page == 0) { shadow_pde->writable = guest_pde->writable; } else { + // This large page flag is temporary until we can get a working cache.... + ((pde64_2MB_t *)guest_pde)->vmm_info = V3_LARGE_PG; + if (error_code.write) { shadow_pde->writable = guest_pde->writable; ((pde64_2MB_t *)guest_pde)->dirty = 1; @@ -506,7 +509,7 @@ static int handle_2MB_shadow_pagefault_64(struct guest_info * info, // Inconsistent state... // Guest Re-Entry will flush tables and everything should now workd PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n"); - PrintHostPageTree(info, fault_addr, info->ctrl_regs.cr3); + //PrintHostPageTree(info, fault_addr, info->ctrl_regs.cr3); return 0; } @@ -583,7 +586,7 @@ static int handle_2MB_shadow_pagefault_64(struct guest_info * info, return -1; } - PrintHostPageTree(info, fault_addr, info->ctrl_regs.cr3); + // PrintHostPageTree(info, fault_addr, info->ctrl_regs.cr3); PrintDebug("Returning from large page fault handler\n"); return 0; } @@ -591,8 +594,81 @@ static int handle_2MB_shadow_pagefault_64(struct guest_info * info, +static int invalidation_cb_64(struct guest_info * info, page_type_t type, + addr_t vaddr, addr_t page_ptr, addr_t page_pa, + void * private_data) { -static inline int handle_shadow_invlpg_64(struct guest_info * info, addr_t vaddr) { - PrintError("64 bit shadow paging not implemented\n"); + switch (type) { + case PAGE_PML464: + { + pml4e64_t * pml = (pml4e64_t *)page_ptr; + + if (pml[PML4E64_INDEX(vaddr)].present == 0) { + return 1; + } + return 0; + } + case PAGE_PDP64: + { + pdpe64_t * pdp = (pdpe64_t *)page_ptr; + pdpe64_t * pdpe = &(pdp[PDPE64_INDEX(vaddr)]); + + if (pdpe->present == 0) { + return 1; + } + + if (pdpe->vmm_info == V3_LARGE_PG) { + PrintError("1 Gigabyte pages not supported\n"); + return -1; + + pdpe->present = 0; + return 1; + } + + return 0; + } + case PAGE_PD64: + { + pde64_t * pd = (pde64_t *)page_ptr; + pde64_t * pde = &(pd[PDE64_INDEX(vaddr)]); + + if (pde->present == 0) { + return 1; + } + + if (pde->vmm_info == V3_LARGE_PG) { + pde->present = 0; + return 1; + } + + return 0; + } + case PAGE_PT64: + { + pte64_t * pt = (pte64_t *)page_ptr; + + pt[PTE64_INDEX(vaddr)].present = 0; + + return 1; + } + default: + PrintError("Invalid Page Type\n"); + return -1; + + } + + // should not get here + PrintError("Should not get here....\n"); return -1; } + + +static inline int handle_shadow_invlpg_64(struct guest_info * info, addr_t vaddr) { + int ret = v3_drill_host_pt_64(info, info->ctrl_regs.cr3, vaddr, invalidation_cb_64, NULL); + if (ret == -1) { + PrintError("Page table drill returned error.... \n"); + PrintHostPageTree(info, vaddr, info->ctrl_regs.cr3); + } + + return (ret == -1) ? -1 : 0; +} diff --git a/palacios/src/palacios/vmm_xed.c b/palacios/src/palacios/vmm_xed.c index 73dd4ce..6d5a97c 100644 --- a/palacios/src/palacios/vmm_xed.c +++ b/palacios/src/palacios/vmm_xed.c @@ -608,6 +608,8 @@ static int get_memory_operand(struct guest_info * info, xed_decoded_inst_t * xe static int xed_reg_to_v3_reg(struct guest_info * info, xed_reg_enum_t xed_reg, addr_t * v3_reg, uint_t * reg_len) { + PrintError("Xed Register: %s\n", xed_reg_enum_t2str(xed_reg)); + switch (xed_reg) { case XED_REG_INVALID: *v3_reg = 0; @@ -774,6 +776,146 @@ static int xed_reg_to_v3_reg(struct guest_info * info, xed_reg_enum_t xed_reg, a return GPR_REGISTER; + + + + case XED_REG_R8: + *v3_reg = (addr_t)&(info->vm_regs.r8); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R8D: + *v3_reg = (addr_t)&(info->vm_regs.r8); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R8W: + *v3_reg = (addr_t)&(info->vm_regs.r8); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R8B: + *v3_reg = (addr_t)&(info->vm_regs.r8); + *reg_len = 1; + return GPR_REGISTER; + + case XED_REG_R9: + *v3_reg = (addr_t)&(info->vm_regs.r9); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R9D: + *v3_reg = (addr_t)&(info->vm_regs.r9); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R9W: + *v3_reg = (addr_t)&(info->vm_regs.r9); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R9B: + *v3_reg = (addr_t)&(info->vm_regs.r9); + *reg_len = 1; + return GPR_REGISTER; + + case XED_REG_R10: + *v3_reg = (addr_t)&(info->vm_regs.r10); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R10D: + *v3_reg = (addr_t)&(info->vm_regs.r10); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R10W: + *v3_reg = (addr_t)&(info->vm_regs.r10); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R10B: + *v3_reg = (addr_t)&(info->vm_regs.r10); + *reg_len = 1; + return GPR_REGISTER; + + case XED_REG_R11: + *v3_reg = (addr_t)&(info->vm_regs.r11); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R11D: + *v3_reg = (addr_t)&(info->vm_regs.r11); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R11W: + *v3_reg = (addr_t)&(info->vm_regs.r11); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R11B: + *v3_reg = (addr_t)&(info->vm_regs.r11); + *reg_len = 1; + return GPR_REGISTER; + + case XED_REG_R12: + *v3_reg = (addr_t)&(info->vm_regs.r12); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R12D: + *v3_reg = (addr_t)&(info->vm_regs.r12); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R12W: + *v3_reg = (addr_t)&(info->vm_regs.r12); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R12B: + *v3_reg = (addr_t)&(info->vm_regs.r12); + *reg_len = 1; + return GPR_REGISTER; + + case XED_REG_R13: + *v3_reg = (addr_t)&(info->vm_regs.r13); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R13D: + *v3_reg = (addr_t)&(info->vm_regs.r13); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R13W: + *v3_reg = (addr_t)&(info->vm_regs.r13); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R13B: + *v3_reg = (addr_t)&(info->vm_regs.r13); + *reg_len = 1; + return GPR_REGISTER; + + case XED_REG_R14: + *v3_reg = (addr_t)&(info->vm_regs.r14); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R14D: + *v3_reg = (addr_t)&(info->vm_regs.r14); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R14W: + *v3_reg = (addr_t)&(info->vm_regs.r14); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R14B: + *v3_reg = (addr_t)&(info->vm_regs.r14); + *reg_len = 1; + return GPR_REGISTER; + + case XED_REG_R15: + *v3_reg = (addr_t)&(info->vm_regs.r15); + *reg_len = 8; + return GPR_REGISTER; + case XED_REG_R15D: + *v3_reg = (addr_t)&(info->vm_regs.r15); + *reg_len = 4; + return GPR_REGISTER; + case XED_REG_R15W: + *v3_reg = (addr_t)&(info->vm_regs.r15); + *reg_len = 2; + return GPR_REGISTER; + case XED_REG_R15B: + *v3_reg = (addr_t)&(info->vm_regs.r15); + *reg_len = 1; + return GPR_REGISTER; + + /* * CTRL REGS */ @@ -894,45 +1036,7 @@ static int xed_reg_to_v3_reg(struct guest_info * info, xed_reg_enum_t xed_reg, a - case XED_REG_R8: - case XED_REG_R8D: - case XED_REG_R8W: - case XED_REG_R8B: - - case XED_REG_R9: - case XED_REG_R9D: - case XED_REG_R9W: - case XED_REG_R9B: - - case XED_REG_R10: - case XED_REG_R10D: - case XED_REG_R10W: - case XED_REG_R10B: - - case XED_REG_R11: - case XED_REG_R11D: - case XED_REG_R11W: - case XED_REG_R11B: - - case XED_REG_R12: - case XED_REG_R12D: - case XED_REG_R12W: - case XED_REG_R12B: - - case XED_REG_R13: - case XED_REG_R13D: - case XED_REG_R13W: - case XED_REG_R13B: - case XED_REG_R14: - case XED_REG_R14D: - case XED_REG_R14W: - case XED_REG_R14B: - - case XED_REG_R15: - case XED_REG_R15D: - case XED_REG_R15W: - case XED_REG_R15B: case XED_REG_XMM0: case XED_REG_XMM1: