From: Jack Lange Date: Mon, 27 Oct 2008 21:05:51 +0000 (-0500) Subject: added MSR hook framework X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=5b6278751429f59297ce74e614d50632daea3748 added MSR hook framework added preliminary CR4 write handler, to handle PAE mode switch added passthrough 32bit PAE page tables for non-paged 32 bit PAE mode --- diff --git a/build/Makefile b/build/Makefile index 4c190c8..e4ee438 100644 --- a/build/Makefile +++ b/build/Makefile @@ -203,7 +203,10 @@ palacios-full64: palacios-lean: (cd $(PALACIOS_BUILD_DIR) && make LEAN_AND_MEAN=1 world) -kitten: +kitten: palacios64 + cp $(PALACIOS_BUILD_DIR)/libv3vee.a $(KITTEN_TOP_DIR)/palacios/ + cp $(PALACIOS_BUILD_DIR)/../lib/xed/libxed32e.a $(KITTEN_TOP_DIR)/palacios/ + cp $(PALACIOS_BUILD_DIR)/vm_kernel $(KITTEN_TOP_DIR)/palacios/ (cd $(KITTEN_TOP_DIR) && make) kitten-full: palacios-full64 diff --git a/kitten/arch/x86_64/boot/Makefile b/kitten/arch/x86_64/boot/Makefile index 783f48d..ad8a6e1 100644 --- a/kitten/arch/x86_64/boot/Makefile +++ b/kitten/arch/x86_64/boot/Makefile @@ -58,7 +58,7 @@ $(obj)/compressed/vmlwk: FORCE FDARGS = console=serial debug_mptable=1 init_argv="one two three" init_envp="one=1 two=2 three=3" # Set this if you want an initrd included with the isoimage kernel ifdef CONFIG_V3VEE -FDINITRD = /opt/vmm-tools/isos/puppy.iso +FDINITRD = /opt/vmm-tools/isos/finnix.iso else FDINITRD = init_task endif diff --git a/kitten/boot-kitten b/kitten/boot-kitten index dc5b9f7..cf5dfe4 100755 --- a/kitten/boot-kitten +++ b/kitten/boot-kitten @@ -1,8 +1,9 @@ #!/bin/sh exec /usr/local/qemu/bin/qemu-system-x86_64 \ + -smp 1 \ -m 1024 \ - -nographic \ + -serial file:./serial.out \ -cdrom ./arch/x86_64/boot/image.iso \ < /dev/null diff --git a/palacios/build/Makefile b/palacios/build/Makefile index 8712934..2ea49e2 100644 --- a/palacios/build/Makefile +++ b/palacios/build/Makefile @@ -260,6 +260,8 @@ VMM_OBJS := \ palacios/vmm_queue.o \ 
palacios/vmm_host_events.o \ palacios/svm_lowlevel.o \ + palacios/vmm_msr.o \ + palacios/svm_msr.o \ # vmx.c vmcs_gen.c vmcs.c diff --git a/palacios/include/palacios/svm_msr.h b/palacios/include/palacios/svm_msr.h new file mode 100644 index 0000000..30975f1 --- /dev/null +++ b/palacios/include/palacios/svm_msr.h @@ -0,0 +1,39 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#ifndef __SVM_MSR_H__ +#define __SVM_MSR_H__ + + +#ifdef __V3VEE__ + +#include + +addr_t v3_init_svm_msr_map(struct guest_info * info); + +int v3_handle_msr_write(struct guest_info * info); + +int v3_handle_msr_read(struct guest_info * info); + + + + +#endif // ! 
__V3VEE__ + +#endif diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 010fc76..908eb28 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -31,7 +31,7 @@ #include #include #include - +#include @@ -133,6 +133,8 @@ struct guest_info { struct v3_intr_state intr_state; struct vmm_io_map io_map; + + struct v3_msr_map msr_map; // device_map struct vmm_dev_mgr dev_mgr; diff --git a/palacios/include/palacios/vmm_ctrl_regs.h b/palacios/include/palacios/vmm_ctrl_regs.h index 1633217..c57171c 100644 --- a/palacios/include/palacios/vmm_ctrl_regs.h +++ b/palacios/include/palacios/vmm_ctrl_regs.h @@ -206,6 +206,13 @@ int v3_handle_cr0_read(struct guest_info * info); int v3_handle_cr3_write(struct guest_info * info); int v3_handle_cr3_read(struct guest_info * info); +int v3_handle_cr4_write(struct guest_info * info); +int v3_handle_cr4_read(struct guest_info * info); + + +int v3_handle_efer_write(uint_t msr, struct v3_msr src, void * priv_data); +int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data); + #endif // ! 
__V3VEE__ diff --git a/palacios/include/palacios/vmm_decoder.h b/palacios/include/palacios/vmm_decoder.h index c9636d3..df10150 100644 --- a/palacios/include/palacios/vmm_decoder.h +++ b/palacios/include/palacios/vmm_decoder.h @@ -241,6 +241,7 @@ static inline v3_reg_t get_gpr_mask(struct guest_info * info) { return 0xffff; break; case PROTECTED: + case PROTECTED_PAE: return 0xffffffff; case LONG: case LONG_32_COMPAT: @@ -261,6 +262,7 @@ static inline addr_t get_addr_linear(struct guest_info * info, addr_t addr, stru break;*/ case PROTECTED: + case PROTECTED_PAE: return addr + seg->base; break; diff --git a/palacios/include/palacios/vmm_msr.h b/palacios/include/palacios/vmm_msr.h new file mode 100644 index 0000000..56a1b30 --- /dev/null +++ b/palacios/include/palacios/vmm_msr.h @@ -0,0 +1,84 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + +#ifndef __VMM_MSR_H__ +#define __VMM_MSR_H__ + + +#ifdef __V3VEE__ + +#include +#include + +struct guest_info; + +struct v3_msr { + + union { + ullong_t value; + + struct { + uint_t lo; + uint_t hi; + } ; + + }; +}; + + +struct v3_msr_hook { + uint_t msr; + + int (*read)(uint_t msr, struct v3_msr * dst, void * priv_data); + int (*write)(uint_t msr, struct v3_msr src, void * priv_data); + + void * priv_data; + + struct list_head link; +}; + + + +struct v3_msr_hook; + +struct v3_msr_map { + uint_t num_hooks; + struct list_head hook_list; +}; + + +void v3_init_msr_map(struct guest_info * info); + +int v3_unhook_msr(struct guest_info * info, uint_t msr); + +int v3_hook_msr(struct guest_info * info, uint_t msr, + int (*read)(uint_t msr, struct v3_msr * dst, void * priv_data), + int (*write)(uint_t msr, struct v3_msr src, void * priv_data), + void * priv_data); + + +struct v3_msr_hook * v3_get_msr_hook(struct guest_info * info, uint_t msr); + +void v3_print_msr_map(struct guest_info * info); + + + +#endif // ! __V3VEE__ + +#endif diff --git a/palacios/include/palacios/vmm_paging.h b/palacios/include/palacios/vmm_paging.h index 313afc7..41d4b6f 100644 --- a/palacios/include/palacios/vmm_paging.h +++ b/palacios/include/palacios/vmm_paging.h @@ -95,6 +95,10 @@ the host state in the vmcs before entering the guest. #define MAX_PTE32_ENTRIES 1024 #define MAX_PDE32_ENTRIES 1024 +#define MAX_PTE32PAE_ENTRIES 512 +#define MAX_PDE32PAE_ENTRIES 512 +#define MAX_PDPE32PAE_ENTRIES 4 + #define MAX_PTE64_ENTRIES 512 #define MAX_PDE64_ENTRIES 512 #define MAX_PDPE64_ENTRIES 512 @@ -105,16 +109,29 @@ the host state in the vmcs before entering the guest. 
#define PDE32_INDEX(x) ((((uint_t)x) >> 22) & 0x3ff) #define PTE32_INDEX(x) ((((uint_t)x) >> 12) & 0x3ff) + /* Gets the base address needed for a Page Table entry */ +/* Deprecate these :*/ #define PD32_BASE_ADDR(x) (((uint_t)x) >> 12) #define PT32_BASE_ADDR(x) (((uint_t)x) >> 12) #define PD32_4MB_BASE_ADDR(x) (((uint_t)x) >> 22) - #define PML4E64_BASE_ADDR(x) (((ullong_t)x) >> 12) #define PDPE64_BASE_ADDR(x) (((ullong_t)x) >> 12) #define PDE64_BASE_ADDR(x) (((ullong_t)x) >> 12) #define PTE64_BASE_ADDR(x) (((ullong_t)x) >> 12) +/* Accessor functions for the page table structures */ +#define PDE32_T_ADDR(x) (((x).pt_base_addr) << 12) +#define PTE32_T_ADDR(x) (((x).page_base_addr) << 12) +#define PDE32_4MB_T_ADDR(x) (((x).page_base_addr) << 22) + +/* Replace The above with these... */ +#define PAGE_BASE_ADDR(x) (((uint_t)x) >> 12) +#define LARGE_PAGE_BASE_ADDR(x) (((uint_t)x) >> 22) +#define BASE_TO_PAGE_ADDR(x) (((uint_t)x) << 12) +#define LARGE_BASE_TO_PAGE_ADDR(x) (((uint_t)x) << 22) + + #define PT32_PAGE_ADDR(x) (((uint_t)x) & 0xfffff000) #define PT32_PAGE_OFFSET(x) (((uint_t)x) & 0xfff) @@ -142,10 +159,7 @@ the host state in the vmcs before entering the guest. 
-/* Accessor functions for the page table structures */ -#define PDE32_T_ADDR(x) (((x).pt_base_addr) << 12) -#define PTE32_T_ADDR(x) (((x).page_base_addr) << 12) -#define PDE32_4MB_T_ADDR(x) (((x).page_base_addr) << 22) + /* Page Table Flag Values */ #define PT32_HOOK 0x1 @@ -170,7 +184,7 @@ typedef struct pde32 { uint_t global_page : 1; uint_t vmm_info : 3; uint_t pt_base_addr : 20; -} pde32_t; +} __attribute__((packed)) pde32_t; typedef struct pde32_4MB { uint_t present : 1; @@ -187,7 +201,7 @@ typedef struct pde32_4MB { uint_t rsvd : 9; uint_t page_base_addr : 10; -} pde32_4MB_t; +} __attribute__((packed)) pde32_4MB_t; typedef struct pte32 { uint_t present : 1; @@ -201,14 +215,76 @@ typedef struct pte32 { uint_t global_page : 1; uint_t vmm_info : 3; uint_t page_base_addr : 20; -} pte32_t; +} __attribute__((packed)) pte32_t; /* ***** */ /* 32 bit PAE PAGE STRUCTURES */ +typedef struct pdpe32pae { + uint_t present : 1; + uint_t rsvd : 2; // MBZ + uint_t write_through : 1; + uint_t cache_disable : 1; + uint_t accessed : 1; + uint_t avail : 1; + uint_t rsvd2 : 2; // MBZ + uint_t vmm_info : 3; + uint_t pd_base_addr : 24; + uint_t rsvd3 : 28; // MBZ +} __attribute__((packed)) pdpe32pae_t; + + + +typedef struct pde32pae { + uint_t present : 1; + uint_t writable : 1; + uint_t user_page : 1; + uint_t write_through : 1; + uint_t cache_disable : 1; + uint_t accessed : 1; + uint_t avail : 1; + uint_t large_page : 1; + uint_t global_page : 1; + uint_t vmm_info : 3; + uint_t pt_base_addr : 24; + uint_t rsvd : 28; +} __attribute__((packed)) pde32pae_t; + +typedef struct pde32pae_4MB { + uint_t present : 1; + uint_t writable : 1; + uint_t user_page : 1; + uint_t write_through : 1; + uint_t cache_disable : 1; + uint_t accessed : 1; + uint_t dirty : 1; + uint_t one : 1; + uint_t global_page : 1; + uint_t vmm_info : 3; + uint_t pat : 1; + uint_t rsvd : 9; + uint_t page_base_addr : 14; + uint_t rsvd2 : 28; + +} __attribute__((packed)) pde32pae_4MB_t; + +typedef struct pte32pae { + 
uint_t present : 1; + uint_t writable : 1; + uint_t user_page : 1; + uint_t write_through : 1; + uint_t cache_disable : 1; + uint_t accessed : 1; + uint_t dirty : 1; + uint_t pte_attr : 1; + uint_t global_page : 1; + uint_t vmm_info : 3; + uint_t page_base_addr : 24; + uint_t rsvd : 28; +} __attribute__((packed)) pte32pae_t; + + + -// -// Fill in -// /* ********** */ @@ -227,7 +303,7 @@ typedef struct pml4e64 { ullong_t pdp_base_addr : 40; uint_t available : 11; uint_t no_execute : 1; -} pml4e64_t; +} __attribute__((packed)) pml4e64_t; typedef struct pdpe64 { @@ -244,7 +320,7 @@ typedef struct pdpe64 { ullong_t pd_base_addr : 40; uint_t available : 11; uint_t no_execute : 1; -} pdpe64_t; +} __attribute__((packed)) pdpe64_t; @@ -263,7 +339,7 @@ typedef struct pde64 { ullong_t pt_base_addr : 40; uint_t available : 11; uint_t no_execute : 1; -} pde64_t; +} __attribute__((packed)) pde64_t; typedef struct pte64 { uint_t present : 1; @@ -279,7 +355,7 @@ typedef struct pte64 { ullong_t page_base_addr : 40; uint_t available : 11; uint_t no_execute : 1; -} pte64_t; +} __attribute__((packed)) pte64_t; /* *************** */ @@ -290,7 +366,7 @@ typedef struct pf_error_code { uint_t rsvd_access : 1; // if 1, fault from reading a 1 from a reserved field (?) 
uint_t ifetch : 1; // if 1, faulting access was an instr fetch (only with NX) uint_t rsvd : 27; -} pf_error_t; +} __attribute__((packed)) pf_error_t; @@ -317,6 +393,7 @@ pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t acces struct guest_info; pde32_t * create_passthrough_pts_32(struct guest_info * guest_info); +pdpe32pae_t * create_passthrough_pts_PAE32(struct guest_info * guest_info); pml4e64_t * create_passthrough_pts_64(struct guest_info * info); @@ -332,6 +409,10 @@ void PrintPT32(addr_t starting_address, pte32_t * pte); void PrintPD32(pde32_t * pde); void PrintPTE32(addr_t virtual_address, pte32_t * pte); void PrintPDE32(addr_t virtual_address, pde32_t * pde); + +void PrintDebugPageTables32PAE(pdpe32pae_t * pde); +void PrintPTE32PAE(addr_t virtual_address, pte32pae_t * pte); +void PrintPDE32PAE(addr_t virtual_address, pde32pae_t * pde); void PrintPTE64(addr_t virtual_address, pte64_t * pte); #endif // !__V3VEE__ diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index d533c92..ff883ff 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -35,6 +35,7 @@ #include #include #include +#include @@ -72,6 +73,8 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { //ctrl_area->instrs.instrs.CR0 = 1; ctrl_area->cr_reads.cr0 = 1; ctrl_area->cr_writes.cr0 = 1; + ctrl_area->cr_reads.cr4 = 1; + ctrl_area->cr_writes.cr4 = 1; /* Set up the efer to enable 64 bit page tables */ @@ -88,6 +91,11 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { guest_state->efer |= EFER_MSR_svm_enable; + v3_hook_msr(vm_info, EFER_MSR, + &v3_handle_efer_read, + &v3_handle_efer_write, + vm_info); + guest_state->rflags = 0x00000002; // The reserved bit is always 1 @@ -126,6 +134,8 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { ctrl_area->exceptions.nmi = 1; */ + + // Debug of boot on physical machines - 7/14/08 ctrl_area->instrs.NMI=1; 
ctrl_area->instrs.SMI=1; @@ -133,8 +143,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { ctrl_area->instrs.PAUSE=1; ctrl_area->instrs.shutdown_evts=1; - - vm_info->vm_regs.rdx = 0x00000f00; guest_state->cr0 = 0x60000010; @@ -176,6 +184,11 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { guest_state->dr6 = 0x00000000ffff0ff0LL; guest_state->dr7 = 0x0000000000000400LL; + + + + + if (vm_info->io_map.num_ports > 0) { struct vmm_io_hook * iter; addr_t io_port_bitmap; @@ -201,8 +214,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { ctrl_area->instrs.IOIO_PROT = 1; } - - + PrintDebug("Exiting on interrupts\n"); ctrl_area->guest_ctrl.V_INTR_MASKING = 1; @@ -245,7 +257,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { ctrl_area->TLB_CONTROL = 1; - guest_state->g_pat = 0x7040600070406ULL; guest_state->cr0 |= 0x80000000; @@ -270,6 +281,13 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) { } + if (vm_info->msr_map.num_hooks > 0) { + ctrl_area->MSRPM_BASE_PA = v3_init_svm_msr_map(vm_info); + ctrl_area->instrs.MSR_PROT = 1; + + } + + } @@ -346,13 +364,6 @@ static int start_svm_guest(struct guest_info *info) { //PrintDebug("SVM Returned\n"); -#if PrintDebug - { - uint_t x = 0; - PrintDebug("RSP=%p\n", (void *)&x); - } -#endif - v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc); num_exits++; @@ -361,12 +372,13 @@ static int start_svm_guest(struct guest_info *info) { v3_stgi(); - //PrintDebug("SVM Exit number %d\n", num_exits); - + if (num_exits % 25 == 0) { + PrintDebug("SVM Exit number %d\n", num_exits); + } if (v3_handle_svm_exit(info) != 0) { - + vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data)); addr_t host_addr; addr_t linear_addr = 0; @@ -384,6 +396,16 @@ static int start_svm_guest(struct guest_info *info) { v3_print_segments(info); v3_print_ctrl_regs(info); v3_print_GPRs(info); + + + + PrintDebug("SVM Exit 
Code: %p\n", (void *)(addr_t)guest_ctrl->exit_code); + + PrintDebug("exit_info1 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info1)); + PrintDebug("exit_info1 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4)); + + PrintDebug("exit_info2 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info2)); + PrintDebug("exit_info2 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4)); if (info->mem_mode == PHYSICAL_MEM) { guest_pa_to_host_va(info, linear_addr, &host_addr); diff --git a/palacios/src/palacios/svm_handler.c b/palacios/src/palacios/svm_handler.c index 798177e..8495a17 100644 --- a/palacios/src/palacios/svm_handler.c +++ b/palacios/src/palacios/svm_handler.c @@ -29,7 +29,7 @@ #include #include #include - +#include @@ -118,140 +118,173 @@ int v3_handle_svm_exit(struct guest_info * info) { switch (exit_code) { - case VMEXIT_IOIO: { - struct svm_io_info * io_info = (struct svm_io_info *)&(guest_ctrl->exit_info1); - - if (io_info->type == 0) { - if (io_info->str) { - if (v3_handle_svm_io_outs(info) == -1 ) { - return -1; + case VMEXIT_IOIO: + { + struct svm_io_info * io_info = (struct svm_io_info *)&(guest_ctrl->exit_info1); + + if (io_info->type == 0) { + if (io_info->str) { + if (v3_handle_svm_io_outs(info) == -1 ) { + return -1; + } + } else { + if (v3_handle_svm_io_out(info) == -1) { + return -1; + } } } else { - if (v3_handle_svm_io_out(info) == -1) { - return -1; + if (io_info->str) { + if (v3_handle_svm_io_ins(info) == -1) { + return -1; + } + } else { + if (v3_handle_svm_io_in(info) == -1) { + return -1; + } } } - } else { - if (io_info->str) { - if (v3_handle_svm_io_ins(info) == -1) { + break; + } + case VMEXIT_MSR: + { + + if (guest_ctrl->exit_info1 == 0) { + if (v3_handle_msr_read(info) == -1) { return -1; } - } else { - if (v3_handle_svm_io_in(info) == -1) { + } else if (guest_ctrl->exit_info1 == 1) { + if (v3_handle_msr_write(info) == -1) { return -1; } + } else { + PrintError("Invalid MSR Operation\n"); + return 
-1; } - } - } - break; - - case VMEXIT_CR0_WRITE: { + break; + } + case VMEXIT_CR0_WRITE: + { #ifdef DEBUG_CTRL_REGS - PrintDebug("CR0 Write\n"); + PrintDebug("CR0 Write\n"); #endif - if (v3_handle_cr0_write(info) == -1) { - return -1; + if (v3_handle_cr0_write(info) == -1) { + return -1; + } + break; + } + case VMEXIT_CR0_READ: + { +#ifdef DEBUG_CTRL_REGS + PrintDebug("CR0 Read\n"); +#endif + if (v3_handle_cr0_read(info) == -1) { + return -1; + } + break; + } + case VMEXIT_CR3_WRITE: + { +#ifdef DEBUG_CTRL_REGS + PrintDebug("CR3 Write\n"); +#endif + if (v3_handle_cr3_write(info) == -1) { + return -1; + } + break; } - } - break; - - case VMEXIT_CR0_READ: { + case VMEXIT_CR3_READ: + { #ifdef DEBUG_CTRL_REGS - PrintDebug("CR0 Read\n"); + PrintDebug("CR3 Read\n"); #endif - if (v3_handle_cr0_read(info) == -1) { - return -1; + if (v3_handle_cr3_read(info) == -1) { + return -1; + } + break; } - } - break; - - case VMEXIT_CR3_WRITE: { + case VMEXIT_CR4_WRITE: + { #ifdef DEBUG_CTRL_REGS - PrintDebug("CR3 Write\n"); + PrintDebug("CR4 Write\n"); #endif - if (v3_handle_cr3_write(info) == -1) { - return -1; - } - } - break; - - case VMEXIT_CR3_READ: { + if (v3_handle_cr4_write(info) == -1) { + return -1; + } + break; + } + case VMEXIT_CR4_READ: + { #ifdef DEBUG_CTRL_REGS - PrintDebug("CR3 Read\n"); + PrintDebug("CR4 Read\n"); #endif - if (v3_handle_cr3_read(info) == -1) { - return -1; + if (v3_handle_cr4_read(info) == -1) { + return -1; + } + break; } - } - break; - case VMEXIT_EXCP14: { - addr_t fault_addr = guest_ctrl->exit_info2; - pf_error_t * error_code = (pf_error_t *)&(guest_ctrl->exit_info1); + case VMEXIT_EXCP14: + { + addr_t fault_addr = guest_ctrl->exit_info2; + pf_error_t * error_code = (pf_error_t *)&(guest_ctrl->exit_info1); #ifdef DEBUG_SHADOW_PAGING - PrintDebug("PageFault at %p (error=%d)\n", - (void *)fault_addr, *(uint_t *)error_code); + PrintDebug("PageFault at %p (error=%d)\n", + (void *)fault_addr, *(uint_t *)error_code); #endif - if (info->shdw_pg_mode 
== SHADOW_PAGING) { - if (v3_handle_shadow_pagefault(info, fault_addr, *error_code) == -1) { + if (info->shdw_pg_mode == SHADOW_PAGING) { + if (v3_handle_shadow_pagefault(info, fault_addr, *error_code) == -1) { + return -1; + } + } else { + PrintError("Page fault in un implemented paging mode\n"); return -1; } - } else { - PrintError("Page fault in un implemented paging mode\n"); + break; + } + case VMEXIT_NPF: + { + PrintError("Currently unhandled Nested Page Fault\n"); return -1; - } - } - break; - - case VMEXIT_NPF: { - PrintError("Currently unhandled Nested Page Fault\n"); - return -1; - - } - break; - case VMEXIT_INVLPG: { - if (info->shdw_pg_mode == SHADOW_PAGING) { + break; + } + case VMEXIT_INVLPG: + { + if (info->shdw_pg_mode == SHADOW_PAGING) { #ifdef DEBUG_SHADOW_PAGING - PrintDebug("Invlpg\n"); + PrintDebug("Invlpg\n"); #endif - if (v3_handle_shadow_invlpg(info) == -1) { - return -1; + if (v3_handle_shadow_invlpg(info) == -1) { + return -1; + } } - } - /* - (exit_code == VMEXIT_INVLPGA) || - */ - - } - break; - - case VMEXIT_INTR: { - - // handled by interrupt dispatch earlier - - } - break; - - case VMEXIT_SMI: { - - // handle_svm_smi(info); // ignored for now - - } - break; - - case VMEXIT_HLT: { + /* + (exit_code == VMEXIT_INVLPGA) || + */ + break; + } + case VMEXIT_INTR: + { + // handled by interrupt dispatch earlier + break; + } + case VMEXIT_SMI: + { + // handle_svm_smi(info); // ignored for now + break; + } + case VMEXIT_HLT: + { #ifdef DEBUG_HALT - PrintDebug("Guest halted\n"); + PrintDebug("Guest halted\n"); #endif - if (v3_handle_svm_halt(info) == -1) { - return -1; + if (v3_handle_svm_halt(info) == -1) { + return -1; + } + break; } - } - break; - case VMEXIT_PAUSE: { //PrintDebug("Guest paused\n"); if (v3_handle_svm_pause(info) == -1) { diff --git a/palacios/src/palacios/svm_msr.c b/palacios/src/palacios/svm_msr.c new file mode 100644 index 0000000..bd16fb2 --- /dev/null +++ b/palacios/src/palacios/svm_msr.c @@ -0,0 +1,141 @@ +/* + * This 
file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange
+ * Copyright (c) 2008, The V3VEE Project
+ * All rights reserved.
+ *
+ * Author: Jack Lange
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include
+#include
+
+#include
+
+
+#define PENTIUM_MSRS_START            0x00000000
+#define PENTIUM_MSRS_END              0x00001fff
+#define AMD_6_GEN_MSRS_START          0xc0000000
+#define AMD_6_GEN_MSRS_END            0xc0001fff
+#define AMD_7_8_GEN_MSRS_START        0xc0010000
+#define AMD_7_8_GEN_MSRS_END          0xc0011fff
+
+#define PENTIUM_MSRS_INDEX            (0x0 * 4)
+#define AMD_6_GEN_MSRS_INDEX          (0x800 * 4)
+#define AMD_7_8_GEN_MSRS_INDEX        (0x1000 * 4)
+
+/* Maps an MSR number to its slot in the permission bitmap (four 2-bit slots per byte).
+ * Returns -1, after logging, for an MSR outside the three ranges defined above. */
+static int get_bitmap_index(uint_t msr) {
+    if ((msr >= PENTIUM_MSRS_START) &&
+        (msr <= PENTIUM_MSRS_END)) {
+        return (PENTIUM_MSRS_INDEX + (msr - PENTIUM_MSRS_START));
+    } else if ((msr >= AMD_6_GEN_MSRS_START) &&
+               (msr <= AMD_6_GEN_MSRS_END)) {
+        return (AMD_6_GEN_MSRS_INDEX + (msr - AMD_6_GEN_MSRS_START));
+    } else if ((msr >= AMD_7_8_GEN_MSRS_START) &&
+               (msr <= AMD_7_8_GEN_MSRS_END)) {
+        return (AMD_7_8_GEN_MSRS_INDEX + (msr - AMD_7_8_GEN_MSRS_START));
+    } else {
+        PrintError("MSR out of range (MSR=0x%x)\n", msr);
+        return -1;
+    }
+}
+
+/* Builds the two-page MSR permission bitmap from the guest's hook list (per MSR: bit 0 is set
+ * for a read hook, bit 1 for a write hook) and returns the V3_PAddr() address of the bitmap. */
+addr_t v3_init_svm_msr_map(struct guest_info * info) {
+    uchar_t * msr_bitmap = (uchar_t*)V3_VAddr(V3_AllocPages(2));
+    struct v3_msr_map * msr_map = &(info->msr_map);
+    struct v3_msr_hook * hook = NULL;
+
+    /* Start from an all-zero bitmap; bits are only set for MSRs that have a hook. */
+    memset(msr_bitmap, 0, PAGE_SIZE * 2);
+
+    list_for_each_entry(hook, &(msr_map->hook_list), link) {
+        int index = get_bitmap_index(hook->msr);
+        uint_t byte_offset = 0;
+        uint_t bit_offset = 0;
+        uchar_t val = 0;
+        uchar_t mask = 0x3;
+
+        if (index < 0) {
+            continue; /* no slot for this MSR (already logged); -1 must never become an offset */
+        }
+
+        byte_offset = index / 4;
+        bit_offset = (index % 4) * 2;
+        val = (hook->read ? 0x1 : 0x0) | (hook->write ? 0x2 : 0x0);
+
+        val = val << bit_offset;
+        mask = ~(mask << bit_offset); /* clear only this MSR's two bits, not its neighbours' */
+
+        *(msr_bitmap + byte_offset) &= mask;
+        *(msr_bitmap + byte_offset) |= val;
+    }
+
+    return (addr_t)V3_PAddr(msr_bitmap);
+}
+
+/* Handles a guest WRMSR exit: passes EDX:EAX to the write hook registered for the MSR in ECX.
+ * Returns 0 on success, -1 if no hook is registered or the hook reports an error. */
+int v3_handle_msr_write(struct guest_info * info) {
+    uint_t msr_num = info->vm_regs.rcx;
+    struct v3_msr msr_val;
+    struct v3_msr_hook * hook = v3_get_msr_hook(info, msr_num);
+
+    if (!hook) {
+        PrintError("Hook for MSR write %d not found\n", msr_num);
+        return -1;
+    }
+
+    msr_val.value = 0;
+    msr_val.lo = info->vm_regs.rax;
+    msr_val.hi = info->vm_regs.rdx;
+
+    if (hook->write(msr_num, msr_val, hook->priv_data) == -1) {
+        PrintError("Error in MSR hook Write\n");
+        return -1;
+    }
+
+    /* WRMSR (0F 30) is a two-byte instruction; step the guest past it so it is not re-executed. */
+    info->rip += 2;
+    return 0;
+}
+
+/* Handles a guest RDMSR exit: asks the read hook registered for the MSR in ECX for a value and
+ * hands it back in EDX:EAX. Returns 0 on success, -1 if no hook is registered or the hook fails. */
+int v3_handle_msr_read(struct guest_info * info) {
+    uint_t msr_num = info->vm_regs.rcx;
+    struct v3_msr msr_val;
+    struct v3_msr_hook * hook = v3_get_msr_hook(info, msr_num);
+
+    if (!hook) {
+        PrintError("Hook for MSR read %d not found\n", msr_num);
+        return -1;
+    }
+
+    msr_val.value = 0;
+
+    if (hook->read(msr_num, &msr_val, hook->priv_data) == -1) {
+        PrintError("Error in MSR hook Read\n");
+        return -1;
+    }
+
+    info->vm_regs.rax = msr_val.lo;
+    info->vm_regs.rdx = msr_val.hi;
+
+    /* RDMSR (0F 32) is a two-byte instruction; step the guest past it so it is not re-executed. */
+    info->rip += 2;
+    return 0;
+}
diff --git a/palacios/src/palacios/vm_guest.c b/palacios/src/palacios/vm_guest.c
index e47c1fb..c7512b7 100644
--- a/palacios/src/palacios/vm_guest.c
+++ b/palacios/src/palacios/vm_guest.c
@@ -51,6 +51,8 @@ v3_vm_cpu_mode_t v3_get_cpu_mode(struct guest_info * info) {
     } else if ((efer->lma == 1) && (cs->long_mode == 1)) {
         return LONG;
     } else {
+        return -1;
+        // What about LONG_16_COMPAT???
return LONG_32_COMPAT; } } diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c index 5114521..3bbac88 100644 --- a/palacios/src/palacios/vmm_config.c +++ b/palacios/src/palacios/vmm_config.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -89,6 +90,7 @@ int v3_config_guest(struct guest_info * info, struct v3_vm_config * config_ptr) v3_init_vmm_io_map(info); + v3_init_msr_map(info); v3_init_interrupt_state(info); v3_init_dev_mgr(info); diff --git a/palacios/src/palacios/vmm_ctrl_regs.c b/palacios/src/palacios/vmm_ctrl_regs.c index e2ab2bc..4476b8b 100644 --- a/palacios/src/palacios/vmm_ctrl_regs.c +++ b/palacios/src/palacios/vmm_ctrl_regs.c @@ -96,6 +96,8 @@ int v3_handle_cr0_write(struct guest_info * info) { if (info->cpu_mode == LONG) { // 64 bit registers + PrintError("Long mode currently not handled\n"); + return -1; } else { // 32 bit registers struct cr0_32 *real_cr0 = (struct cr0_32*)&(info->ctrl_regs.cr0); @@ -270,11 +272,9 @@ int v3_handle_cr3_write(struct guest_info * info) { } else if (cached == 0) { addr_t shadow_pt; - if( info->mem_mode == VIRTUAL_MEM ) - { - PrintDebug("New CR3 is different - flushing shadow page table %p\n", shadow_cr3 ); - - delete_page_tables_pde32((pde32_t *)CR3_TO_PDE32(*(uint_t*)shadow_cr3)); + if(info->mem_mode == VIRTUAL_MEM) { + PrintDebug("New CR3 is different - flushing shadow page table %p\n", shadow_cr3 ); + delete_page_tables_pde32((pde32_t *)CR3_TO_PDE32(*(uint_t*)shadow_cr3)); } shadow_pt = v3_create_new_shadow_pt32(); @@ -361,3 +361,78 @@ int v3_handle_cr3_read(struct guest_info * info) { return 0; } + +int v3_handle_cr4_read(struct guest_info * info) { + PrintError("CR4 Read not handled\n"); + return -1; +} + +int v3_handle_cr4_write(struct guest_info * info) { + uchar_t instr[15]; + int ret; + struct x86_instr dec_instr; + + if (info->mem_mode == PHYSICAL_MEM) { + ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); + 
} else { + ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 15, instr); + } + + if (v3_decode(info, (addr_t)instr, &dec_instr) == -1) { + PrintError("Could not decode instruction\n"); + return -1; + } + + if (v3_opcode_cmp(V3_OPCODE_MOV2CR, (const uchar_t *)(dec_instr.opcode)) != 0) { + PrintError("Invalid opcode in write to CR4\n"); + return -1; + } + + if ((info->cpu_mode == PROTECTED) || (info->cpu_mode == PROTECTED_PAE)) { + struct cr4_32 * new_cr4 = (struct cr4_32 *)(dec_instr.src_operand.operand); + struct cr4_32 * old_cr4 = (struct cr4_32 *)&(info->ctrl_regs.cr4); + + PrintDebug("OperandVal = %x, length = %d\n", *(uint_t *)new_cr4, dec_instr.src_operand.size); + PrintDebug("Old CR4=%x\n", *(uint_t *)old_cr4); + + + + + if ((info->shdw_pg_mode == SHADOW_PAGING) && + (v3_get_mem_mode(info) == PHYSICAL_MEM)) { + + if ((old_cr4->pae == 0) && (new_cr4->pae == 1)) { + // Create Passthrough PAE pagetables + PrintDebug("Creating PAE passthrough tables\n"); + info->ctrl_regs.cr3 = (addr_t)V3_PAddr(create_passthrough_pts_PAE32(info)); + } else if ((old_cr4->pae == 1) && (new_cr4->pae == 0)) { + // Create passthrough standard 32bit pagetables + return -1; + } + } + + *old_cr4 = *new_cr4; + PrintDebug("New CR4=%x\n", *(uint_t *)old_cr4); + + } else { + return -1; + } + + info->rip += dec_instr.instr_length; + return 0; +} + + +int v3_handle_efer_read(uint_t msr, struct v3_msr * dst, void * priv_data) { + PrintError("EFER Read not handled\n"); + return -1; +} + + +int v3_handle_efer_write(uint_t msr, struct v3_msr src, void * priv_data) { + // struct guest_info * info = (struct guest_info *)(priv_data); + PrintError("EFER Write not handled (rax=%p, rdx=%p)\n", + (void *)(addr_t)(src.lo), + (void *)(addr_t)(src.hi)); + return -1; +} diff --git a/palacios/src/palacios/vmm_msr.c b/palacios/src/palacios/vmm_msr.c new file mode 100644 index 0000000..84a1499 --- /dev/null +++ b/palacios/src/palacios/vmm_msr.c @@ -0,0 +1,87 @@ +/* + * 
This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange
+ * Copyright (c) 2008, The V3VEE Project
+ * All rights reserved.
+ *
+ * Author: Jack Lange
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#include
+#include
+#include
+
+/* Resets the guest's MSR hook map to an empty list with a hook count of zero. */
+void v3_init_msr_map(struct guest_info * info) {
+    struct v3_msr_map * msr_map = &(info->msr_map);
+
+    INIT_LIST_HEAD(&(msr_map->hook_list));
+    msr_map->num_hooks = 0;
+}
+
+/* Registers read/write callbacks for one MSR. Returns 0 on success, -1 if the hook cannot be allocated. */
+int v3_hook_msr(struct guest_info * info, uint_t msr,
+                int (*read)(uint_t msr, struct v3_msr * dst, void * priv_data),
+                int (*write)(uint_t msr, struct v3_msr src, void * priv_data),
+                void * priv_data) {
+    struct v3_msr_map * msr_map = &(info->msr_map);
+    struct v3_msr_hook * hook = (struct v3_msr_hook *)V3_Malloc(sizeof(struct v3_msr_hook));
+
+    if (hook == NULL) {
+        PrintError("Could not allocate msr hook for MSR %d\n", msr);
+        return -1;
+    }
+
+    hook->read = read;
+    hook->write = write;
+    hook->msr = msr;
+    hook->priv_data = priv_data;
+
+    /* Count the hook: Init_VMCB_BIOS installs the MSR permission map only when num_hooks > 0. */
+    list_add(&(hook->link), &(msr_map->hook_list));
+    msr_map->num_hooks++;
+
+    return 0;
+}
+
+/* Hook removal is not implemented yet: always returns -1. */
+int v3_unhook_msr(struct guest_info * info, uint_t msr) {
+    return -1;
+}
+
+
+/* Returns the hook registered for msr, or NULL when no hook in the list matches. */
+struct v3_msr_hook * v3_get_msr_hook(struct guest_info * info, uint_t msr) {
+    struct v3_msr_map * msr_map = &(info->msr_map);
+    struct v3_msr_hook * hook = NULL;
+
+    list_for_each_entry(hook, &(msr_map->hook_list), link) {
+        if (hook->msr == msr) {
+            return hook;
+        }
+    }
+
+    return NULL;
+}
+
+/* Walks the guest's MSR hook list for debugging output. */
+void v3_print_msr_map(struct guest_info * info) {
+    struct v3_msr_map * msr_map = &(info->msr_map);
+    struct v3_msr_hook * hook = NULL;
+
+    
list_for_each_entry(hook, &(msr_map->hook_list), link) { + PrintDebug("MSR HOOK (MSR=%d) (read=0x%p) (write=0x%p)\n", + hook->msr, hook->read, hook->write); + } +} diff --git a/palacios/src/palacios/vmm_paging.c b/palacios/src/palacios/vmm_paging.c index dff74dd..cdb14f7 100644 --- a/palacios/src/palacios/vmm_paging.c +++ b/palacios/src/palacios/vmm_paging.c @@ -262,6 +262,147 @@ pde32_t * create_passthrough_pts_32(struct guest_info * guest_info) { } +/* We generate a page table to correspond to a given memory layout + * pulling pages from the mem_list when necessary + * If there are any gaps in the layout, we add them as unmapped pages + */ +pdpe32pae_t * create_passthrough_pts_PAE32(struct guest_info * guest_info) { + addr_t current_page_addr = 0; + int i, j, k; + struct shadow_map * map = &(guest_info->mem_map); + + pdpe32pae_t * pdpe = V3_VAddr(V3_AllocPages(1)); + memset(pdpe, 0, PAGE_SIZE); + + for (i = 0; i < MAX_PDPE32PAE_ENTRIES; i++) { + int pde_present = 0; + pde32pae_t * pde = V3_VAddr(V3_AllocPages(1)); + + for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) { + + + int pte_present = 0; + pte32pae_t * pte = V3_VAddr(V3_AllocPages(1)); + + + for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) { + struct shadow_region * region = get_shadow_region_by_addr(map, current_page_addr); + + if (!region || + (region->host_type == HOST_REGION_HOOK) || + (region->host_type == HOST_REGION_UNALLOCATED) || + (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) || + (region->host_type == HOST_REGION_REMOTE) || + (region->host_type == HOST_REGION_SWAPPED)) { + pte[k].present = 0; + pte[k].writable = 0; + pte[k].user_page = 0; + pte[k].write_through = 0; + pte[k].cache_disable = 0; + pte[k].accessed = 0; + pte[k].dirty = 0; + pte[k].pte_attr = 0; + pte[k].global_page = 0; + pte[k].vmm_info = 0; + pte[k].page_base_addr = 0; + pte[k].rsvd = 0; + } else { + addr_t host_addr; + pte[k].present = 1; + pte[k].writable = 1; + pte[k].user_page = 1; + pte[k].write_through = 0; + 
pte[k].cache_disable = 0; + pte[k].accessed = 0; + pte[k].dirty = 0; + pte[k].pte_attr = 0; + pte[k].global_page = 0; + pte[k].vmm_info = 0; + + if (guest_pa_to_host_pa(guest_info, current_page_addr, &host_addr) == -1) { + // BIG ERROR + // PANIC + return NULL; + } + + pte[k].page_base_addr = host_addr >> 12; + pte[k].rsvd = 0; + + pte_present = 1; + } + + current_page_addr += PAGE_SIZE; + } + + if (pte_present == 0) { + V3_FreePage(V3_PAddr(pte)); + + pde[j].present = 0; + pde[j].writable = 0; + pde[j].user_page = 0; + pde[j].write_through = 0; + pde[j].cache_disable = 0; + pde[j].accessed = 0; + pde[j].avail = 0; + pde[j].large_page = 0; + pde[j].global_page = 0; + pde[j].vmm_info = 0; + pde[j].pt_base_addr = 0; + pde[j].rsvd = 0; + } else { + pde[j].present = 1; + pde[j].writable = 1; + pde[j].user_page = 1; + pde[j].write_through = 0; + pde[j].cache_disable = 0; + pde[j].accessed = 0; + pde[j].avail = 0; + pde[j].large_page = 0; + pde[j].global_page = 0; + pde[j].vmm_info = 0; + pde[j].pt_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pte)); + pde[j].rsvd = 0; + + pde_present = 1; + } + + } + + if (pde_present == 0) { + V3_FreePage(V3_PAddr(pde)); + + pdpe[i].present = 0; + pdpe[i].rsvd = 0; + pdpe[i].write_through = 0; + pdpe[i].cache_disable = 0; + pdpe[i].accessed = 0; + pdpe[i].avail = 0; + pdpe[i].rsvd2 = 0; + pdpe[i].vmm_info = 0; + pdpe[i].pd_base_addr = 0; + pdpe[i].rsvd3 = 0; + } else { + pdpe[i].present = 1; + pdpe[i].rsvd = 0; + pdpe[i].write_through = 0; + pdpe[i].cache_disable = 0; + pdpe[i].accessed = 0; + pdpe[i].avail = 0; + pdpe[i].rsvd2 = 0; + pdpe[i].vmm_info = 0; + pdpe[i].pd_base_addr = PAGE_ALIGNED_ADDR((addr_t)V3_PAddr(pde)); + pdpe[i].rsvd3 = 0; + } + + } + + + return pdpe; +} + + + + pml4e64_t * create_passthrough_pts_64(struct guest_info * info) { @@ -476,6 +617,10 @@ void PrintPTE32(addr_t virtual_address, pte32_t * pte) } + + + + void PrintPDE64(addr_t virtual_address, pde64_t * pde) { PrintDebug("PDE64 %p -> %p : present=%x, 
writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n", @@ -544,6 +689,8 @@ void PrintPT32(addr_t starting_address, pte32_t * pte) + + void PrintDebugPageTables(pde32_t * pde) { int i; @@ -558,3 +705,102 @@ void PrintDebugPageTables(pde32_t * pde) } } + + + + + + + +void PrintPDPE32PAE(addr_t virtual_address, pdpe32pae_t * pdpe) +{ + PrintDebug("PDPE %p -> %p : present=%x, wt=%x, cd=%x, accessed=%x, kernelInfo=%x\n", + (void *)virtual_address, + (void *)(addr_t) (pdpe->pd_base_addr << PAGE_POWER), + pdpe->present, + pdpe->write_through, + pdpe->cache_disable, + pdpe->accessed, + pdpe->vmm_info); +} + +void PrintPDE32PAE(addr_t virtual_address, pde32pae_t * pde) +{ + PrintDebug("PDE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n", + (void *)virtual_address, + (void *)(addr_t) (pde->pt_base_addr << PAGE_POWER), + pde->present, + pde->writable, + pde->user_page, + pde->write_through, + pde->cache_disable, + pde->accessed, + pde->large_page, + pde->global_page, + pde->vmm_info); +} + + +void PrintPTE32PAE(addr_t virtual_address, pte32pae_t * pte) +{ + PrintDebug("PTE %p -> %p : present=%x, writable=%x, user=%x, wt=%x, cd=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n", + (void *)virtual_address, + (void*)(addr_t)(pte->page_base_addr << PAGE_POWER), + pte->present, + pte->writable, + pte->user_page, + pte->write_through, + pte->cache_disable, + pte->accessed, + pte->dirty, + pte->pte_attr, + pte->global_page, + pte->vmm_info); +} + + + + + + +void PrintDebugPageTables32PAE(pdpe32pae_t * pdpe) +{ + int i, j, k; + pde32pae_t * pde; + pte32pae_t * pte; + addr_t virtual_addr = 0; + + PrintDebug("Dumping the pages starting with the pde page at %p\n", pdpe); + + for (i = 0; (i < MAX_PDPE32PAE_ENTRIES); i++) { + + if (pdpe[i].present) { + pde = (pde32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pdpe[i].pd_base_addr)); + + 
PrintPDPE32PAE(virtual_addr, &(pdpe[i])); + + for (j = 0; j < MAX_PDE32PAE_ENTRIES; j++) { + + if (pde[j].present) { + pte = (pte32pae_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(pde[j].pt_base_addr)); + + PrintPDE32PAE(virtual_addr, &(pde[j])); + + for (k = 0; k < MAX_PTE32PAE_ENTRIES; k++) { + if (pte[k].present) { + PrintPTE32PAE(virtual_addr, &(pte[k])); + } + + virtual_addr += PAGE_SIZE; + } + } else { + virtual_addr += PAGE_SIZE * MAX_PTE32PAE_ENTRIES; + } + } + } else { + virtual_addr += PAGE_SIZE * MAX_PDE32PAE_ENTRIES * MAX_PTE32PAE_ENTRIES; + } + } +} + +