From: Andy Gocke Date: Fri, 21 Aug 2009 20:25:41 +0000 (-0500) Subject: Modified boot and vmxassist to handle real/protected transition. X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=61597ea2c5ccace036d8a65e429e32b8f8a7ed4a Modified boot and vmxassist to handle real/protected transition. --- diff --git a/bios/vmxassist/Makefile b/bios/vmxassist/Makefile index 6959a28..ededccd 100644 --- a/bios/vmxassist/Makefile +++ b/bios/vmxassist/Makefile @@ -28,9 +28,9 @@ TEXTADDR=0x000D0000 DEFINES=-DDEBUG -DTEXTADDR=$(TEXTADDR) # Disable PIE/SSP if GCC supports them. They can break us. -CFLAGS += $(call test-gcc-flag,$(CC),-nopie) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector-all) +CFLAGS += $(call cc-option,$(CC),-nopie,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) CPP = cpp -P OBJCOPY = objcopy -p -O binary -R .note -R .comment -R .bss -S --gap-fill=0 diff --git a/bios/vmxassist/e820.h b/bios/vmxassist/e820.h index 8190c76..151313c 100644 --- a/bios/vmxassist/e820.h +++ b/bios/vmxassist/e820.h @@ -1,32 +1,31 @@ -#ifndef __XEN_PUBLIC_HVM_E820_H__ -#define __XEN_PUBLIC_HVM_E820_H__ +#ifndef __HVMLOADER_E820_H__ +#define __HVMLOADER_E820_H__ -/* PC BIOS standard E820 types. */ +/* E820 location in HVM virtual address space. */ +#define HVM_E820_PAGE 0x00090000 +#define HVM_E820_NR_OFFSET 0x000001E8 +#define HVM_E820_OFFSET 0x000002D0 + +#define HVM_BELOW_4G_RAM_END 0xF0000000 +#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END +#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) + + +/* + * PC BIOS standard E820 types and structure. + */ #define E820_RAM 1 #define E820_RESERVED 2 #define E820_ACPI 3 #define E820_NVS 4 -/* Xen HVM extended E820 types. */ -#define E820_IO 16 -#define E820_SHARED_PAGE 17 -#define E820_XENSTORE 18 -#define E820_BUFFERED_IO 19 - -/* E820 location in HVM virtual address space. */ -#define E820_MAP_PAGE 0x00090000 -#define E820_MAP_NR_OFFSET 0x000001E8 -#define E820_MAP_OFFSET 0x000002D0 - struct e820entry { uint64_t addr; uint64_t size; uint32_t type; } __attribute__((packed)); -#define HVM_BELOW_4G_RAM_END 0xF0000000 - -#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END -#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) +#define HVM_E820_NR ((unsigned char *)HVM_E820_PAGE + HVM_E820_NR_OFFSET) +#define HVM_E820 ((struct e820entry *)(HVM_E820_PAGE + HVM_E820_OFFSET)) -#endif /* __XEN_PUBLIC_HVM_E820_H__ */ +#endif /* __HVMLOADER_E820_H__ */ diff --git a/bios/vmxassist/head.S b/bios/vmxassist/head.S index b183fac..3af285e 100644 --- a/bios/vmxassist/head.S +++ b/bios/vmxassist/head.S @@ -25,81 +25,13 @@ * switch happens to the environment below. The magic indicates * that this is a valid context. */ -#ifdef TEST - .byte 0x55, 0xaa - .byte 0x80 - .code16 - jmp _start16 -#else jmp _start -#endif .align 8 .long VMXASSIST_MAGIC .long newctx /* new context */ .long oldctx /* old context */ -#ifdef TEST -/* - * We are running in 16-bit. Get into the protected mode as soon as - * possible. We use our own (minimal) GDT to get started. - * - * ROM is a misnomer as this code isn't really rommable (although it - * only requires a few changes) but it does live in a BIOS ROM segment. - * This code allows me to debug vmxassists under (a modified version of) - * Bochs and load it as a "optromimage1". - */ - .code16 - .globl _start16 -_start16: - cli - - /* load our own global descriptor table */ - data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR) - - /* go to protected mode */ - movl %cr0, %eax - orl $CR0_PE, %eax - movl %eax, %cr0 - data32 ljmp $0x08, $1f - - .align 32 - .globl rom_gdt -rom_gdt: - .word 0, 0 /* 0x00: reserved */ - .byte 0, 0, 0, 0 - - .word 0xFFFF, 0 /* 0x08: CS 32-bit */ - .byte 0, 0x9A, 0xCF, 0 - - .word 0xFFFF, 0 /* 0x10: CS 32-bit */ - .byte 0, 0x92, 0xCF, 0 -rom_gdt_end: - - .align 4 - .globl rom_gdtr -rom_gdtr: - .word rom_gdt_end - rom_gdt - 1 - .long rom_gdt - - .code32 -1: - /* welcome to the 32-bit world */ - movw $0x10, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - movw %ax, %fs - movw %ax, %gs - - /* enable Bochs debug facilities */ - movw $0x8A00, %dx - movw $0x8A00, %ax - outw %ax, (%dx) - - jmp _start -#endif /* TEST */ - /* * This is the real start. Control was transfered to this point * with CR0_PE set and executing in some 32-bit segment. We call @@ -111,9 +43,6 @@ _start: cli /* save register parameters to C land */ -#ifdef TEST - xorl %edx, %edx -#endif /* clear bss */ cld @@ -130,7 +59,7 @@ _start: clts /* setup my own stack */ - movl $stack_top - 4*4, %esp + movl $stack_top, %esp movl %esp, %ebp /* go ... */ @@ -145,11 +74,6 @@ _start: halt: push $halt_msg call printf -#ifdef TEST - movw $0x8A00, %dx - movw $0x8AE0, %ax - outw %ax, (%dx) -#endif cli jmp . diff --git a/bios/vmxassist/machine.h b/bios/vmxassist/machine.h index 0ea2adf..f91646f 100644 --- a/bios/vmxassist/machine.h +++ b/bios/vmxassist/machine.h @@ -38,10 +38,15 @@ #define CR4_PSE (1 << 4) #define CR4_PAE (1 << 5) +#define EFLAGS_CF (1 << 0) +#define EFLAGS_PF (1 << 2) +#define EFLAGS_AF (1 << 4) #define EFLAGS_ZF (1 << 6) +#define EFLAGS_SF (1 << 7) #define EFLAGS_TF (1 << 8) #define EFLAGS_IF (1 << 9) #define EFLAGS_DF (1 << 10) +#define EFLAGS_OF (1 << 11) #define EFLAGS_IOPL (3 << 12) #define EFLAGS_VM ((1 << 17) | EFLAGS_IOPL) #define EFLAGS_VIF (1 << 19) @@ -56,13 +61,6 @@ #define LPGSIZE (1 << LOG_PDSIZE) /* large page size */ #define LPGMASK (~(LPGSIZE - 1)) /* large page mask */ -#ifdef TEST -#define PTE_P (1 << 0) /* Present */ -#define PTE_RW (1 << 1) /* Read/Write */ -#define PTE_US (1 << 2) /* User/Supervisor */ -#define PTE_PS (1 << 7) /* Page Size */ -#endif - /* Programmable Interrupt Contoller (PIC) defines */ #define PIC_MASTER 0x20 #define PIC_SLAVE 0xA0 @@ -115,7 +113,7 @@ struct tss { #ifdef ENABLE_VME unsigned long int_redir[8]; #endif - unsigned char iomap[8192]; + unsigned char iomap[8193]; }; static inline void @@ -195,14 +193,6 @@ set_cr4(unsigned value) __asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value)); } -#ifdef TEST -static inline void -breakpoint(void) -{ - outw(0x8A00, 0x8AE0); -} -#endif /* TEST */ - #endif /* __ASSEMBLY__ */ #endif /* __MACHINE_H__ */ diff --git a/bios/vmxassist/setup.c b/bios/vmxassist/setup.c index c453ecd..1e2e86c 100644 --- a/bios/vmxassist/setup.c +++ b/bios/vmxassist/setup.c @@ -47,29 +47,13 @@ unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32))); struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt }; -#ifdef TEST -unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 }; - -struct e820entry e820map[] = { - { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM }, - { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED }, - { 0x00000000000A0000ULL, 0x0000000000020000ULL, E820_IO }, - { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED }, - { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM }, - { 0x0000000000000000ULL, 0x0000000000001000ULL, E820_SHARED_PAGE }, - { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS }, - { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI }, - { 0x00000000FEC00000ULL, 0x0000000001400000ULL, E820_IO }, -}; -#endif /* TEST */ - struct vmx_assist_context oldctx; struct vmx_assist_context newctx; unsigned long memory_size; int initialize_real_mode; -extern char stack[], stack_top[]; +extern char stack_top[]; extern unsigned trap_handlers[]; void @@ -87,39 +71,12 @@ banner(void) (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10; memory_size += 0x400 << 10; /* + 1MB */ -#ifdef TEST - /* Create an SMAP for our debug environment */ - e820map[4].size = memory_size - e820map[4].addr - PGSIZE; - e820map[5].addr = memory_size - PGSIZE; - e820map[6].addr = memory_size; - e820map[7].addr += memory_size; - - *E820_MAP_NR = sizeof(e820map)/sizeof(e820map[0]); - memcpy(E820_MAP, e820map, sizeof(e820map)); -#endif - printf("Memory size %ld MB\n", memory_size >> 20); printf("E820 map:\n"); - print_e820_map(E820_MAP, *E820_MAP_NR); + print_e820_map(HVM_E820, *HVM_E820_NR); printf("\n"); } -#ifdef TEST -void -setup_paging(void) -{ - unsigned long i; - - if (((unsigned)pgd & ~PGMASK) != 0) - panic("PGD not page aligned"); - set_cr4(get_cr4() | CR4_PSE); - for (i = 0; i < NR_PGD; i++) - pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P; - set_cr3((unsigned) pgd); - set_cr0(get_cr0() | (CR0_PE|CR0_PG)); -} -#endif /* TEST */ - void setup_gdt(void) { @@ -128,8 +85,9 @@ setup_gdt(void) /* setup task state segment */ memset(&tss, 0, sizeof(tss)); tss.ss0 = DATA_SELECTOR; - tss.esp0 = (unsigned) stack_top - 4*4; + tss.esp0 = (unsigned) stack_top; tss.iomap_base = offsetof(struct tss, iomap); + tss.iomap[sizeof(tss.iomap)-1] = 0xff; /* initialize gdt's tss selector */ gdt[TSS_SELECTOR / sizeof(gdt[0])] |= @@ -204,7 +162,7 @@ void enter_real_mode(struct regs *regs) { /* mask off TSS busy bit */ - gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL; + gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL; /* start 8086 emulation of BIOS */ if (initialize_real_mode) { @@ -213,17 +171,15 @@ enter_real_mode(struct regs *regs) regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000; if (booting_cpu == 0) { regs->cs = 0xF000; /* ROM BIOS POST entry point */ -#ifdef TEST - regs->eip = 0xFFE0; -#else regs->eip = 0xFFF0; -#endif } else { regs->cs = booting_vector << 8; /* AP entry point */ regs->eip = 0; } - regs->uesp = 0; - regs->uss = 0; + + regs->uesp = regs->uss = 0; + regs->eax = regs->ecx = regs->edx = regs->ebx = 0; + regs->esp = regs->ebp = regs->esi = regs->edi = 0; /* intercept accesses to the PIC */ setiomap(PIC_MASTER+PIC_CMD); @@ -239,14 +195,13 @@ enter_real_mode(struct regs *regs) /* this should get us into 16-bit mode */ return; - } else { - /* go from protected to real mode */ - regs->eflags |= EFLAGS_VM; - - set_mode(regs, VM86_PROTECTED_TO_REAL); - - emulate(regs); } + + /* go from protected to real mode */ + set_mode(regs, VM86_PROTECTED_TO_REAL); + emulate(regs); + if (mode != VM86_REAL) + panic("failed to emulate between clear PE and long jump.\n"); } /* @@ -261,7 +216,7 @@ setup_ctx(void) memset(c, 0, sizeof(*c)); c->eip = (unsigned long) switch_to_real_mode; - c->esp = (unsigned) stack_top - 4*4; + c->esp = (unsigned) stack_top; c->eflags = 0x2; /* no interrupts, please */ /* @@ -271,13 +226,8 @@ setup_ctx(void) * more natural to enable CR0.PE to cause a world switch to * protected mode rather than disabling it. */ -#ifdef TEST - c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE; - c->cr3 = (unsigned long) pgd; -#else c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE; c->cr3 = 0; -#endif c->cr4 = get_cr4(); c->idtr_limit = sizeof(idt)-1; @@ -368,21 +318,13 @@ start_bios(void) int main(void) { - printf("Hello from VMXAssist\n"); - if (booting_cpu == 0) banner(); -#ifdef TEST - setup_paging(); -#endif - setup_gdt(); setup_idt(); -#ifndef TEST set_cr4(get_cr4() | CR4_VME); -#endif setup_ctx(); diff --git a/bios/vmxassist/trap.S b/bios/vmxassist/trap.S index 468da0a..d5ece3e 100644 --- a/bios/vmxassist/trap.S +++ b/bios/vmxassist/trap.S @@ -100,13 +100,9 @@ trap_handlers: .code32 .align 16 common_trap: /* common trap handler */ - pushl %gs - pushl %fs - pushl %ds - pushl %es pushal - movl $DATA_SELECTOR, %eax /* make sure these are sane */ + movl $(DATA_SELECTOR), %eax /* make sure these are sane */ movl %eax, %ds movl %eax, %es movl %eax, %fs @@ -114,17 +110,13 @@ common_trap: /* common trap handler */ movl %esp, %ebp pushl %ebp - pushl 52(%ebp) - pushl 48(%ebp) + pushl 36(%ebp) + pushl 32(%ebp) call trap /* trap(trapno, errno, regs) */ addl $12, %esp trap_return: popal - popl %es - popl %ds - popl %fs - popl %gs addl $8, %esp /* skip trapno, errno */ iret /* NOT REACHED */ @@ -152,10 +144,6 @@ switch_to_real_mode: pushl oldctx+VMX_ASSIST_CTX_EIP pushl $-1 /* trapno, errno */ pushl $-1 - pushl %gs - pushl %fs - pushl %ds - pushl %es pushal movl %esp, %ebp diff --git a/bios/vmxassist/util.c b/bios/vmxassist/util.c index 0181fe7..c7d7170 100644 --- a/bios/vmxassist/util.c +++ b/bios/vmxassist/util.c @@ -27,8 +27,33 @@ static void putchar(int); static char *printnum(char *, unsigned long, int); -static void _doprint(void (*)(int), char const *, va_list); +static void _doprint(void (*)(int), const char *, va_list); +void +cpuid_addr_value(uint64_t addr, uint64_t *value) +{ + uint32_t addr_low = (uint32_t)addr; + uint32_t addr_high = (uint32_t)(addr >> 32); + uint32_t value_low, value_high; + static unsigned int addr_leaf; + + if (!addr_leaf) { + unsigned int eax, ebx, ecx, edx; + __asm__ __volatile__( + "cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "0" (0x40000000)); + addr_leaf = eax + 1; + } + + __asm__ __volatile__( + "cpuid" + : "=c" (value_low), "=d" (value_high) + : "a" (addr_leaf), "0" (addr_low), "1" (addr_high) + : "ebx"); + + *value = (uint64_t)value_high << 32 | value_low; +} void dump_regs(struct regs *regs) @@ -37,16 +62,15 @@ dump_regs(struct regs *regs) regs->eax, regs->ecx, regs->edx, regs->ebx); printf("esp %8x ebp %8x esi %8x edi %8x\n", regs->esp, regs->ebp, regs->esi, regs->edi); - printf("eip %8x eflags %8x cs %8x ds %8x\n", - regs->eip, regs->eflags, regs->cs, regs->ds); - printf("es %8x fs %8x uss %8x uesp %8x\n", - regs->es, regs->fs, regs->uss, regs->uesp); + printf("trapno %8x errno %8x\n", regs->trapno, regs->errno); + printf("eip %8x cs %8x eflags %8x\n", + regs->eip, regs->cs, regs->eflags); + printf("uesp %8x uss %8x\n", + regs->uesp, regs->uss); printf("ves %8x vds %8x vfs %8x vgs %8x\n", regs->ves, regs->vds, regs->vfs, regs->vgs); - if (regs->trapno != -1 || regs->errno != -1) - printf("trapno %8x errno %8x\n", regs->trapno, regs->errno); - printf("cr0 %8lx cr2 %8x cr3 %8lx cr4 %8lx\n", + printf("cr0 %8lx cr2 %8x cr3 %8lx cr4 %8lx\n\n", (long)oldctx.cr0, get_cr2(), (long)oldctx.cr3, (long)oldctx.cr4); } @@ -297,7 +321,7 @@ putchar(int ch) * but still powerful enough for most tasks. */ static void -_doprint(void (*put)(int), char const *fmt, va_list ap) +_doprint(void (*put)(int), const char *fmt, va_list ap) { register char *str, c; int lflag, zflag, nflag; diff --git a/bios/vmxassist/util.h b/bios/vmxassist/util.h index 9c2982f..1fd52ed 100644 --- a/bios/vmxassist/util.h +++ b/bios/vmxassist/util.h @@ -23,14 +23,13 @@ #include #include -#include -#define E820_MAP_NR ((unsigned char *)E820_MAP_PAGE + E820_MAP_NR_OFFSET) -#define E820_MAP ((struct e820entry *)(E820_MAP_PAGE + E820_MAP_OFFSET)) - #define offsetof(type, member) ((unsigned) &((type *)0)->member) struct vmx_assist_context; +#include "e820.h" + +extern void cpuid_addr_value(uint64_t addr, uint64_t *value); extern void hexdump(unsigned char *, int); extern void dump_regs(struct regs *); extern void dump_vmx_context(struct vmx_assist_context *); diff --git a/bios/vmxassist/vm86.c b/bios/vmxassist/vm86.c index 8c620a4..55b6905 100644 --- a/bios/vmxassist/vm86.c +++ b/bios/vmxassist/vm86.c @@ -1,6 +1,6 @@ /* * vm86.c: A vm86 emulator. The main purpose of this emulator is to do as - * little work as possible. + * little work as possible. * * Leendert van Doorn, leendert@watson.ibm.com * Copyright (c) 2005-2006, International Business Machines Corporation. @@ -33,6 +33,7 @@ #define SEG_SS 0x0020 #define SEG_FS 0x0040 #define SEG_GS 0x0080 +#define REP 0x0100 static unsigned prev_eip = 0; enum vm86_mode mode = 0; @@ -52,12 +53,12 @@ char *states[] = { static char *rnames[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" }; #endif /* DEBUG */ -#define PDE_PS (1 << 7) -#define PT_ENTRY_PRESENT 0x1 +#define PDE_PS (1 << 7) +#define PT_ENTRY_PRESENT 0x1 /* We only support access to <=4G physical memory due to 1:1 mapping */ -static unsigned -guest_linear_to_real(uint32_t base) +static uint64_t +guest_linear_to_phys(uint32_t base) { uint32_t gcr3 = oldctx.cr3; uint64_t l2_mfn; @@ -89,23 +90,32 @@ guest_linear_to_real(uint32_t base) l2_mfn = ((uint64_t *)(long)gcr3)[(base >> 30) & 0x3]; if (!(l2_mfn & PT_ENTRY_PRESENT)) panic("l3 entry not present\n"); - l2_mfn &= 0x3fffff000ULL; + l2_mfn &= 0xffffff000ULL; - l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff]; + if (l2_mfn & 0xf00000000ULL) { + printf("l2 page above 4G\n"); + cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), &l1_mfn); + } else + l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff]; if (!(l1_mfn & PT_ENTRY_PRESENT)) panic("l2 entry not present\n"); if (l1_mfn & PDE_PS) { /* CR4.PSE is ignored in PAE mode */ - l0_mfn = l1_mfn & 0x3ffe00000ULL; + l0_mfn = l1_mfn & 0xfffe00000ULL; return l0_mfn + (base & 0x1fffff); } - l1_mfn &= 0x3fffff000ULL; + l1_mfn &= 0xffffff000ULL; - l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff]; + if (l1_mfn & 0xf00000000ULL) { + printf("l1 page above 4G\n"); + cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), &l0_mfn); + } else + l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff]; if (!(l0_mfn & PT_ENTRY_PRESENT)) panic("l1 entry not present\n"); - l0_mfn &= 0x3fffff000ULL; + + l0_mfn &= 0xffffff000ULL; return l0_mfn + (base & 0xfff); } @@ -114,6 +124,7 @@ guest_linear_to_real(uint32_t base) static unsigned address(struct regs *regs, unsigned seg, unsigned off) { + uint64_t gdt_phys_base; unsigned long long entry; unsigned seg_base, seg_limit; unsigned entry_low, entry_high; @@ -126,11 +137,16 @@ address(struct regs *regs, unsigned seg, unsigned off) } if (mode == VM86_REAL || seg > oldctx.gdtr_limit || - (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg)) + (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg)) return ((seg & 0xFFFF) << 4) + off; - entry = ((unsigned long long *) - guest_linear_to_real(oldctx.gdtr_base))[seg >> 3]; + gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base); + if (gdt_phys_base != (uint32_t)gdt_phys_base) { + printf("gdt base address above 4G\n"); + cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3), &entry); + } else + entry = ((unsigned long long *)(long)gdt_phys_base)[seg >> 3]; + entry_high = entry >> 32; entry_low = entry & 0xFFFFFFFF; @@ -138,13 +154,13 @@ address(struct regs *regs, unsigned seg, unsigned off) seg_limit = (entry_high & 0xF0000) | (entry_low & 0xFFFF); if (entry_high & 0x8000 && - ((entry_high & 0x800000 && off >> 12 <= seg_limit) || - (!(entry_high & 0x800000) && off <= seg_limit))) + ((entry_high & 0x800000 && off >> 12 <= seg_limit) || + (!(entry_high & 0x800000) && off <= seg_limit))) return seg_base + off; panic("should never reach here in function address():\n\t" - "entry=0x%08x%08x, mode=%d, seg=0x%08x, offset=0x%08x\n", - entry_high, entry_low, mode, seg, off); + "entry=0x%08x%08x, mode=%d, seg=0x%08x, offset=0x%08x\n", + entry_high, entry_low, mode, seg, off); return 0; } @@ -157,7 +173,7 @@ trace(struct regs *regs, int adjust, char *fmt, ...) va_list ap; if ((traceset & (1 << mode)) && - (mode == VM86_REAL_TO_PROTECTED || mode == VM86_REAL)) { + (mode == VM86_REAL_TO_PROTECTED || mode == VM86_REAL)) { /* 16-bit, seg:off addressing */ unsigned addr = address(regs, regs->cs, off); printf("0x%08x: 0x%x:0x%04x ", addr, regs->cs, off); @@ -168,7 +184,7 @@ trace(struct regs *regs, int adjust, char *fmt, ...) printf("\n"); } if ((traceset & (1 << mode)) && - (mode == VM86_PROTECTED_TO_REAL || mode == VM86_PROTECTED)) { + (mode == VM86_PROTECTED_TO_REAL || mode == VM86_PROTECTED)) { /* 16-bit, gdt addressing */ unsigned addr = address(regs, regs->cs, off); printf("0x%08x: 0x%x:0x%08x ", addr, regs->cs, off); @@ -282,7 +298,7 @@ getreg32(struct regs *regs, int r) case 1: return regs->ecx; case 2: return regs->edx; case 3: return regs->ebx; - case 4: return regs->esp; + case 4: return regs->uesp; case 5: return regs->ebp; case 6: return regs->esi; case 7: return regs->edi; @@ -304,10 +320,10 @@ getreg8(struct regs *regs, int r) case 1: return regs->ecx & 0xFF; /* cl */ case 2: return regs->edx & 0xFF; /* dl */ case 3: return regs->ebx & 0xFF; /* bl */ - case 4: return (regs->esp >> 8) & 0xFF; /* ah */ - case 5: return (regs->ebp >> 8) & 0xFF; /* ch */ - case 6: return (regs->esi >> 8) & 0xFF; /* dh */ - case 7: return (regs->edi >> 8) & 0xFF; /* bh */ + case 4: return (regs->eax >> 8) & 0xFF; /* ah */ + case 5: return (regs->ecx >> 8) & 0xFF; /* ch */ + case 6: return (regs->edx >> 8) & 0xFF; /* dh */ + case 7: return (regs->ebx >> 8) & 0xFF; /* bh */ } return ~0; } @@ -320,7 +336,7 @@ setreg32(struct regs *regs, int r, unsigned v) case 1: regs->ecx = v; break; case 2: regs->edx = v; break; case 3: regs->ebx = v; break; - case 4: regs->esp = v; break; + case 4: regs->uesp = v; break; case 5: regs->ebp = v; break; case 6: regs->esi = v; break; case 7: regs->edi = v; break; @@ -342,10 +358,10 @@ setreg8(struct regs *regs, int r, unsigned v) case 1: regs->ecx = (regs->ecx & ~0xFF) | v; break; case 2: regs->edx = (regs->edx & ~0xFF) | v; break; case 3: regs->ebx = (regs->ebx & ~0xFF) | v; break; - case 4: regs->esp = (regs->esp & ~0xFF00) | (v << 8); break; - case 5: regs->ebp = (regs->ebp & ~0xFF00) | (v << 8); break; - case 6: regs->esi = (regs->esi & ~0xFF00) | (v << 8); break; - case 7: regs->edi = (regs->edi & ~0xFF00) | (v << 8); break; + case 4: regs->eax = (regs->eax & ~0xFF00) | (v << 8); break; + case 5: regs->ecx = (regs->ecx & ~0xFF00) | (v << 8); break; + case 6: regs->edx = (regs->edx & ~0xFF00) | (v << 8); break; + case 7: regs->ebx = (regs->ebx & ~0xFF00) | (v << 8); break; } } @@ -361,9 +377,9 @@ segment(unsigned prefix, struct regs *regs, unsigned seg) if (prefix & SEG_SS) seg = regs->uss; if (prefix & SEG_FS) - seg = regs->fs; + seg = regs->vfs; if (prefix & SEG_GS) - seg = regs->gs; + seg = regs->vgs; return seg; } @@ -415,7 +431,7 @@ operand(unsigned prefix, struct regs *regs, unsigned modrm) case 2: return address(regs, seg, regs->edx); case 3: return address(regs, seg, regs->ebx); case 4: return address(regs, seg, - sib(regs, mod, fetch8(regs))); + sib(regs, mod, fetch8(regs))); case 5: return address(regs, seg, fetch32(regs)); case 6: return address(regs, seg, regs->esi); case 7: return address(regs, seg, regs->edi); @@ -435,7 +451,7 @@ operand(unsigned prefix, struct regs *regs, unsigned modrm) case 2: return address(regs, seg, regs->edx + disp); case 3: return address(regs, seg, regs->ebx + disp); case 4: return address(regs, seg, - sib(regs, mod, fetch8(regs))); + sib(regs, mod, fetch8(regs))); case 5: return address(regs, seg, regs->ebp + disp); case 6: return address(regs, seg, regs->esi + disp); case 7: return address(regs, seg, regs->edi + disp); @@ -492,7 +508,7 @@ operand(unsigned prefix, struct regs *regs, unsigned modrm) } } - return 0; + return 0; } /* @@ -546,11 +562,7 @@ lmsw(struct regs *regs, unsigned prefix, unsigned modrm) unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax; TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax)); -#ifndef TEST oldctx.cr0 = cr0 | CR0_PE | CR0_NE; -#else - oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG; -#endif if (cr0 & CR0_PE) set_mode(regs, VM86_REAL_TO_PROTECTED); @@ -569,8 +581,13 @@ movr(struct regs *regs, unsigned prefix, unsigned opc) unsigned addr = operand(prefix, regs, modrm); unsigned val, r = (modrm >> 3) & 7; - if ((modrm & 0xC0) == 0xC0) /* no registers */ - return 0; + if ((modrm & 0xC0) == 0xC0) { + /* + * Emulate all guest instructions in protected to real mode. + */ + if (mode != VM86_PROTECTED_TO_REAL) + return 0; + } switch (opc) { case 0x88: /* addr32 mov r8, r/m8 */ @@ -578,16 +595,24 @@ movr(struct regs *regs, unsigned prefix, unsigned opc) TRACE((regs, regs->eip - eip, "movb %%e%s, *0x%x", rnames[r], addr)); write8(addr, val); - break; + return 1; case 0x8A: /* addr32 mov r/m8, r8 */ TRACE((regs, regs->eip - eip, "movb *0x%x, %%%s", addr, rnames[r])); setreg8(regs, r, read8(addr)); - break; + return 1; case 0x89: /* addr32 mov r16, r/m16 */ val = getreg32(regs, r); + if ((modrm & 0xC0) == 0xC0) { + if (prefix & DATA32) + setreg32(regs, modrm & 7, val); + else + setreg16(regs, modrm & 7, MASK16(val)); + return 1; + } + if (prefix & DATA32) { TRACE((regs, regs->eip - eip, "movl %%e%s, *0x%x", rnames[r], addr)); @@ -597,9 +622,17 @@ movr(struct regs *regs, unsigned prefix, unsigned opc) "movw %%%s, *0x%x", rnames[r], addr)); write16(addr, MASK16(val)); } - break; + return 1; + + case 0x8B: /* mov r/m16, r16 */ + if ((modrm & 0xC0) == 0xC0) { + if (prefix & DATA32) + setreg32(regs, r, addr); + else + setreg16(regs, r, MASK16(addr)); + return 1; + } - case 0x8B: /* addr32 mov r/m16, r16 */ if (prefix & DATA32) { TRACE((regs, regs->eip - eip, "movl *0x%x, %%e%s", addr, rnames[r])); @@ -609,7 +642,7 @@ movr(struct regs *regs, unsigned prefix, unsigned opc) "movw *0x%x, %%%s", addr, rnames[r])); setreg16(regs, r, read16(addr)); } - break; + return 1; case 0xC6: /* addr32 movb $imm, r/m8 */ if ((modrm >> 3) & 7) @@ -618,11 +651,113 @@ movr(struct regs *regs, unsigned prefix, unsigned opc) write8(addr, val); TRACE((regs, regs->eip - eip, "movb $0x%x, *0x%x", val, addr)); + return 1; + } + return 0; +} + +/* + * We need to handle string moves that address memory beyond the 64KB segment + * limit that VM8086 mode enforces. + */ +static inline int +movs(struct regs *regs, unsigned prefix, unsigned opc) +{ + unsigned eip = regs->eip - 1; + unsigned sseg = segment(prefix, regs, regs->vds); + unsigned dseg = regs->ves; + unsigned saddr, daddr; + unsigned count = 1; + int incr = ((regs->eflags & EFLAGS_DF) == 0) ? 1 : -1; + + saddr = address(regs, sseg, regs->esi); + daddr = address(regs, dseg, regs->edi); + + if ((prefix & REP) != 0) { + count = regs->ecx; + regs->ecx = 0; + } + + switch (opc) { + case 0xA4: /* movsb */ + regs->esi += (incr * count); + regs->edi += (incr * count); + + while (count-- != 0) { + write8(daddr, read8(saddr)); + daddr += incr; + saddr += incr; + } + TRACE((regs, regs->eip - eip, "movsb (%%esi),%%es:(%%edi)")); + break; + + case 0xA5: /* movsw */ + if ((prefix & DATA32) == 0) { + incr = 2 * incr; + regs->esi += (incr * count); + regs->edi += (incr * count); + + while (count-- != 0) { + write16(daddr, read16(saddr)); + daddr += incr; + saddr += incr; + } + } else { + incr = 4 * incr; + regs->esi += (incr * count); + regs->edi += (incr * count); + + while (count-- != 0) { + write32(daddr, read32(saddr)); + daddr += incr; + saddr += incr; + } + } + TRACE((regs, regs->eip - eip, "movsw %s(%%esi),%%es:(%%edi)")); break; } + return 1; } +static inline int +lods(struct regs *regs, unsigned prefix, unsigned opc) +{ + unsigned eip = regs->eip - 1; + unsigned seg = segment(prefix, regs, regs->vds); + unsigned addr = address(regs, seg, regs->esi); + unsigned count = 1; + int incr = ((regs->eflags & EFLAGS_DF) == 0) ? 1 : -1; + + if ((prefix & REP) != 0) { + count = regs->ecx; + regs->ecx = 0; + } + + switch (opc) { + case 0xAD: /* lodsw */ + if ((prefix & DATA32) == 0) { + incr = 2 * incr; + regs->esi += (incr * count); + while (count-- != 0) { + setreg16(regs, 0, read16(addr)); + addr += incr; + } + + TRACE((regs, regs->eip - eip, "lodsw (%%esi),%%ax")); + } else { + incr = 4 * incr; + regs->esi += (incr * count); + while (count-- != 0) { + setreg32(regs, 0, read32(addr)); + addr += incr; + } + TRACE((regs, regs->eip - eip, "lodsw (%%esi),%%eax")); + } + break; + } + return 1; +} /* * Move to and from a control register. */ @@ -641,13 +776,8 @@ movcr(struct regs *regs, unsigned prefix, unsigned opc) TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr)); switch (cr) { case 0: -#ifndef TEST setreg32(regs, modrm, oldctx.cr0 & ~(CR0_PE | CR0_NE)); -#else - setreg32(regs, modrm, - oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG)); -#endif break; case 2: setreg32(regs, modrm, get_cr2()); @@ -665,13 +795,10 @@ movcr(struct regs *regs, unsigned prefix, unsigned opc) switch (cr) { case 0: oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE); -#ifdef TEST - oldctx.cr0 |= CR0_PG; -#endif if (getreg32(regs, modrm) & CR0_PE) set_mode(regs, VM86_REAL_TO_PROTECTED); - else - set_mode(regs, VM86_REAL); + //else + // set_mode(regs, VM86_REAL); break; case 3: oldctx.cr3 = getreg32(regs, modrm); @@ -694,6 +821,55 @@ static inline void set_eflags_ZF(unsigned mask, unsigned v1, struct regs *regs) regs->eflags &= ~EFLAGS_ZF; } +static void set_eflags_add(unsigned hi_bit_mask, unsigned v1, unsigned v2, + unsigned result, struct regs *regs) +{ + int bit_count; + unsigned tmp; + unsigned full_mask; + unsigned nonsign_mask; + + /* Carry out of high order bit? */ + if ( v1 & v2 & hi_bit_mask ) + regs->eflags |= EFLAGS_CF; + else + regs->eflags &= ~EFLAGS_CF; + + /* Even parity in least significant byte? */ + tmp = result & 0xff; + for (bit_count = 0; tmp != 0; bit_count++) + tmp &= (tmp - 1); + + if (bit_count & 1) + regs->eflags &= ~EFLAGS_PF; + else + regs->eflags |= EFLAGS_PF; + + /* Carry out of least significant BCD digit? */ + if ( v1 & v2 & (1<<3) ) + regs->eflags |= EFLAGS_AF; + else + regs->eflags &= ~EFLAGS_AF; + + /* Result is zero? */ + full_mask = (hi_bit_mask - 1) | hi_bit_mask; + set_eflags_ZF(full_mask, result, regs); + + /* Sign of result? */ + if ( result & hi_bit_mask ) + regs->eflags |= EFLAGS_SF; + else + regs->eflags &= ~EFLAGS_SF; + + /* Carry out of highest non-sign bit? */ + nonsign_mask = (hi_bit_mask >> 1) & ~hi_bit_mask; + if ( v1 & v2 & hi_bit_mask ) + regs->eflags |= EFLAGS_OF; + else + regs->eflags &= ~EFLAGS_OF; + +} + /* * We need to handle cmp opcodes that address memory beyond the 64KB * segment limit that VM8086 mode enforces. @@ -768,6 +944,82 @@ test(struct regs *regs, unsigned prefix, unsigned opc) } /* + * We need to handle add opcodes that address memory beyond the 64KB + * segment limit that VM8086 mode enforces. + */ +static int +add(struct regs *regs, unsigned prefix, unsigned opc) +{ + unsigned eip = regs->eip - 1; + unsigned modrm = fetch8(regs); + unsigned addr = operand(prefix, regs, modrm); + unsigned r = (modrm >> 3) & 7; + + unsigned val1 = 0; + unsigned val2 = 0; + unsigned result = 0; + unsigned hi_bit; + + if ((modrm & 0xC0) == 0xC0) /* no registers */ + return 0; + + switch (opc) { + case 0x00: /* addr32 add r8, r/m8 */ + val1 = getreg8(regs, r); + val2 = read8(addr); + result = val1 + val2; + write8(addr, result); + TRACE((regs, regs->eip - eip, + "addb %%e%s, *0x%x", rnames[r], addr)); + break; + + case 0x01: /* addr32 add r16, r/m16 */ + if (prefix & DATA32) { + val1 = getreg32(regs, r); + val2 = read32(addr); + result = val1 + val2; + write32(addr, result); + TRACE((regs, regs->eip - eip, + "addl %%e%s, *0x%x", rnames[r], addr)); + } else { + val1 = getreg16(regs, r); + val2 = read16(addr); + result = val1 + val2; + write16(addr, result); + TRACE((regs, regs->eip - eip, + "addw %%e%s, *0x%x", rnames[r], addr)); + } + break; + + case 0x03: /* addr32 add r/m16, r16 */ + if (prefix & DATA32) { + val1 = getreg32(regs, r); + val2 = read32(addr); + result = val1 + val2; + setreg32(regs, r, result); + TRACE((regs, regs->eip - eip, + "addl *0x%x, %%e%s", addr, rnames[r])); + } else { + val1 = getreg16(regs, r); + val2 = read16(addr); + result = val1 + val2; + setreg16(regs, r, result); + TRACE((regs, regs->eip - eip, + "addw *0x%x, %%%s", addr, rnames[r])); + } + break; + } + + if (opc == 0x00) + hi_bit = (1<<7); + else + hi_bit = (prefix & DATA32) ? (1<<31) : (1<<15); + set_eflags_add(hi_bit, val1, val2, result, regs); + + return 1; +} + +/* * We need to handle pop opcodes that address memory beyond the 64KB * segment limit that VM8086 mode enforces. */ @@ -798,12 +1050,78 @@ pop(struct regs *regs, unsigned prefix, unsigned opc) return 1; } +static int +mov_to_seg(struct regs *regs, unsigned prefix, unsigned opc) +{ + unsigned modrm = fetch8(regs); + + /* + * Emulate segment loads in: + * 1) real->protected mode. + * 2) protected->real mode. + */ + if (mode != VM86_REAL_TO_PROTECTED && + mode != VM86_PROTECTED_TO_REAL) + return 0; + + /* Register source only. */ + if ((modrm & 0xC0) != 0xC0) + goto fail; + + switch ((modrm & 0x38) >> 3) { + case 0: /* es */ + regs->ves = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; + saved_rm_regs.ves = 0; + oldctx.es_sel = regs->ves; + return 1; + + /* case 1: cs */ + + case 2: /* ss */ + regs->uss = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; + saved_rm_regs.uss = 0; + oldctx.ss_sel = regs->uss; + return 1; + case 3: /* ds */ + regs->vds = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; + saved_rm_regs.vds = 0; + oldctx.ds_sel = regs->vds; + return 1; + case 4: /* fs */ + regs->vfs = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; + saved_rm_regs.vfs = 0; + oldctx.fs_sel = regs->vfs; + return 1; + case 5: /* gs */ + regs->vgs = getreg16(regs, modrm); + if (mode == VM86_PROTECTED_TO_REAL) + return 1; + saved_rm_regs.vgs = 0; + oldctx.gs_sel = regs->vgs; + return 1; + } + + fail: + printf("%s:%d: missed opcode %02x %02x\n", + __FUNCTION__, __LINE__, opc, modrm); + return 0; +} + /* * Emulate a segment load in protected mode */ static int load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes) { + uint64_t gdt_phys_base; unsigned long long entry; /* protected mode: use seg as index into gdt */ @@ -815,8 +1133,12 @@ load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes return 1; } - entry = ((unsigned long long *) - guest_linear_to_real(oldctx.gdtr_base))[sel >> 3]; + gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base); + if (gdt_phys_base != (uint32_t)gdt_phys_base) { + printf("gdt base address above 4G\n"); + cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3), &entry); + } else + entry = ((unsigned long long *)(long)gdt_phys_base)[sel >> 3]; /* Check the P bit first */ if (!((entry >> (15+32)) & 0x1) && sel != 0) @@ -826,11 +1148,11 @@ load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes ((entry >> (32-16)) & 0x00FF0000) | ((entry >> ( 16)) & 0x0000FFFF)); *limit = (((entry >> (48-16)) & 0x000F0000) | - ((entry ) & 0x0000FFFF)); + (entry & 0x0000FFFF)); arbytes->bytes = 0; arbytes->fields.seg_type = (entry >> (8+32)) & 0xF; /* TYPE */ - arbytes->fields.s = (entry >> (12+32)) & 0x1; /* S */ + arbytes->fields.s = (entry >> (12+32)) & 0x1; /* S */ if (arbytes->fields.s) arbytes->fields.seg_type |= 1; /* accessed */ arbytes->fields.dpl = (entry >> (13+32)) & 0x3; /* DPL */ @@ -847,82 +1169,57 @@ load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes } /* + * Emulate a protected mode segment load, falling back to clearing it if + * the descriptor was invalid. + */ +static void +load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes) +{ + if (!load_seg(sel, base, limit, arbytes)) + load_seg(0, base, limit, arbytes); +} + +static unsigned char rm_irqbase[2]; + +/* * Transition to protected mode */ static void protected_mode(struct regs *regs) { + extern char stack_top[]; + + oldctx.rm_irqbase[0] = rm_irqbase[0]; + oldctx.rm_irqbase[1] = rm_irqbase[1]; + regs->eflags &= ~(EFLAGS_TF|EFLAGS_VM); oldctx.eip = regs->eip; oldctx.esp = regs->uesp; oldctx.eflags = regs->eflags; - memset(&saved_rm_regs, 0, sizeof(struct regs)); - /* reload all segment registers */ if (!load_seg(regs->cs, &oldctx.cs_base, &oldctx.cs_limit, &oldctx.cs_arbytes)) panic("Invalid %%cs=0x%x for protected mode\n", regs->cs); oldctx.cs_sel = regs->cs; - if (load_seg(regs->ves, &oldctx.es_base, - &oldctx.es_limit, &oldctx.es_arbytes)) - oldctx.es_sel = regs->ves; - else { - load_seg(0, &oldctx.es_base, - &oldctx.es_limit, &oldctx.es_arbytes); - oldctx.es_sel = 0; - saved_rm_regs.ves = regs->ves; - } - - if (load_seg(regs->uss, &oldctx.ss_base, - &oldctx.ss_limit, &oldctx.ss_arbytes)) - oldctx.ss_sel = regs->uss; - else { - load_seg(0, &oldctx.ss_base, - &oldctx.ss_limit, &oldctx.ss_arbytes); - oldctx.ss_sel = 0; - saved_rm_regs.uss = regs->uss; - } - - if (load_seg(regs->vds, &oldctx.ds_base, - &oldctx.ds_limit, &oldctx.ds_arbytes)) - oldctx.ds_sel = regs->vds; - else { - load_seg(0, &oldctx.ds_base, - &oldctx.ds_limit, &oldctx.ds_arbytes); - oldctx.ds_sel = 0; - saved_rm_regs.vds = regs->vds; - } - - if (load_seg(regs->vfs, &oldctx.fs_base, - &oldctx.fs_limit, &oldctx.fs_arbytes)) - oldctx.fs_sel = regs->vfs; - else { - load_seg(0, &oldctx.fs_base, - &oldctx.fs_limit, &oldctx.fs_arbytes); - oldctx.fs_sel = 0; - saved_rm_regs.vfs = regs->vfs; - } - - if (load_seg(regs->vgs, &oldctx.gs_base, - &oldctx.gs_limit, &oldctx.gs_arbytes)) - oldctx.gs_sel = regs->vgs; - else { - load_seg(0, &oldctx.gs_base, - &oldctx.gs_limit, &oldctx.gs_arbytes); - oldctx.gs_sel = 0; - saved_rm_regs.vgs = regs->vgs; - } + load_or_clear_seg(oldctx.es_sel, &oldctx.es_base, + &oldctx.es_limit, &oldctx.es_arbytes); + load_or_clear_seg(oldctx.ss_sel, &oldctx.ss_base, + &oldctx.ss_limit, &oldctx.ss_arbytes); + load_or_clear_seg(oldctx.ds_sel, &oldctx.ds_base, + &oldctx.ds_limit, &oldctx.ds_arbytes); + load_or_clear_seg(oldctx.fs_sel, &oldctx.fs_base, + &oldctx.fs_limit, &oldctx.fs_arbytes); + load_or_clear_seg(oldctx.gs_sel, &oldctx.gs_base, + &oldctx.gs_limit, &oldctx.gs_arbytes); /* initialize jump environment to warp back to protected mode */ + regs->uss = DATA_SELECTOR; + regs->uesp = (unsigned long)stack_top; regs->cs = CODE_SELECTOR; - regs->ds = DATA_SELECTOR; - regs->es = DATA_SELECTOR; - regs->fs = DATA_SELECTOR; - regs->gs = DATA_SELECTOR; - regs->eip = (unsigned) &switch_to_protected_mode; + regs->eip = (unsigned long)switch_to_protected_mode; /* this should get us into 32-bit mode */ } @@ -934,10 +1231,6 @@ static void real_mode(struct regs *regs) { regs->eflags |= EFLAGS_VM | 0x02; - regs->ds = DATA_SELECTOR; - regs->es = DATA_SELECTOR; - regs->fs = DATA_SELECTOR; - regs->gs = DATA_SELECTOR; /* * When we transition from protected to real-mode and we @@ -951,21 +1244,21 @@ real_mode(struct regs *regs) panic("%%ss 0x%lx higher than 1MB", regs->uss); regs->uss = address(regs, regs->uss, 0) >> 4; } else { - regs->uss = saved_rm_regs.uss; + regs->uss = saved_rm_regs.uss; } if (regs->vds != 0) { if (regs->vds >= HIGHMEM) panic("%%ds 0x%lx higher than 1MB", regs->vds); regs->vds = address(regs, regs->vds, 0) >> 4; } else { - regs->vds = saved_rm_regs.vds; + regs->vds = saved_rm_regs.vds; } if (regs->ves != 0) { if (regs->ves >= HIGHMEM) panic("%%es 0x%lx higher than 1MB", regs->ves); regs->ves = address(regs, regs->ves, 0) >> 4; } else { - regs->ves = saved_rm_regs.ves; + regs->ves = saved_rm_regs.ves; } /* this should get us into 16-bit mode */ @@ -988,47 +1281,46 @@ set_mode(struct regs *regs, enum vm86_mode newmode) { switch (newmode) { case VM86_REAL: - if ((mode == VM86_PROTECTED_TO_REAL) || - (mode == VM86_REAL_TO_PROTECTED)) { + if (mode == VM86_PROTECTED_TO_REAL || + mode == VM86_REAL_TO_PROTECTED) { regs->eflags &= ~EFLAGS_TF; real_mode(regs); - break; - } else if (mode == VM86_REAL) { - break; - } else + } else if (mode != VM86_REAL) panic("unexpected real mode transition"); break; case VM86_REAL_TO_PROTECTED: if (mode == VM86_REAL) { regs->eflags |= EFLAGS_TF; - break; - } else if (mode == VM86_REAL_TO_PROTECTED) { - break; - } else + saved_rm_regs.vds = regs->vds; + saved_rm_regs.ves = regs->ves; + saved_rm_regs.vfs = regs->vfs; + saved_rm_regs.vgs = regs->vgs; + saved_rm_regs.uss = regs->uss; + oldctx.ds_sel = 0; + oldctx.es_sel = 0; + oldctx.fs_sel = 0; + oldctx.gs_sel = 0; + oldctx.ss_sel = 0; + } else if (mode != VM86_REAL_TO_PROTECTED) panic("unexpected real-to-protected mode transition"); break; case VM86_PROTECTED_TO_REAL: - if (mode == VM86_PROTECTED) { - break; - } else + if (mode != VM86_PROTECTED) panic("unexpected protected-to-real mode transition"); break; case VM86_PROTECTED: - if (mode == VM86_REAL_TO_PROTECTED) { - protected_mode(regs); -// printf("\n"); - mode = newmode; - return; - } else + if (mode != VM86_REAL_TO_PROTECTED) panic("unexpected protected mode transition"); + protected_mode(regs); break; } mode = newmode; - TRACE((regs, 0, states[mode])); + if (mode != VM86_PROTECTED) + TRACE((regs, 0, states[mode])); } static void @@ -1037,25 +1329,19 @@ jmpl(struct regs *regs, int prefix) unsigned n = regs->eip; unsigned cs, eip; - if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */ - eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs); - cs = fetch16(regs); + eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs); + cs = fetch16(regs); - TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); - - regs->cs = cs; - regs->eip = eip; - set_mode(regs, VM86_PROTECTED); - } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */ - eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs); - cs = fetch16(regs); + TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); - TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); + regs->cs = cs; + regs->eip = eip; - regs->cs = cs; - regs->eip = eip; + if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */ + set_mode(regs, VM86_PROTECTED); + else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */ set_mode(regs, VM86_REAL); - } else + else panic("jmpl"); } @@ -1066,29 +1352,22 @@ jmpl_indirect(struct regs *regs, int prefix, unsigned modrm) unsigned cs, eip; unsigned addr; - addr = operand(prefix, regs, modrm); + addr = operand(prefix, regs, modrm); - if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */ - eip = (prefix & DATA32) ? read32(addr) : read16(addr); - addr += (prefix & DATA32) ? 4 : 2; - cs = read16(addr); + eip = (prefix & DATA32) ? read32(addr) : read16(addr); + addr += (prefix & DATA32) ? 4 : 2; + cs = read16(addr); - TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); + TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); - regs->cs = cs; - regs->eip = eip; - set_mode(regs, VM86_PROTECTED); - } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */ - eip = (prefix & DATA32) ? read32(addr) : read16(addr); - addr += (prefix & DATA32) ? 4 : 2; - cs = read16(addr); - - TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); + regs->cs = cs; + regs->eip = eip; - regs->cs = cs; - regs->eip = eip; + if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */ + set_mode(regs, VM86_PROTECTED); + else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */ set_mode(regs, VM86_REAL); - } else + else panic("jmpl"); } @@ -1107,15 +1386,14 @@ retl(struct regs *regs, int prefix) TRACE((regs, 1, "retl (to 0x%x:0x%x)", cs, eip)); - if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */ - regs->cs = cs; - regs->eip = eip; + regs->cs = cs; + regs->eip = eip; + + if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */ set_mode(regs, VM86_PROTECTED); - } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */ - regs->cs = cs; - regs->eip = eip; + else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */ set_mode(regs, VM86_REAL); - } else + else panic("retl"); } @@ -1166,6 +1444,7 @@ outbyte(struct regs *regs, unsigned prefix, unsigned opc) icw2[0] = 0; printf("Remapping master: ICW2 0x%x -> 0x%x\n", al, NR_EXCEPTION_HANDLER); + rm_irqbase[0] = al; al = NR_EXCEPTION_HANDLER; } break; @@ -1179,6 +1458,7 @@ outbyte(struct regs *regs, unsigned prefix, unsigned opc) icw2[1] = 0; printf("Remapping slave: ICW2 0x%x -> 0x%x\n", al, NR_EXCEPTION_HANDLER+8); + rm_irqbase[1] = al; al = NR_EXCEPTION_HANDLER+8; } break; @@ -1215,8 +1495,8 @@ pushrm(struct regs *regs, int prefix, unsigned modrm) unsigned addr; unsigned data; - addr = operand(prefix, regs, modrm); - + addr = operand(prefix, regs, modrm); + if (prefix & DATA32) { data = read32(addr); push32(regs, data); @@ -1254,14 +1534,34 @@ opcode(struct regs *regs) unsigned opc, modrm, disp; unsigned prefix = 0; + if (mode == VM86_PROTECTED_TO_REAL && + oldctx.cs_arbytes.fields.default_ops_size) { + prefix |= DATA32; + prefix |= ADDR32; + } + for (;;) { switch ((opc = fetch8(regs))) { - case 0x07: - if (prefix & DATA32) - regs->ves = pop32(regs); - else - regs->ves = pop16(regs); + + case 0x00: /* addr32 add r8, r/m8 */ + case 0x01: /* addr32 add r16, r/m16 */ + case 0x03: /* addr32 add r/m16, r16 */ + if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED) + goto invalid; + if ((prefix & ADDR32) == 0) + goto invalid; + if (!add(regs, prefix, opc)) + goto invalid; + return OPC_EMULATED; + + case 0x07: /* pop %es */ + regs->ves = (prefix & DATA32) ? + pop32(regs) : pop16(regs); TRACE((regs, regs->eip - eip, "pop %%es")); + if (mode == VM86_REAL_TO_PROTECTED) { + saved_rm_regs.ves = 0; + oldctx.es_sel = regs->ves; + } return OPC_EMULATED; case 0x0F: /* two byte opcode */ @@ -1293,6 +1593,9 @@ opcode(struct regs *regs) goto invalid; } break; + case 0x06: /* clts */ + oldctx.cr0 &= ~CR0_TS; + return OPC_EMULATED; case 0x09: /* wbinvd */ return OPC_EMULATED; case 0x20: /* mov Rd, Cd (1h) */ @@ -1311,6 +1614,16 @@ opcode(struct regs *regs) } goto invalid; + case 0x1F: /* pop %ds */ + regs->vds = (prefix & DATA32) ? + pop32(regs) : pop16(regs); + TRACE((regs, regs->eip - eip, "pop %%ds")); + if (mode == VM86_REAL_TO_PROTECTED) { + saved_rm_regs.vds = 0; + oldctx.ds_sel = regs->vds; + } + return OPC_EMULATED; + case 0x26: TRACE((regs, regs->eip - eip, "%%es:")); prefix |= SEG_ES; @@ -1328,13 +1641,11 @@ opcode(struct regs *regs) case 0x39: /* addr32 cmp r16, r/m16 */ case 0x3B: /* addr32 cmp r/m16, r16 */ - if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED) + if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32)) goto invalid; - if ((prefix & ADDR32) == 0) - goto invalid; - if (!cmp(regs, prefix, opc)) - goto invalid; - return OPC_EMULATED; + if (!cmp(regs, prefix, opc)) + goto invalid; + return OPC_EMULATED; case 0x3E: TRACE((regs, regs->eip - eip, "%%ds:")); @@ -1352,57 +1663,54 @@ opcode(struct regs *regs) continue; case 0x66: - TRACE((regs, regs->eip - eip, "data32")); - prefix |= DATA32; + if (mode == VM86_PROTECTED_TO_REAL && + oldctx.cs_arbytes.fields.default_ops_size) { + TRACE((regs, regs->eip - eip, "data16")); + prefix &= ~DATA32; + } else { + TRACE((regs, regs->eip - eip, "data32")); + prefix |= DATA32; + } continue; - case 0x67: - TRACE((regs, regs->eip - eip, "addr32")); - prefix |= ADDR32; + case 0x67: + if (mode == VM86_PROTECTED_TO_REAL && + oldctx.cs_arbytes.fields.default_ops_size) { + TRACE((regs, regs->eip - eip, "addr16")); + prefix &= ~ADDR32; + } else { + TRACE((regs, regs->eip - eip, "addr32")); + prefix |= ADDR32; + } continue; case 0x88: /* addr32 mov r8, r/m8 */ case 0x8A: /* addr32 mov r/m8, r8 */ - if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED) + if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32)) goto invalid; - if ((prefix & ADDR32) == 0) - goto invalid; - if (!movr(regs, prefix, opc)) - goto invalid; - return OPC_EMULATED; - - case 0x89: /* addr32 mov r16, r/m16 */ - if (mode == VM86_PROTECTED_TO_REAL) { - unsigned modrm = fetch8(regs); - unsigned addr = operand(prefix, regs, modrm); - unsigned val, r = (modrm >> 3) & 7; - - if (prefix & DATA32) { - val = getreg16(regs, r); - write32(addr, val); - } else { - val = getreg32(regs, r); - write16(addr, MASK16(val)); - } - TRACE((regs, regs->eip - eip, - "mov %%%s, *0x%x", rnames[r], addr)); - return OPC_EMULATED; - } - case 0x8B: /* addr32 mov r/m16, r16 */ - if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED) + if (!movr(regs, prefix, opc)) + goto invalid; + return OPC_EMULATED; + + case 0x89: /* mov r16, r/m16 */ + case 0x8B: /* mov r/m16, r16 */ + if (mode != VM86_PROTECTED_TO_REAL && !(prefix & ADDR32)) + goto invalid; + if (!movr(regs, prefix, opc)) + goto invalid; + return OPC_EMULATED; + + case 0x8E: /* mov r16, sreg */ + if (!mov_to_seg(regs, prefix, opc)) goto invalid; - if ((prefix & ADDR32) == 0) - goto invalid; - if (!movr(regs, prefix, opc)) - goto invalid; - return OPC_EMULATED; + return OPC_EMULATED; case 0x8F: /* addr32 pop r/m16 */ - if ((prefix & ADDR32) == 0) - goto invalid; - if (!pop(regs, prefix, opc)) - goto invalid; - return OPC_EMULATED; + if (!(prefix & ADDR32)) + goto invalid; + if (!pop(regs, prefix, opc)) + goto invalid; + return OPC_EMULATED; case 0x90: /* nop */ TRACE((regs, regs->eip - eip, "nop")); @@ -1426,49 +1734,64 @@ opcode(struct regs *regs) regs->eflags |= EFLAGS_VM; return OPC_EMULATED; - case 0xA1: /* mov ax, r/m16 */ - { - int addr, data; - int seg = segment(prefix, regs, regs->vds); - int offset = prefix & ADDR32? fetch32(regs) : fetch16(regs); - - if (prefix & DATA32) { - addr = address(regs, seg, offset); - data = read32(addr); - setreg32(regs, 0, data); - } else { - addr = address(regs, seg, offset); - data = read16(addr); - setreg16(regs, 0, data); - } - TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr)); + case 0xA1: /* mov ax, r/m16 */ + { + int addr, data; + int seg = segment(prefix, regs, regs->vds); + int offset = prefix & ADDR32 ? fetch32(regs) : fetch16(regs); + + if (prefix & DATA32) { + addr = address(regs, seg, offset); + data = read32(addr); + setreg32(regs, 0, data); + } else { + addr = address(regs, seg, offset); + data = read16(addr); + setreg16(regs, 0, data); } + TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr)); + return OPC_EMULATED; + } + + case 0xA4: /* movsb */ + case 0xA5: /* movsw */ + if ((prefix & ADDR32) == 0) + goto invalid; + if (!movs(regs, prefix, opc)) + goto invalid; return OPC_EMULATED; + case 0xAD: /* lodsw */ + if ((prefix & ADDR32) == 0) + goto invalid; + if (!lods(regs, prefix, opc)) + goto invalid; + return OPC_EMULATED; + case 0xBB: /* mov bx, imm16 */ - { - int data; - if (prefix & DATA32) { - data = fetch32(regs); - setreg32(regs, 3, data); - } else { - data = fetch16(regs); - setreg16(regs, 3, data); - } - TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data)); + { + int data; + if (prefix & DATA32) { + data = fetch32(regs); + setreg32(regs, 3, data); + } else { + data = fetch16(regs); + setreg16(regs, 3, data); } + TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data)); return OPC_EMULATED; + } case 0xC6: /* addr32 movb $imm, r/m8 */ - if ((prefix & ADDR32) == 0) - goto invalid; - if (!movr(regs, prefix, opc)) - goto invalid; + if (!(prefix & ADDR32)) + goto invalid; + if (!movr(regs, prefix, opc)) + goto invalid; return OPC_EMULATED; case 0xCB: /* retl */ - if ((mode == VM86_REAL_TO_PROTECTED) || - (mode == VM86_PROTECTED_TO_REAL)) { + if (mode == VM86_REAL_TO_PROTECTED || + mode == VM86_PROTECTED_TO_REAL) { retl(regs, prefix); return OPC_INVALID; } @@ -1505,37 +1828,37 @@ opcode(struct regs *regs) return OPC_EMULATED; case 0xEA: /* jmpl */ - if ((mode == VM86_REAL_TO_PROTECTED) || - (mode == VM86_PROTECTED_TO_REAL)) { + if (mode == VM86_REAL_TO_PROTECTED || + mode == VM86_PROTECTED_TO_REAL) { jmpl(regs, prefix); return OPC_INVALID; } goto invalid; - case 0xFF: /* jmpl (indirect) */ - { - unsigned modrm = fetch8(regs); - switch((modrm >> 3) & 7) { - case 5: /* jmpl (indirect) */ - if ((mode == VM86_REAL_TO_PROTECTED) || - (mode == VM86_PROTECTED_TO_REAL)) { - jmpl_indirect(regs, prefix, modrm); - return OPC_INVALID; - } - goto invalid; + case 0xFF: + { + unsigned modrm = fetch8(regs); + switch((modrm >> 3) & 7) { + case 5: /* jmpl (indirect) */ + if (mode == VM86_REAL_TO_PROTECTED || + mode == VM86_PROTECTED_TO_REAL) { + jmpl_indirect(regs, prefix, modrm); + return OPC_INVALID; + } + goto invalid; - case 6: /* push r/m16 */ - pushrm(regs, prefix, modrm); - return OPC_EMULATED; + case 6: /* push r/m16 */ + pushrm(regs, prefix, modrm); + return OPC_EMULATED; - default: - goto invalid; - } + default: + goto invalid; } + } case 0xEB: /* short jump */ - if ((mode == VM86_REAL_TO_PROTECTED) || - (mode == VM86_PROTECTED_TO_REAL)) { + if (mode == VM86_REAL_TO_PROTECTED || + mode == VM86_PROTECTED_TO_REAL) { disp = (char) fetch8(regs); TRACE((regs, 2, "jmp 0x%x", regs->eip + disp)); regs->eip += disp; @@ -1557,11 +1880,21 @@ opcode(struct regs *regs) TRACE((regs, regs->eip - eip, "lock")); continue; + case 0xF4: /* hlt */ + TRACE((regs, regs->eip - eip, "hlt")); + /* Do something power-saving here! */ + return OPC_EMULATED; + + case 0xF3: /* rep/repe/repz */ + TRACE((regs, regs->eip - eip, "rep")); + prefix |= REP; + continue; + case 0xF6: /* addr32 testb $imm, r/m8 */ - if ((prefix & ADDR32) == 0) - goto invalid; - if (!test(regs, prefix, opc)) - goto invalid; + if (!(prefix & ADDR32)) + goto invalid; + if (!test(regs, prefix, opc)) + goto invalid; return OPC_EMULATED; case 0xFA: /* cli */ @@ -1590,6 +1923,7 @@ emulate(struct regs *regs) { unsigned flteip; int nemul = 0; + unsigned ip; /* emulate as many instructions as possible */ while (opcode(regs) != OPC_INVALID) @@ -1598,6 +1932,12 @@ emulate(struct regs *regs) /* detect the case where we are not making progress */ if (nemul == 0 && prev_eip == regs->eip) { flteip = address(regs, MASK16(regs->cs), regs->eip); + + printf("Undecoded sequence: \n"); + for (ip=flteip; ip < flteip+16; ip++) + printf("0x%02x ", read8(ip)); + printf("\n"); + panic("Unknown opcode at %04x:%04x=0x%x", MASK16(regs->cs), regs->eip, flteip); } else @@ -1621,6 +1961,8 @@ trap(int trapno, int errno, struct regs *regs) case 1: /* Debug */ if (regs->eflags & EFLAGS_VM) { /* emulate any 8086 instructions */ + if (mode == VM86_REAL) + return; if (mode != VM86_REAL_TO_PROTECTED) panic("not in real-to-protected mode"); emulate(regs); @@ -1641,7 +1983,7 @@ trap(int trapno, int errno, struct regs *regs) default: invalid: printf("Trap (0x%x) while in %s mode\n", - trapno, regs->eflags & EFLAGS_VM ? "real" : "protected"); + trapno, regs->eflags & EFLAGS_VM ? "real" : "protected"); if (trapno == 14) printf("Page fault address 0x%x\n", get_cr2()); dump_regs(regs); diff --git a/bios/vmxassist/vm86.h b/bios/vmxassist/vm86.h index e0997e4..64c24aa 100644 --- a/bios/vmxassist/vm86.h +++ b/bios/vmxassist/vm86.h @@ -26,18 +26,13 @@ #include "vmx_assist.h" -#define NR_EXCEPTION_HANDLER 32 -#define NR_INTERRUPT_HANDLERS 16 -#define NR_TRAPS (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS) - #ifndef __ASSEMBLY__ struct regs { - unsigned edi, esi, ebp, esp, ebx, edx, ecx, eax; - unsigned ds, es, fs, gs; - unsigned trapno, errno; - unsigned eip, cs, eflags, uesp, uss; - unsigned ves, vds, vfs, vgs; + unsigned edi, esi, ebp, esp, ebx, edx, ecx, eax; + unsigned trapno, errno; + unsigned eip, cs, eflags, uesp, uss; + unsigned ves, vds, vfs, vgs; }; enum vm86_mode { @@ -55,7 +50,6 @@ enum vm86_mode { extern enum vm86_mode prevmode, mode; extern struct vmx_assist_context oldctx; -extern struct vmx_assist_context newctx; extern void emulate(struct regs *); extern void dump_regs(struct regs *); diff --git a/bios/vmxassist/vmx_assist.h b/bios/vmxassist/vmx_assist.h index f987b0f..4ef17fe 100644 --- a/bios/vmxassist/vmx_assist.h +++ b/bios/vmxassist/vmx_assist.h @@ -1,6 +1,24 @@ /* * vmx_assist.h: Context definitions for the VMXASSIST world switch. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Leendert van Doorn, leendert@watson.ibm.com * Copyright (c) 2005, International Business Machines Corporation. */ @@ -17,6 +35,10 @@ #ifndef __ASSEMBLY__ +#define NR_EXCEPTION_HANDLER 32 +#define NR_INTERRUPT_HANDLERS 16 +#define NR_TRAPS (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS) + union vmcs_arbytes { struct arbyte_fields { unsigned int seg_type : 4, @@ -80,6 +102,8 @@ struct vmx_assist_context { uint32_t ldtr_limit; uint32_t ldtr_base; union vmcs_arbytes ldtr_arbytes; + + unsigned char rm_irqbase[2]; }; typedef struct vmx_assist_context vmx_assist_context_t; diff --git a/bios/vmxassist/vmxassist.bin b/bios/vmxassist/vmxassist.bin index 1cafb02..e93a308 100644 Binary files a/bios/vmxassist/vmxassist.bin and b/bios/vmxassist/vmxassist.bin differ diff --git a/palacios/include/palacios/vmx.h b/palacios/include/palacios/vmx.h index e5fe949..eebd289 100644 --- a/palacios/include/palacios/vmx.h +++ b/palacios/include/palacios/vmx.h @@ -70,10 +70,8 @@ struct vmx_basic_msr { } __attribute__((packed)); typedef enum { - VMXASSIST_STARTUP, - VMXASSIST_V8086_BIOS, - VMXASSIST_V8086, - NORMAL + VMXASSIST_DISABLED, + VMXASSIST_ENABLED } vmx_state_t; struct tss_descriptor { diff --git a/palacios/include/palacios/vmx_assist.h b/palacios/include/palacios/vmx_assist.h new file mode 100644 index 0000000..31b9a37 --- /dev/null +++ b/palacios/include/palacios/vmx_assist.h @@ -0,0 +1,126 @@ +/* + * vmx_assist.h: Context definitions for the VMXASSIST world switch. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Leendert van Doorn, leendert@watson.ibm.com + * Copyright (c) 2005, International Business Machines Corporation. + */ + +#ifndef _VMX_ASSIST_H_ +#define _VMX_ASSIST_H_ + +#include + +#define VMXASSIST_BASE 0xD0000 +#define VMXASSIST_MAGIC 0x17101966 +#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8) + +#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12) +#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4) + +#ifndef __ASSEMBLY__ + +#define NR_EXCEPTION_HANDLER 32 +#define NR_INTERRUPT_HANDLERS 16 +#define NR_TRAPS (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS) + +union vmcs_arbytes { + struct arbyte_fields { + unsigned int seg_type : 4, + s : 1, + dpl : 2, + p : 1, + reserved0 : 4, + avl : 1, + reserved1 : 1, + default_ops_size: 1, + g : 1, + null_bit : 1, + reserved2 : 15; + } fields; + unsigned int bytes; +}; + +/* + * World switch state + */ +struct vmx_assist_context { + uint32_t eip; /* execution pointer */ + uint32_t esp; /* stack pointer */ + uint32_t eflags; /* flags register */ + uint32_t cr0; + uint32_t cr3; /* page table directory */ + uint32_t cr4; + uint32_t idtr_limit; /* idt */ + uint32_t idtr_base; + uint32_t gdtr_limit; /* gdt */ + uint32_t gdtr_base; + uint32_t cs_sel; /* cs selector */ + uint32_t cs_limit; + uint32_t cs_base; + union vmcs_arbytes cs_arbytes; + uint32_t ds_sel; /* ds selector */ + uint32_t ds_limit; + uint32_t ds_base; + union vmcs_arbytes ds_arbytes; + uint32_t es_sel; /* es selector */ + uint32_t es_limit; + uint32_t es_base; + union vmcs_arbytes es_arbytes; + uint32_t ss_sel; /* ss selector */ + uint32_t ss_limit; + uint32_t ss_base; + union vmcs_arbytes ss_arbytes; + uint32_t fs_sel; /* fs selector */ + uint32_t fs_limit; + uint32_t fs_base; + union vmcs_arbytes fs_arbytes; + uint32_t gs_sel; /* gs selector */ + uint32_t gs_limit; + uint32_t gs_base; + union vmcs_arbytes gs_arbytes; + uint32_t tr_sel; /* task selector */ + uint32_t tr_limit; + uint32_t tr_base; + union vmcs_arbytes tr_arbytes; + uint32_t ldtr_sel; /* ldtr selector */ + uint32_t ldtr_limit; + uint32_t ldtr_base; + union vmcs_arbytes ldtr_arbytes; + + unsigned char rm_irqbase[2]; +}; +typedef struct vmx_assist_context vmx_assist_context_t; + +int v3_vmxassist_ctx_switch(struct guest_info * info); + +#endif /* __ASSEMBLY__ */ + +#endif /* _VMX_ASSIST_H_ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/palacios/include/palacios/vmx_ctrl_regs.h b/palacios/include/palacios/vmx_ctrl_regs.h new file mode 100644 index 0000000..f35e878 --- /dev/null +++ b/palacios/include/palacios/vmx_ctrl_regs.h @@ -0,0 +1,5 @@ + +#include +#include + +int v3_vmx_handle_cr0_write(struct guest_info * info, v3_reg_t new_val); diff --git a/palacios/include/palacios/vmx_handler.h b/palacios/include/palacios/vmx_handler.h index 7525edb..1507e4a 100644 --- a/palacios/include/palacios/vmx_handler.h +++ b/palacios/include/palacios/vmx_handler.h @@ -85,7 +85,7 @@ struct vmexit_io_qual { uint32_t access_size : 3; // (0: 1 Byte ;; 1: 2 Bytes ;; 3: 4 Bytes) uint32_t dir : 1; // (0: Out ;; 1: In) uint32_t string : 1; // (0: not string ;; 1: string) - uint32_t REP : 1; // (0: not REP ;; 1: REP) + uint32_t rep : 1; // (0: not REP ;; 1: REP) uint32_t op_enc : 1; // (0: DX ;; 1: immediate) uint32_t rsvd : 9; // Set to 0 uint32_t port : 16; // IO Port Number diff --git a/palacios/include/palacios/vmx_io.h b/palacios/include/palacios/vmx_io.h index 2530a7f..3c2ebbd 100644 --- a/palacios/include/palacios/vmx_io.h +++ b/palacios/include/palacios/vmx_io.h @@ -1,3 +1,25 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ +#ifndef __VMX_IO_H__ +#define __VMX_IO_H__ + +#ifdef __V3VEE__ #include @@ -8,3 +30,6 @@ int v3_handle_vmx_io_ins(struct guest_info * info); int v3_handle_vmx_io_out(struct guest_info * info); int v3_handle_vmx_io_outs(struct guest_info * info); + +#endif +#endif diff --git a/palacios/include/palacios/vmx_lowlevel.h b/palacios/include/palacios/vmx_lowlevel.h index b654414..81872e0 100644 --- a/palacios/include/palacios/vmx_lowlevel.h +++ b/palacios/include/palacios/vmx_lowlevel.h @@ -22,6 +22,7 @@ #ifdef __V3VEE__ +#include #define VMX_SUCCESS 0 #define VMX_FAIL_INVALID 1 @@ -134,10 +135,10 @@ static inline int vmcs_read(vmcs_field_t vmcs_field, void * dst) { __asm__ __volatile__ ( VMREAD_OPCODE EAX_ECX_MODRM - "seteb %0;" // fail valid - "setnaeb %1;" // fail invalid - : "=q"(ret_valid), "=q"(ret_invalid), "=c"(val) // Use ECX - : "a" (vmcs_field), "0"(ret_valid), "1"(ret_invalid) + "seteb %1;" // fail valid + "setnaeb %2;" // fail invalid + : "=&c"(val), "=q"(ret_valid), "=q"(ret_invalid) // Use ECX + : "a" (vmcs_field), "1"(ret_valid), "2"(ret_invalid) : "memory" ); diff --git a/palacios/include/palacios/vmx_msr.h b/palacios/include/palacios/vmx_msr.h index 4b89491..ad1d7a9 100644 --- a/palacios/include/palacios/vmx_msr.h +++ b/palacios/include/palacios/vmx_msr.h @@ -1,4 +1,30 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2009, Andy Gocke + * Copyright (c) 2009, The V3VEE Project + * All rights reserved. + * + * Author: Andy Gocke + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#ifndef __VMX_MSR_H__ +#define __VMX_MSR_H__ + +#ifdef __V3VEE__ #include int v3_init_vmx_msr_map(struct guest_info * info); + +#endif +#endif diff --git a/palacios/src/devices/ide.c b/palacios/src/devices/ide.c index 3138c64..e11944f 100644 --- a/palacios/src/devices/ide.c +++ b/palacios/src/devices/ide.c @@ -1157,7 +1157,7 @@ static int ide_read_data_port(ushort_t port, void * dst, uint_t length, struct v struct ide_channel * channel = get_selected_channel(ide, port); struct ide_drive * drive = get_selected_drive(channel); - // PrintDebug("IDE: Reading Data Port %x (len=%d)\n", port, length); + PrintDebug("IDE: Reading Data Port %x (len=%d)\n", port, length); if ((channel->cmd_reg == 0xec) || (channel->cmd_reg == 0xa1)) { diff --git a/palacios/src/devices/ram_cd.c b/palacios/src/devices/ram_cd.c index 87670c7..4a3baac 100644 --- a/palacios/src/devices/ram_cd.c +++ b/palacios/src/devices/ram_cd.c @@ -93,7 +93,7 @@ static int cd_init(struct guest_info * vm, void * cfg_data) { cd = (struct cd_state *)V3_Malloc(sizeof(struct cd_state)); - PrintDebug("Registering Ram CD at %p (size=%d)\n", (void *)ramdisk, size); + PrintDebug("Registering Ram CD at %p (size=%d)\n", (void *)cfg->ramdisk, cfg->size); cd->disk_image = cfg->ramdisk; diff --git a/palacios/src/devices/ram_hd.c b/palacios/src/devices/ram_hd.c index 6a5730c..c90e5be 100644 --- a/palacios/src/devices/ram_hd.c +++ b/palacios/src/devices/ram_hd.c @@ -108,7 +108,7 @@ static int hd_init(struct guest_info * vm, void * cfg_data) { hd = (struct hd_state *)V3_Malloc(sizeof(struct hd_state)); - PrintDebug("Registering Ram HDD at %p (size=%d)\n", (void *)ramdisk, size); + PrintDebug("Registering Ram HDD at %p (size=%d)\n", (void *)cfg->ramdisk, cfg->size); hd->disk_image = cfg->ramdisk; hd->capacity = cfg->size; diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile index 87ee4d8..fb2623e 100644 --- a/palacios/src/palacios/Makefile +++ b/palacios/src/palacios/Makefile @@ -46,7 +46,9 @@ obj-$(CONFIG_VMX) += vmx.o \ vmx_io.o \ vmx_lowlevel.o \ vmx_msr.o \ - vmcs.o + vmcs.o \ + vmx_ctrl_regs.o \ + vmx_assist.o diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index 632daab..226839c 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -21,7 +21,6 @@ #include -#include #include #include #include @@ -355,48 +354,6 @@ static addr_t allocate_vmcs() return (addr_t)V3_PAddr((void *)vmcs_page); } -#if 0 - -#endif - -#if 0 -static int init_vmcs_bios(struct guest_info * vm_info) -{ -#if 0 - - setup_v8086_mode_for_boot(vm_info); - - - // Setup guest state - // TODO: This is not 32-bit safe! - vmx_ret |= check_vmcs_write(VMCS_GUEST_RIP, vm_info->rip); - vmx_ret |= check_vmcs_write(VMCS_GUEST_RSP, vm_info->vm_regs.rsp); - - - vmx_ret |= check_vmcs_write(VMCS_GUEST_CR0, vm_info->ctrl_regs.cr0); - vmx_ret |= check_vmcs_write(VMCS_GUEST_CR4, vm_info->ctrl_regs.cr4); - - vmx_ret |= vmcs_write_guest_segments(vm_info); - - vmx_ret |= check_vmcs_write(VMCS_GUEST_RFLAGS, vm_info->ctrl_regs.rflags); -#define DEBUGCTL_MSR 0x1d9 - - v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo)); - vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value); - - vmx_ret |= check_vmcs_write(VMCS_GUEST_DR7, 0x400); - - vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, 0xffffffffffffffff); - - if (vmx_ret != 0) { - PrintError("Could not initialize VMCS segments\n"); - return -1; - } - -#endif - return 0; -} -#endif static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) { v3_pre_config_guest(info, config_ptr); @@ -492,6 +449,10 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config /********** Setup and VMX Control Fields from MSR ***********/ + /* Setup IO map */ + (void) v3_init_vmx_io_map(info); + (void) v3_init_vmx_msr_map(info); + struct v3_msr tmp_msr; v3_get_msr(VMX_PINBASED_CTLS_MSR,&(tmp_msr.hi),&(tmp_msr.lo)); @@ -499,7 +460,15 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config vmx_data->pinbased_ctrls = tmp_msr.lo | NMI_EXIT; v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo)); - vmx_data->pri_procbased_ctrls = tmp_msr.lo; + + PrintDebug("MSR High: 0x%x\n", tmp_msr.hi); + vmx_data->pri_procbased_ctrls = tmp_msr.lo | USE_IO_BITMAPS ; + + vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data)); + vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, + (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB); + + vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data)); v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo)); vmx_data->exit_ctrls = tmp_msr.lo ; @@ -509,6 +478,7 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config struct vmx_exception_bitmap excp_bmap; excp_bmap.value = 0xffffffff; + excp_bmap.gp = 0; vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value); @@ -552,10 +522,9 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config // vmx_data->pinbased_ctrls |= NMI_EXIT; - /* Add unconditional I/O and CR exits */ - vmx_data->pri_procbased_ctrls |= UNCOND_IO_EXIT - | CR3_LOAD_EXIT - | CR3_STORE_EXIT; + /* Add CR exits */ + vmx_data->pri_procbased_ctrls |= CR3_LOAD_EXIT + | CR3_STORE_EXIT; vmx_data->exit_ctrls |= HOST_ADDR_SPACE_SIZE; } @@ -589,10 +558,7 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config info->segments.ldtr.present = 1; info->segments.ldtr.granularity = 0; - /* Setup IO map */ - (void) v3_init_vmx_io_map(info); - (void) v3_init_vmx_msr_map(info); - + /************* Map in GDT and vmxassist *************/ uint64_t gdt[] __attribute__ ((aligned(32))) = { @@ -671,7 +637,7 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config v3_print_vmcs(); - vmx_data->state = VMXASSIST_STARTUP; + vmx_data->state = VMXASSIST_DISABLED; v3_post_config_guest(info, config_ptr); diff --git a/palacios/src/palacios/vmx_assist.c b/palacios/src/palacios/vmx_assist.c new file mode 100644 index 0000000..8a12fe7 --- /dev/null +++ b/palacios/src/palacios/vmx_assist.c @@ -0,0 +1,217 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Andy Gocke + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Andy Gocke + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include +#include +#include + +static int vmx_save_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx); +static int vmx_restore_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx); + +int v3_vmxassist_ctx_switch(struct guest_info * info) { + uint32_t vmx_magic = 0; // Magic number to check for vmxassist + struct vmx_assist_context * old_ctx = NULL; + struct vmx_assist_context * new_ctx = NULL; + uint32_t old_ctx_gpa = 0; + uint32_t new_ctx_gpa = 0; + vmx_state_t state = ((struct vmx_data *)info->vmm_data)->state; + + /* Check validity of VMXASSIST_MAGIC field */ + if (read_guest_pa_memory(info, VMXASSIST_MAGIC_OFFSET, sizeof(vmx_magic), (uint8_t *)&vmx_magic) != sizeof(vmx_magic)) { + PrintError("Could not read guest VMXASSIST magic field\n"); + return -1; + } + + if (vmx_magic != VMXASSIST_MAGIC) { + PrintError("VMXASSIT_MAGIC field is invalid\n"); + return -1; + } + + + /* Retrieve the pointer to the Old Context struct */ + if (read_guest_pa_memory(info, VMXASSIST_OLD_CONTEXT, sizeof(old_ctx_gpa), (uint8_t *)&old_ctx_gpa) != sizeof(old_ctx_gpa)) { + PrintError("Could not read Old Context pointer field\n"); + return -1; + } + + guest_pa_to_host_va(info, (addr_t)old_ctx_gpa, (addr_t *)&(old_ctx)); + + + /* Retrieve the pointer to the New Context struct */ + if (read_guest_pa_memory(info, VMXASSIST_NEW_CONTEXT, sizeof(new_ctx_gpa), (uint8_t *)&new_ctx_gpa) != sizeof(new_ctx_gpa)) { + PrintError("Could not read New Context pointer field\n"); + return -1; + } + + guest_pa_to_host_va(info, (addr_t)new_ctx_gpa, (addr_t *)&(new_ctx)); + + + + if (state == VMXASSIST_DISABLED) { + + /* Save the old Context */ + if (vmx_save_world_ctx(info, old_ctx) != 0) { + PrintError("Could not save VMXASSIST world context\n"); + return -1; + } + + /* restore new context, vmxassist should launch the bios the first time */ + if (vmx_restore_world_ctx(info, new_ctx) != 0) { + PrintError("VMXASSIST could not restore new context\n"); + return -1; + } + + } else if (state == VMXASSIST_ENABLED) { + /* restore old context */ + if (vmx_restore_world_ctx(info, old_ctx) != 0) { + PrintError("VMXASSIST could not restore old context\n"); + return -1; + } + } + + return 0; +} + + +int vmx_save_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx) { + int error = 0; + + PrintDebug("Writing from RIP: 0x%p\n", (void *)info->rip); + + error |= vmcs_read(VMCS_GUEST_RIP, &(ctx->eip)); + error |= vmcs_read(VMCS_GUEST_RSP, &(ctx->esp)); + error |= vmcs_read(VMCS_GUEST_RFLAGS, &(ctx->eflags)); + + error |= vmcs_read(VMCS_CR0_READ_SHDW, &(ctx->cr0)); + ctx->cr3 = info->shdw_pg_state.guest_cr3; + error |= vmcs_read(VMCS_CR4_READ_SHDW, &(ctx->cr4)); + + error |= vmcs_read(VMCS_GUEST_IDTR_LIMIT, &(ctx->idtr_limit)); + error |= vmcs_read(VMCS_GUEST_IDTR_BASE, &(ctx->idtr_base)); + + error |= vmcs_read(VMCS_GUEST_GDTR_LIMIT, &(ctx->gdtr_limit)); + error |= vmcs_read(VMCS_GUEST_GDTR_BASE, &(ctx->gdtr_base)); + + error |= vmcs_read(VMCS_GUEST_CS_SELECTOR, &(ctx->cs_sel)); + error |= vmcs_read(VMCS_GUEST_CS_LIMIT, &(ctx->cs_limit)); + error |= vmcs_read(VMCS_GUEST_CS_BASE, &(ctx->cs_base)); + error |= vmcs_read(VMCS_GUEST_CS_ACCESS, &(ctx->cs_arbytes.bytes)); + + error |= vmcs_read(VMCS_GUEST_DS_SELECTOR, &(ctx->ds_sel)); + error |= vmcs_read(VMCS_GUEST_DS_LIMIT, &(ctx->ds_limit)); + error |= vmcs_read(VMCS_GUEST_DS_BASE, &(ctx->ds_base)); + error |= vmcs_read(VMCS_GUEST_DS_ACCESS, &(ctx->ds_arbytes.bytes)); + + error |= vmcs_read(VMCS_GUEST_ES_SELECTOR, &(ctx->es_sel)); + error |= vmcs_read(VMCS_GUEST_ES_LIMIT, &(ctx->es_limit)); + error |= vmcs_read(VMCS_GUEST_ES_BASE, &(ctx->es_base)); + error |= vmcs_read(VMCS_GUEST_ES_ACCESS, &(ctx->es_arbytes.bytes)); + + error |= vmcs_read(VMCS_GUEST_SS_SELECTOR, &(ctx->ss_sel)); + error |= vmcs_read(VMCS_GUEST_SS_LIMIT, &(ctx->ss_limit)); + error |= vmcs_read(VMCS_GUEST_SS_BASE, &(ctx->ss_base)); + error |= vmcs_read(VMCS_GUEST_SS_ACCESS, &(ctx->ss_arbytes.bytes)); + + error |= vmcs_read(VMCS_GUEST_FS_SELECTOR, &(ctx->fs_sel)); + error |= vmcs_read(VMCS_GUEST_FS_LIMIT, &(ctx->fs_limit)); + error |= vmcs_read(VMCS_GUEST_FS_BASE, &(ctx->fs_base)); + error |= vmcs_read(VMCS_GUEST_FS_ACCESS, &(ctx->fs_arbytes.bytes)); + + error |= vmcs_read(VMCS_GUEST_GS_SELECTOR, &(ctx->gs_sel)); + error |= vmcs_read(VMCS_GUEST_GS_LIMIT, &(ctx->gs_limit)); + error |= vmcs_read(VMCS_GUEST_GS_BASE, &(ctx->gs_base)); + error |= vmcs_read(VMCS_GUEST_GS_ACCESS, &(ctx->gs_arbytes.bytes)); + + error |= vmcs_read(VMCS_GUEST_TR_SELECTOR, &(ctx->tr_sel)); + error |= vmcs_read(VMCS_GUEST_TR_LIMIT, &(ctx->tr_limit)); + error |= vmcs_read(VMCS_GUEST_TR_BASE, &(ctx->tr_base)); + error |= vmcs_read(VMCS_GUEST_TR_ACCESS, &(ctx->tr_arbytes.bytes)); + + error |= vmcs_read(VMCS_GUEST_LDTR_SELECTOR, &(ctx->ldtr_sel)); + error |= vmcs_read(VMCS_GUEST_LDTR_LIMIT, &(ctx->ldtr_limit)); + error |= vmcs_read(VMCS_GUEST_LDTR_BASE, &(ctx->ldtr_base)); + error |= vmcs_read(VMCS_GUEST_LDTR_ACCESS, &(ctx->ldtr_arbytes.bytes)); + + return error; +} + +int vmx_restore_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx) { + int error = 0; + + PrintDebug("ctx rip: %p\n", (void *)(addr_t)ctx->eip); + + error |= vmcs_write(VMCS_GUEST_RIP, ctx->eip); + error |= vmcs_write(VMCS_GUEST_RSP, ctx->esp); + error |= vmcs_write(VMCS_GUEST_RFLAGS, ctx->eflags); + + error |= vmcs_write(VMCS_CR0_READ_SHDW, ctx->cr0); + info->shdw_pg_state.guest_cr3 = ctx->cr3; + error |= vmcs_write(VMCS_CR4_READ_SHDW, ctx->cr4); + + error |= vmcs_write(VMCS_GUEST_IDTR_LIMIT, ctx->idtr_limit); + error |= vmcs_write(VMCS_GUEST_IDTR_BASE, ctx->idtr_base); + + error |= vmcs_write(VMCS_GUEST_GDTR_LIMIT, ctx->gdtr_limit); + error |= vmcs_write(VMCS_GUEST_GDTR_BASE, ctx->gdtr_base); + + error |= vmcs_write(VMCS_GUEST_CS_SELECTOR, ctx->cs_sel); + error |= vmcs_write(VMCS_GUEST_CS_LIMIT, ctx->cs_limit); + error |= vmcs_write(VMCS_GUEST_CS_BASE, ctx->cs_base); + error |= vmcs_write(VMCS_GUEST_CS_ACCESS, ctx->cs_arbytes.bytes); + + error |= vmcs_write(VMCS_GUEST_DS_SELECTOR, ctx->ds_sel); + error |= vmcs_write(VMCS_GUEST_DS_LIMIT, ctx->ds_limit); + error |= vmcs_write(VMCS_GUEST_DS_BASE, ctx->ds_base); + error |= vmcs_write(VMCS_GUEST_DS_ACCESS, ctx->ds_arbytes.bytes); + + error |= vmcs_write(VMCS_GUEST_ES_SELECTOR, ctx->es_sel); + error |= vmcs_write(VMCS_GUEST_ES_LIMIT, ctx->es_limit); + error |= vmcs_write(VMCS_GUEST_ES_BASE, ctx->es_base); + error |= vmcs_write(VMCS_GUEST_ES_ACCESS, ctx->es_arbytes.bytes); + + error |= vmcs_write(VMCS_GUEST_SS_SELECTOR, ctx->ss_sel); + error |= vmcs_write(VMCS_GUEST_SS_LIMIT, ctx->ss_limit); + error |= vmcs_write(VMCS_GUEST_SS_BASE, ctx->ss_base); + error |= vmcs_write(VMCS_GUEST_SS_ACCESS, ctx->ss_arbytes.bytes); + + error |= vmcs_write(VMCS_GUEST_FS_SELECTOR, ctx->fs_sel); + error |= vmcs_write(VMCS_GUEST_FS_LIMIT, ctx->fs_limit); + error |= vmcs_write(VMCS_GUEST_FS_BASE, ctx->fs_base); + error |= vmcs_write(VMCS_GUEST_FS_ACCESS, ctx->fs_arbytes.bytes); + + error |= vmcs_write(VMCS_GUEST_GS_SELECTOR, ctx->gs_sel); + error |= vmcs_write(VMCS_GUEST_GS_LIMIT, ctx->gs_limit); + error |= vmcs_write(VMCS_GUEST_GS_BASE, ctx->gs_base); + error |= vmcs_write(VMCS_GUEST_GS_ACCESS, ctx->gs_arbytes.bytes); + + error |= vmcs_write(VMCS_GUEST_TR_SELECTOR, ctx->tr_sel); + error |= vmcs_write(VMCS_GUEST_TR_LIMIT, ctx->tr_limit); + error |= vmcs_write(VMCS_GUEST_TR_BASE, ctx->tr_base); + error |= vmcs_write(VMCS_GUEST_TR_ACCESS, ctx->tr_arbytes.bytes); + + error |= vmcs_write(VMCS_GUEST_LDTR_SELECTOR, ctx->ldtr_sel); + error |= vmcs_write(VMCS_GUEST_LDTR_LIMIT, ctx->ldtr_limit); + error |= vmcs_write(VMCS_GUEST_LDTR_BASE, ctx->ldtr_base); + error |= vmcs_write(VMCS_GUEST_LDTR_ACCESS, ctx->ldtr_arbytes.bytes); + + return error; +} + + diff --git a/palacios/src/palacios/vmx_ctrl_regs.c b/palacios/src/palacios/vmx_ctrl_regs.c new file mode 100644 index 0000000..10503b4 --- /dev/null +++ b/palacios/src/palacios/vmx_ctrl_regs.c @@ -0,0 +1,65 @@ + +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Andy Gocke + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Andy Gocke + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include +#include +#include +#include +#include + +static int handle_mov_to_cr0(struct guest_info * info, v3_reg_t new_val); + +int v3_vmx_handle_cr0_write(struct guest_info * info, v3_reg_t new_val) { + return handle_mov_to_cr0(info, new_val); +} + +static int handle_mov_to_cr0(struct guest_info * info, v3_reg_t new_val) { + PrintDebug("CR0 RIP: %p\n", (void *)info->rip); + + struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0); + struct cr0_32 * new_cr0 = (struct cr0_32 *)&new_val; + struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0); + + // PG and PE are always enabled for VMX + + // Check if this is a paging transition + PrintDebug("Old CR0: 0x%x\n", *(uint32_t *)guest_cr0); + PrintDebug("Old shadow CR0: 0x%x\n", *(uint32_t *)shadow_cr0); + PrintDebug("New CR0: 0x%x\n", *(uint32_t *)new_cr0); + + if ( new_cr0->pe ) { + + if (v3_vmxassist_ctx_switch(info) != 0) { + PrintError("Unable to execute VMXASSIST context switch!\n"); + return -1; + } + + ((struct vmx_data *)info->vmm_data)->state = VMXASSIST_DISABLED; + + PrintDebug("New Shadow: 0x%x\n", *(uint32_t *)shadow_cr0); + PrintDebug("mem_mode: %s\n", v3_mem_mode_to_str(v3_get_vm_mem_mode(info))); + + return 0; + } + + return -1; +} + diff --git a/palacios/src/palacios/vmx_handler.c b/palacios/src/palacios/vmx_handler.c index d6eebdc..f872aee 100644 --- a/palacios/src/palacios/vmx_handler.c +++ b/palacios/src/palacios/vmx_handler.c @@ -25,37 +25,37 @@ #include #include #include +#include +#include +#include static int inline check_vmcs_write(vmcs_field_t field, addr_t val) { int ret = 0; - ret = vmcs_write(field,val); + ret = vmcs_write(field, val); if (ret != VMX_SUCCESS) { PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret); - return 1; } - return 0; + return ret; } static int inline check_vmcs_read(vmcs_field_t field, void * val) { int ret = 0; - ret = vmcs_read(field,val); + ret = vmcs_read(field, val); - if(ret != VMX_SUCCESS) { + if (ret != VMX_SUCCESS) { PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret); - return ret; } - return 0; + return ret; } static void inline translate_access_to_v3_seg(struct vmcs_segment_access * access, - struct v3_segment * v3_seg) -{ + struct v3_segment * v3_seg) { v3_seg->type = access->type; v3_seg->system = access->desc_type; v3_seg->dpl = access->dpl; @@ -66,15 +66,13 @@ static void inline translate_access_to_v3_seg(struct vmcs_segment_access * acces v3_seg->granularity = access->granularity; } -static void load_vmcs_guest_state(struct guest_info * info) +static int load_vmcs_guest_state(struct guest_info * info) { - check_vmcs_read(VMCS_GUEST_RIP, &(info->rip)); - check_vmcs_read(VMCS_GUEST_RSP, &(info->vm_regs.rsp)); - check_vmcs_read(VMCS_GUEST_CR0, &(info->ctrl_regs.cr0)); - check_vmcs_read(VMCS_GUEST_CR3, &(info->ctrl_regs.cr3)); - check_vmcs_read(VMCS_GUEST_CR4, &(info->ctrl_regs.cr4)); struct vmcs_segment_access access; + int ret = 0; + + // JRL: Add error checking memset(&access, 0, sizeof(access)); @@ -150,9 +148,23 @@ static void load_vmcs_guest_state(struct guest_info * info) /* IDTR Segment */ check_vmcs_read(VMCS_GUEST_IDTR_BASE, &(info->segments.idtr.base)); check_vmcs_read(VMCS_GUEST_IDTR_LIMIT, &(info->segments.idtr.limit)); + + + /* + * Read the control state + */ + check_vmcs_read(VMCS_GUEST_RIP, &(info->rip)); + check_vmcs_read(VMCS_GUEST_RSP, &(info->vm_regs.rsp)); + check_vmcs_read(VMCS_GUEST_CR0, &(info->ctrl_regs.cr0)); + check_vmcs_read(VMCS_CR0_READ_SHDW, &(info->shdw_pg_state.guest_cr0)); + check_vmcs_read(VMCS_GUEST_CR3, &(info->ctrl_regs.cr3)); + check_vmcs_read(VMCS_GUEST_CR4, &(info->ctrl_regs.cr4)); + + return ret; } +#if 0 static void setup_v8086_mode_for_boot(struct guest_info * info) { @@ -163,7 +175,6 @@ static void setup_v8086_mode_for_boot(struct guest_info * info) flags->iopl = 3; info->rip = 0xfff0; - //info->vm_regs.rsp = 0x0; /* Zero the segment registers */ memset(&(info->segments), 0, sizeof(struct v3_segment)*6); @@ -193,32 +204,19 @@ static void setup_v8086_mode_for_boot(struct guest_info * info) seg_ptr[i].granularity = 0; } - PrintDebug("END INFO!\n"); -#if 0 - for(i = 6; i < 10; i++) { - seg_ptr[i].base = 0x0; - seg_ptr[i].limit = 0xffff; - } - - info->segments.ldtr.type = 2; - info->segments.ldtr.system = 0; - info->segments.ldtr.present = 1; - info->segments.ldtr.granularity = 0; - - info->segments.tr.type = 3; - info->segments.tr.system = 0; - info->segments.tr.present = 1; - info->segments.tr.granularity = 0; -#endif } -static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual) -{ +#endif + +static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual) { struct vmexit_cr_qual * cr_qual = (struct vmexit_cr_qual *)&exit_qual; - if(cr_qual->access_type < 2) { - ulong_t reg = 0; - switch(cr_qual->gpr) { + PrintDebug("Control register: %d\n", cr_qual->access_type); + + if (cr_qual->access_type < 2) { + v3_reg_t reg = 0; + + switch(cr_qual->gpr) { case 0: reg = info->vm_regs.rax; break; @@ -268,97 +266,138 @@ static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual) reg = info->vm_regs.r15; break; } - PrintDebug("RAX: %p\n", (void *)info->vm_regs.rax); - - if(cr_qual->cr_id == 0 - && (~reg & CR0_PE) - && ((struct vmx_data*)info->vmm_data)->state == VMXASSIST_STARTUP) { - setup_v8086_mode_for_boot(info); - info->shdw_pg_state.guest_cr0 = 0x0; - v3_update_vmcs_guest_state(info); + + if (cr_qual->cr_id == 0) { + uint32_t instr_len; + + vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len); + + if ( ~reg & CR0_PE ) { + + if (v3_vmxassist_ctx_switch(info) != 0) { + PrintError("Unable to execute VMXASSIST context switch!\n"); + return -1; + } + + load_vmcs_guest_state(info); + + ((struct vmx_data *)info->vmm_data)->state = VMXASSIST_ENABLED; + + PrintDebug("Loading vmxassist at RIP: 0x%p\n", (void *)info->rip); + return 0; + } else if (v3_vmx_handle_cr0_write(info, reg) != 0) { + PrintError("Could not handle CR0 Write\n"); + return -1; + } + + load_vmcs_guest_state(info); + + PrintDebug("Leaving VMXASSIST and entering protected mode at RIP: 0x%p\n", (void *)info->rip); + return 0; } } + PrintError("Unhandled CR access\n"); return -1; } -int v3_handle_vmx_exit(struct v3_gprs * gprs, struct guest_info * info) -{ +/* At this point the GPRs are already copied into the guest_info state */ +int v3_handle_vmx_exit(struct v3_gprs * gprs, struct guest_info * info) { uint32_t exit_reason; ulong_t exit_qual; check_vmcs_read(VMCS_EXIT_REASON, &exit_reason); check_vmcs_read(VMCS_EXIT_QUAL, &exit_qual); - PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_reason, exit_qual); + // PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_reason, exit_qual); /* Update guest state */ load_vmcs_guest_state(info); - switch(exit_reason) - { - case VMEXIT_INFO_EXCEPTION_OR_NMI: - { - uint32_t int_info; - pf_error_t error_code; - check_vmcs_read(VMCS_EXIT_INT_INFO, &int_info); - check_vmcs_read(VMCS_EXIT_INT_ERR, &error_code); - - if((uint8_t)int_info == 0x0e) { - PrintDebug("Page Fault at %p\n", (void*)exit_qual); - if(info->shdw_pg_mode == SHADOW_PAGING) { - if(v3_handle_shadow_pagefault(info, (addr_t)exit_qual, error_code) == -1) { - return -1; - } - } else { - PrintError("Page fault in unimplemented paging mode\n"); - return -1; - } - } else { - PrintDebug("Unknown exception: 0x%x\n", (uint8_t)int_info); - v3_print_GPRs(info); - return -1; - } - break; - } - - case VMEXIT_IO_INSTR: - { - struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual; - - if(io_qual->dir == 0) { - if(io_qual->string) { - if(v3_handle_vmx_io_outs(info) == -1) { - return -1; - } - } else { - if(v3_handle_vmx_io_out(info) == -1) { - return -1; - } - } - } else { - if(io_qual->string) { - if(v3_handle_vmx_io_ins(info) == -1) { - return -1; - } - } else { - if(v3_handle_vmx_io_in(info) == -1) { - return -1; - } - } - } - break; - } - + switch (exit_reason) { + case VMEXIT_INFO_EXCEPTION_OR_NMI: { + uint32_t int_info; + pf_error_t error_code; + + check_vmcs_read(VMCS_EXIT_INT_INFO, &int_info); + check_vmcs_read(VMCS_EXIT_INT_ERR, &error_code); + + // JRL: Change "0x0e" to a macro value + if ((uint8_t)int_info == 0x0e) { + PrintDebug("Page Fault at %p\n", (void *)exit_qual); + + if (info->shdw_pg_mode == SHADOW_PAGING) { + if (v3_handle_shadow_pagefault(info, (addr_t)exit_qual, error_code) == -1) { + PrintError("Error handling shadow page fault\n"); + return -1; + } + } else { + PrintError("Page fault in unimplemented paging mode\n"); + return -1; + } + } else { + PrintDebug("Unknown exception: 0x%x\n", (uint8_t)int_info); + v3_print_GPRs(info); + return -1; + } + break; + } + + case VMEXIT_CPUID: { + int instr_len; + + v3_cpuid(info->vm_regs.rax, (addr_t *)&(info->vm_regs.rax), (addr_t *)&(info->vm_regs.rbx), + (addr_t *)&(info->vm_regs.rcx), (addr_t *)&(info->vm_regs.rdx)); + + check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len); + + info->rip += instr_len; + break; + } + + case VMEXIT_IO_INSTR: { + struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual; + + if (io_qual->dir == 0) { + if (io_qual->string) { + if (v3_handle_vmx_io_outs(info) == -1) { + PrintError("Error in outs IO handler\n"); + return -1; + } + } else { + if (v3_handle_vmx_io_out(info) == -1) { + PrintError("Error in out IO handler\n"); + return -1; + } + } + } else { + if (io_qual->string) { + if(v3_handle_vmx_io_ins(info) == -1) { + PrintError("Error in ins IO handler\n"); + return -1; + } + } else { + if (v3_handle_vmx_io_in(info) == -1) { + PrintError("Error in in IO handler\n"); + return -1; + } + } + } + break; + } + case VMEXIT_CR_REG_ACCESSES: - if(handle_cr_access(info,exit_qual) != 0) + if (handle_cr_access(info,exit_qual) != 0) { + PrintError("Error handling CR access\n"); return -1; + } + break; default: - PrintError("Unhandled VMEXIT\n"); + PrintError("Unhandled VMEXIT: %u (0x%x), %lu (0x%lx)\n", exit_reason, exit_reason, exit_qual, exit_qual); return -1; } diff --git a/palacios/src/palacios/vmx_io.c b/palacios/src/palacios/vmx_io.c index 2f2596d..0f841e4 100644 --- a/palacios/src/palacios/vmx_io.c +++ b/palacios/src/palacios/vmx_io.c @@ -1,3 +1,21 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Andy Gocke + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Andy Gocke + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ #include #include @@ -5,6 +23,15 @@ #include #include #include +#include +#include +#include + +#ifndef CONFIG_DEBUG_IO +#undef PrintDebug +#define PrintDebug(fmt, args...) +#endif + /* Same as SVM */ static int update_map(struct guest_info * info, uint16_t port, int hook_read, int hook_write) @@ -32,34 +59,34 @@ int v3_init_vmx_io_map(struct guest_info * info) return 0; } -int v3_handle_vmx_io_in(struct guest_info * info) -{ +int v3_handle_vmx_io_in(struct guest_info * info) { ulong_t exit_qual; + uint32_t instr_length = 0; vmcs_read(VMCS_EXIT_QUAL, &exit_qual); struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual; - struct v3_io_hook * hook = v3_get_io_hook(info,io_qual->port); + struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port); int read_size = 0; - if(hook == NULL) { + if (hook == NULL) { PrintError("Hook not present for IN on port %x\n", io_qual->port); return -1; } - read_size = 1<<(io_qual->access_size); + read_size = io_qual->access_size + 1; PrintDebug("IN of %d bytes on port %d (0x%x)\n", read_size, io_qual->port, io_qual->port); - if(hook->read(io_qual->port, &(info->vm_regs.rax), read_size, hook->priv_data) != read_size) { + if (hook->read(io_qual->port, &(info->vm_regs.rax), read_size, hook->priv_data) != read_size) { PrintError("Read failure for IN on port %x\n", io_qual->port); return -1; } - uint32_t instr_length = 0; - if(vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) { + + if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) { PrintError("Could not read instruction length\n"); return -1; } @@ -71,12 +98,74 @@ int v3_handle_vmx_io_in(struct guest_info * info) int v3_handle_vmx_io_ins(struct guest_info * info) { - PrintDebug("INS not implemented\n"); - return -1; + ulong_t exit_qual; + + vmcs_read(VMCS_EXIT_QUAL, &exit_qual); + + struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual; + struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port); + int read_size; + addr_t guest_va; + addr_t host_addr; + int rdi_change; + ulong_t rep_num = 1; + + if(hook == NULL) { + PrintError("Hook not present for INS on port 0x%x\n", io_qual->port); + return -1; + } + + PrintDebug("INS on port 0x%x\n", io_qual->port); + + read_size = io_qual->access_size + 1; + + if (io_qual->rep) { + rep_num = info->vm_regs.rcx & get_gpr_mask(info); + } + + if ( ((struct rflags *)&(info->ctrl_regs.rflags))->df ) { + rdi_change = -read_size; + } else { + rdi_change = read_size; + } + + PrintDebug("INS size=%d for %ld steps\n", read_size, rep_num); + + vmcs_read(VMCS_GUEST_LINEAR_ADDR, &guest_va); + + if (guest_va_to_host_va(info, guest_va, &host_addr) == -1) { + PrintError("Could not convert Guest VA to host VA\n"); + return -1; + } + + do { + if (hook->read(io_qual->port, (char *)host_addr, read_size, hook->priv_data) != read_size) { + PrintError("Read Failure for INS on port 0x%x\n", io_qual->port); + return -1; + } + + host_addr += rdi_change; + info->vm_regs.rdi += rdi_change; + + if (io_qual->rep) { + --info->vm_regs.rcx; + } + --rep_num; + + } while (rep_num > 0); + + int instr_len = 0; + + vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len); + + info->rip += instr_len; + + return 0; } -int v3_handle_vmx_io_out(struct guest_info * info) -{ + + +int v3_handle_vmx_io_out(struct guest_info * info) { ulong_t exit_qual; vmcs_read(VMCS_EXIT_QUAL, &exit_qual); @@ -85,24 +174,24 @@ int v3_handle_vmx_io_out(struct guest_info * info) struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port); - if(hook == NULL) { + if (hook == NULL) { PrintError("Hook not present for out on port %x\n", io_qual->port); return -1; } - int write_size = 1<<(io_qual->access_size); + int write_size = io_qual->access_size + 1; PrintDebug("OUT of %d bytes on port %d (0x%x)\n", write_size, io_qual->port, io_qual->port); - if(hook->write(io_qual->port, &(info->vm_regs.rax), write_size, hook->priv_data) != write_size) { + if (hook->write(io_qual->port, &(info->vm_regs.rax), write_size, hook->priv_data) != write_size) { PrintError("Write failure for out on port %x\n",io_qual->port); return -1; } uint32_t instr_length = 0; - if(vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) { + if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) { PrintError("Could not read instruction length\n"); return -1; } @@ -112,14 +201,72 @@ int v3_handle_vmx_io_out(struct guest_info * info) return 0; } -int v3_handle_vmx_io_outs(struct guest_info * info) -{ + + +int v3_handle_vmx_io_outs(struct guest_info * info) { ulong_t exit_qual; vmcs_read(VMCS_EXIT_QUAL, &exit_qual); struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual; + struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port); + int write_size; + addr_t guest_va; + addr_t host_addr; + int rsi_change; + ulong_t rep_num = 1; + + if (hook == NULL) { + PrintError("Hook not present for OUTS on port 0x%x\n", io_qual->port); + return -1; + } + + PrintDebug("OUTS on port 0x%x\n", io_qual->port); + + write_size = io_qual->access_size + 1; + + if (io_qual->rep) { + // Grab the address sized bits of rcx + rep_num = info->vm_regs.rcx & get_gpr_mask(info); + } - PrintDebug("OUTS on port %d, (0x%x)\n", io_qual->port, io_qual->port); - return -1; + if ( ((struct rflags *)&(info->ctrl_regs.rflags))->df ) { + rsi_change = -write_size; + } else { + rsi_change = write_size; + } + + vmcs_read(VMCS_GUEST_LINEAR_ADDR, &guest_va); + + PrintDebug("OUTS size=%d for %ld steps\n", write_size, rep_num); + + if (guest_va_to_host_va(info, guest_va, &host_addr) == -1) { + PrintError("Could not convert guest VA to host VA\n"); + return -1; + } + + do { + if (hook->write(io_qual->port, (char *)host_addr, write_size, hook->priv_data) != write_size) { + PrintError("Read failure for INS on port 0x%x\n", io_qual->port); + return -1; + } + + host_addr += rsi_change; + info->vm_regs.rsi += rsi_change; + + if (io_qual->rep) { + --info->vm_regs.rcx; + } + --rep_num; + + } while (rep_num > 0); + + int instr_len = 0; + + vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len); + + info->rip += instr_len; + + return 0; } + diff --git a/palacios/src/palacios/vmx_msr.c b/palacios/src/palacios/vmx_msr.c index 0b46b88..fa53ffc 100644 --- a/palacios/src/palacios/vmx_msr.c +++ b/palacios/src/palacios/vmx_msr.c @@ -1,10 +1,27 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Andy Gocke + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Andy Gocke + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ #include #include /* Same as SVM */ -static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int hook_writes) -{ +static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int hook_writes) { #if 0 int index = get_bitmap_index(msr); @@ -29,8 +46,7 @@ static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int return 0; } -int v3_init_vmx_msr_map(struct guest_info * info) -{ +int v3_init_vmx_msr_map(struct guest_info * info) { struct v3_msr_map * msr_map = &(info->msr_map); msr_map->update_map = update_map;