Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Modified boot and vmxassist to handle real/protected transition.
Andy Gocke [Fri, 21 Aug 2009 20:25:41 +0000 (15:25 -0500)]
29 files changed:
bios/vmxassist/Makefile
bios/vmxassist/e820.h
bios/vmxassist/head.S
bios/vmxassist/machine.h
bios/vmxassist/setup.c
bios/vmxassist/trap.S
bios/vmxassist/util.c
bios/vmxassist/util.h
bios/vmxassist/vm86.c
bios/vmxassist/vm86.h
bios/vmxassist/vmx_assist.h
bios/vmxassist/vmxassist.bin
palacios/include/palacios/vmx.h
palacios/include/palacios/vmx_assist.h [new file with mode: 0644]
palacios/include/palacios/vmx_ctrl_regs.h [new file with mode: 0644]
palacios/include/palacios/vmx_handler.h
palacios/include/palacios/vmx_io.h
palacios/include/palacios/vmx_lowlevel.h
palacios/include/palacios/vmx_msr.h
palacios/src/devices/ide.c
palacios/src/devices/ram_cd.c
palacios/src/devices/ram_hd.c
palacios/src/palacios/Makefile
palacios/src/palacios/vmx.c
palacios/src/palacios/vmx_assist.c [new file with mode: 0644]
palacios/src/palacios/vmx_ctrl_regs.c [new file with mode: 0644]
palacios/src/palacios/vmx_handler.c
palacios/src/palacios/vmx_io.c
palacios/src/palacios/vmx_msr.c

index 6959a28..ededccd 100644 (file)
@@ -28,9 +28,9 @@ TEXTADDR=0x000D0000
 DEFINES=-DDEBUG -DTEXTADDR=$(TEXTADDR)
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS  += $(call test-gcc-flag,$(CC),-nopie)
-CFLAGS  += $(call test-gcc-flag,$(CC),-fno-stack-protector)
-CFLAGS  += $(call test-gcc-flag,$(CC),-fno-stack-protector-all)
+CFLAGS  += $(call cc-option,$(CC),-nopie,)
+CFLAGS  += $(call cc-option,$(CC),-fno-stack-protector,)
+CFLAGS  += $(call cc-option,$(CC),-fno-stack-protector-all,)
 
 CPP      = cpp -P
 OBJCOPY  = objcopy -p -O binary -R .note -R .comment -R .bss -S --gap-fill=0
index 8190c76..151313c 100644 (file)
@@ -1,32 +1,31 @@
-#ifndef __XEN_PUBLIC_HVM_E820_H__
-#define __XEN_PUBLIC_HVM_E820_H__
+#ifndef __HVMLOADER_E820_H__
+#define __HVMLOADER_E820_H__
 
-/* PC BIOS standard E820 types. */
+/* E820 location in HVM virtual address space. */
+#define HVM_E820_PAGE        0x00090000
+#define HVM_E820_NR_OFFSET   0x000001E8
+#define HVM_E820_OFFSET      0x000002D0
+
+#define HVM_BELOW_4G_RAM_END        0xF0000000
+#define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
+#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
+
+
+/*
+ * PC BIOS standard E820 types and structure.
+ */
 #define E820_RAM          1
 #define E820_RESERVED     2
 #define E820_ACPI         3
 #define E820_NVS          4
 
-/* Xen HVM extended E820 types. */
-#define E820_IO          16
-#define E820_SHARED_PAGE 17
-#define E820_XENSTORE    18
-#define E820_BUFFERED_IO 19
-
-/* E820 location in HVM virtual address space. */
-#define E820_MAP_PAGE        0x00090000
-#define E820_MAP_NR_OFFSET   0x000001E8
-#define E820_MAP_OFFSET      0x000002D0
-
 struct e820entry {
     uint64_t addr;
     uint64_t size;
     uint32_t type;
 } __attribute__((packed));
 
-#define HVM_BELOW_4G_RAM_END        0xF0000000
-
-#define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
-#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
+#define HVM_E820_NR ((unsigned char *)HVM_E820_PAGE + HVM_E820_NR_OFFSET)
+#define HVM_E820    ((struct e820entry *)(HVM_E820_PAGE + HVM_E820_OFFSET))
 
-#endif /* __XEN_PUBLIC_HVM_E820_H__ */
+#endif /* __HVMLOADER_E820_H__ */
index b183fac..3af285e 100644 (file)
  * switch happens to the environment below. The magic indicates
  * that this is a valid context.
  */
-#ifdef TEST
-       .byte 0x55, 0xaa
-       .byte 0x80
-       .code16
-       jmp     _start16
-#else
        jmp     _start
-#endif
 
        .align  8
        .long   VMXASSIST_MAGIC
        .long   newctx                  /* new context */
        .long   oldctx                  /* old context */
 
-#ifdef TEST
-/*
- * We are running in 16-bit. Get into the protected mode as soon as
- * possible. We use our own (minimal) GDT to get started.
- *
- * ROM is a misnomer as this code isn't really rommable (although it
- * only requires a few changes) but it does live in a BIOS ROM segment.
- * This code allows me to debug vmxassists under (a modified version of)
- * Bochs and load it as a "optromimage1".
- */
-       .code16
-       .globl  _start16
-_start16:
-        cli
-
-        /* load our own global descriptor table */
-        data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
-
-        /* go to protected mode */
-        movl    %cr0, %eax
-        orl     $CR0_PE, %eax
-        movl    %eax, %cr0
-        data32  ljmp $0x08, $1f
-
-        .align  32
-        .globl  rom_gdt
-rom_gdt:
-        .word   0, 0            /* 0x00: reserved */
-        .byte   0, 0, 0, 0
-
-        .word   0xFFFF, 0       /* 0x08: CS 32-bit */
-        .byte   0, 0x9A, 0xCF, 0
-
-        .word   0xFFFF, 0       /* 0x10: CS 32-bit */
-        .byte   0, 0x92, 0xCF, 0
-rom_gdt_end:
-
-        .align  4
-        .globl  rom_gdtr
-rom_gdtr:
-        .word   rom_gdt_end - rom_gdt - 1
-        .long   rom_gdt
-
-        .code32
-1:
-        /* welcome to the 32-bit world */
-        movw    $0x10, %ax
-        movw    %ax, %ds
-        movw    %ax, %es
-        movw    %ax, %ss
-        movw    %ax, %fs
-        movw    %ax, %gs
-
-        /* enable Bochs debug facilities */
-        movw    $0x8A00, %dx
-        movw    $0x8A00, %ax
-        outw    %ax, (%dx)
-
-       jmp     _start
-#endif /* TEST */
-
 /*
  * This is the real start. Control was transfered to this point
  * with CR0_PE set and executing in some 32-bit segment. We call
@@ -111,9 +43,6 @@ _start:
        cli
 
        /* save register parameters to C land */
-#ifdef TEST
-       xorl    %edx, %edx
-#endif
 
        /* clear bss */
        cld
@@ -130,7 +59,7 @@ _start:
        clts
 
        /* setup my own stack */
-       movl    $stack_top - 4*4, %esp
+       movl    $stack_top, %esp
        movl    %esp, %ebp
 
        /* go ... */
@@ -145,11 +74,6 @@ _start:
 halt:
        push    $halt_msg
        call    printf
-#ifdef TEST
-        movw    $0x8A00, %dx
-        movw    $0x8AE0, %ax
-        outw    %ax, (%dx)
-#endif
        cli
        jmp     .
 
index 0ea2adf..f91646f 100644 (file)
 #define CR4_PSE                (1 << 4)
 #define CR4_PAE                (1 << 5)
 
+#define EFLAGS_CF      (1 << 0)
+#define EFLAGS_PF      (1 << 2)
+#define EFLAGS_AF      (1 << 4)
 #define EFLAGS_ZF      (1 << 6)
+#define EFLAGS_SF      (1 << 7)
 #define EFLAGS_TF      (1 << 8)
 #define EFLAGS_IF      (1 << 9)
 #define EFLAGS_DF      (1 << 10)
+#define EFLAGS_OF      (1 << 11)
 #define EFLAGS_IOPL    (3 << 12)
 #define EFLAGS_VM      ((1 << 17) | EFLAGS_IOPL)
 #define EFLAGS_VIF     (1 << 19)
 #define        LPGSIZE         (1 << LOG_PDSIZE)       /* large page size */
 #define        LPGMASK         (~(LPGSIZE - 1))        /* large page mask */
 
-#ifdef TEST
-#define        PTE_P           (1 << 0)        /* Present */
-#define        PTE_RW          (1 << 1)        /* Read/Write */
-#define        PTE_US          (1 << 2)        /* User/Supervisor */
-#define        PTE_PS          (1 << 7)        /* Page Size */
-#endif
-
 /* Programmable Interrupt Contoller (PIC) defines */
 #define        PIC_MASTER      0x20
 #define        PIC_SLAVE       0xA0
@@ -115,7 +113,7 @@ struct tss {
 #ifdef ENABLE_VME
        unsigned long   int_redir[8];
 #endif
-       unsigned char   iomap[8192];
+       unsigned char   iomap[8193];
 };
 
 static inline void
@@ -195,14 +193,6 @@ set_cr4(unsigned value)
        __asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
 }
 
-#ifdef TEST
-static inline void
-breakpoint(void)
-{
-       outw(0x8A00, 0x8AE0);
-}
-#endif /* TEST */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __MACHINE_H__ */
index c453ecd..1e2e86c 100644 (file)
@@ -47,29 +47,13 @@ unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32)));
 
 struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
 
-#ifdef TEST
-unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
-
-struct e820entry e820map[] = {
-       { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
-       { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
-       { 0x00000000000A0000ULL, 0x0000000000020000ULL, E820_IO },
-       { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
-       { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
-       { 0x0000000000000000ULL, 0x0000000000001000ULL, E820_SHARED_PAGE },
-       { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
-       { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
-       { 0x00000000FEC00000ULL, 0x0000000001400000ULL, E820_IO },
-};
-#endif /* TEST */
-
 struct vmx_assist_context oldctx;
 struct vmx_assist_context newctx;
 
 unsigned long memory_size;
 int initialize_real_mode;
 
-extern char stack[], stack_top[];
+extern char stack_top[];
 extern unsigned trap_handlers[];
 
 void
@@ -87,39 +71,12 @@ banner(void)
                    (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
        memory_size += 0x400 << 10; /* + 1MB */
 
-#ifdef TEST
-       /* Create an SMAP for our debug environment */
-       e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
-       e820map[5].addr = memory_size - PGSIZE;
-       e820map[6].addr = memory_size;
-       e820map[7].addr += memory_size;
-
-       *E820_MAP_NR = sizeof(e820map)/sizeof(e820map[0]);
-       memcpy(E820_MAP, e820map, sizeof(e820map));
-#endif
-
        printf("Memory size %ld MB\n", memory_size >> 20);
        printf("E820 map:\n");
-       print_e820_map(E820_MAP, *E820_MAP_NR);
+       print_e820_map(HVM_E820, *HVM_E820_NR);
        printf("\n");
 }
 
-#ifdef TEST
-void
-setup_paging(void)
-{
-       unsigned long i;
-
-       if (((unsigned)pgd & ~PGMASK) != 0)
-               panic("PGD not page aligned");
-       set_cr4(get_cr4() | CR4_PSE);
-       for (i = 0; i < NR_PGD; i++)
-               pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
-       set_cr3((unsigned) pgd);
-       set_cr0(get_cr0() | (CR0_PE|CR0_PG));
-}
-#endif /* TEST */
-
 void
 setup_gdt(void)
 {
@@ -128,8 +85,9 @@ setup_gdt(void)
        /* setup task state segment */
        memset(&tss, 0, sizeof(tss));
        tss.ss0 = DATA_SELECTOR;
-       tss.esp0 = (unsigned) stack_top - 4*4;
+       tss.esp0 = (unsigned) stack_top;
        tss.iomap_base = offsetof(struct tss, iomap);
+       tss.iomap[sizeof(tss.iomap)-1] = 0xff;
 
        /* initialize gdt's tss selector */
        gdt[TSS_SELECTOR / sizeof(gdt[0])] |=
@@ -204,7 +162,7 @@ void
 enter_real_mode(struct regs *regs)
 {
        /* mask off TSS busy bit */
-        gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL;
+       gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL;
 
        /* start 8086 emulation of BIOS */
        if (initialize_real_mode) {
@@ -213,17 +171,15 @@ enter_real_mode(struct regs *regs)
                regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
                if (booting_cpu == 0) {
                        regs->cs = 0xF000; /* ROM BIOS POST entry point */
-#ifdef TEST
-                       regs->eip = 0xFFE0;
-#else
                        regs->eip = 0xFFF0;
-#endif
                } else {
                        regs->cs = booting_vector << 8; /* AP entry point */
                        regs->eip = 0;
                }
-               regs->uesp = 0;
-               regs->uss = 0;
+
+               regs->uesp = regs->uss = 0;
+               regs->eax = regs->ecx = regs->edx = regs->ebx = 0;
+               regs->esp = regs->ebp = regs->esi = regs->edi = 0;
 
                /* intercept accesses to the PIC */
                setiomap(PIC_MASTER+PIC_CMD);
@@ -239,14 +195,13 @@ enter_real_mode(struct regs *regs)
 
                /* this should get us into 16-bit mode */
                return;
-       } else {
-               /* go from protected to real mode */
-               regs->eflags |= EFLAGS_VM;
-
-               set_mode(regs, VM86_PROTECTED_TO_REAL);
-
-               emulate(regs);
        }
+
+       /* go from protected to real mode */
+       set_mode(regs, VM86_PROTECTED_TO_REAL);
+       emulate(regs);
+       if (mode != VM86_REAL)
+               panic("failed to emulate between clear PE and long jump.\n");
 }
 
 /*
@@ -261,7 +216,7 @@ setup_ctx(void)
 
        memset(c, 0, sizeof(*c));
        c->eip = (unsigned long) switch_to_real_mode;
-       c->esp = (unsigned) stack_top - 4*4;
+       c->esp = (unsigned) stack_top;
        c->eflags = 0x2; /* no interrupts, please */
 
        /*
@@ -271,13 +226,8 @@ setup_ctx(void)
         * more natural to enable CR0.PE to cause a world switch to
         * protected mode rather than disabling it.
         */
-#ifdef TEST
-       c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
-       c->cr3 = (unsigned long) pgd;
-#else
        c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
        c->cr3 = 0;
-#endif
        c->cr4 = get_cr4();
 
        c->idtr_limit = sizeof(idt)-1;
@@ -368,21 +318,13 @@ start_bios(void)
 int
 main(void)
 {
-    printf("Hello from VMXAssist\n");
-
        if (booting_cpu == 0)
                banner();
 
-#ifdef TEST
-       setup_paging();
-#endif
-
        setup_gdt();
        setup_idt();
 
-#ifndef        TEST
        set_cr4(get_cr4() | CR4_VME);
-#endif
 
        setup_ctx();
 
index 468da0a..d5ece3e 100644 (file)
@@ -100,13 +100,9 @@ trap_handlers:
        .code32
        .align  16
 common_trap:                           /* common trap handler */
-       pushl   %gs
-       pushl   %fs
-       pushl   %ds
-       pushl   %es
        pushal
 
-       movl    $DATA_SELECTOR, %eax    /* make sure these are sane */
+       movl    $(DATA_SELECTOR), %eax  /* make sure these are sane */
        movl    %eax, %ds
        movl    %eax, %es
        movl    %eax, %fs
@@ -114,17 +110,13 @@ common_trap:                              /* common trap handler */
        movl    %esp, %ebp
 
        pushl   %ebp
-       pushl   52(%ebp)
-       pushl   48(%ebp)
+       pushl   36(%ebp)
+       pushl   32(%ebp)
        call    trap                    /* trap(trapno, errno, regs) */
        addl    $12, %esp
 
 trap_return:
        popal
-       popl    %es
-       popl    %ds
-       popl    %fs
-       popl    %gs
        addl    $8, %esp                /* skip trapno, errno */
        iret
        /* NOT REACHED */
@@ -152,10 +144,6 @@ switch_to_real_mode:
        pushl   oldctx+VMX_ASSIST_CTX_EIP
        pushl   $-1                     /* trapno, errno */
        pushl   $-1
-       pushl   %gs
-       pushl   %fs
-       pushl   %ds
-       pushl   %es
        pushal
 
        movl    %esp, %ebp
index 0181fe7..c7d7170 100644 (file)
 
 static void putchar(int);
 static char *printnum(char *, unsigned long, int);
-static void _doprint(void (*)(int), char const *, va_list);
+static void _doprint(void (*)(int), const char *, va_list);
 
+void
+cpuid_addr_value(uint64_t addr, uint64_t *value)
+{
+       uint32_t addr_low   = (uint32_t)addr;
+       uint32_t addr_high  = (uint32_t)(addr >> 32);
+       uint32_t value_low, value_high;
+       static unsigned int addr_leaf;
+
+       if (!addr_leaf) {
+               unsigned int eax, ebx, ecx, edx;
+               __asm__ __volatile__(
+                       "cpuid"
+                       : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+                       : "0" (0x40000000));
+               addr_leaf = eax + 1;
+       }
+
+       __asm__ __volatile__(
+               "cpuid"
+               : "=c" (value_low), "=d" (value_high)
+               : "a" (addr_leaf), "0" (addr_low), "1" (addr_high)
+               : "ebx");
+
+       *value = (uint64_t)value_high << 32 | value_low;
+}
 
 void
 dump_regs(struct regs *regs)
@@ -37,16 +62,15 @@ dump_regs(struct regs *regs)
                regs->eax, regs->ecx, regs->edx, regs->ebx);
        printf("esp    %8x ebp    %8x esi    %8x edi    %8x\n",
                regs->esp, regs->ebp, regs->esi, regs->edi);
-       printf("eip    %8x eflags %8x cs     %8x ds     %8x\n",
-               regs->eip, regs->eflags, regs->cs, regs->ds);
-       printf("es     %8x fs     %8x uss    %8x uesp   %8x\n",
-               regs->es, regs->fs, regs->uss, regs->uesp);
+       printf("trapno %8x errno  %8x\n", regs->trapno, regs->errno);
+       printf("eip    %8x cs     %8x eflags %8x\n",
+               regs->eip, regs->cs, regs->eflags);
+       printf("uesp   %8x uss    %8x\n",
+               regs->uesp, regs->uss);
        printf("ves    %8x vds    %8x vfs    %8x vgs    %8x\n",
                regs->ves, regs->vds, regs->vfs, regs->vgs);
-       if (regs->trapno != -1 || regs->errno != -1)
-               printf("trapno %8x errno  %8x\n", regs->trapno, regs->errno);
 
-       printf("cr0    %8lx cr2    %8x cr3    %8lx cr4    %8lx\n",
+       printf("cr0    %8lx cr2    %8x cr3    %8lx cr4    %8lx\n\n",
                (long)oldctx.cr0, get_cr2(),
                (long)oldctx.cr3, (long)oldctx.cr4);
 }
@@ -297,7 +321,7 @@ putchar(int ch)
  * but still powerful enough for most tasks.
  */
 static void
-_doprint(void (*put)(int), char const *fmt, va_list ap)
+_doprint(void (*put)(int), const char *fmt, va_list ap)
 {
        register char *str, c;
        int lflag, zflag, nflag;
index 9c2982f..1fd52ed 100644 (file)
 #include <stdarg.h>
 #include <vm86.h>
 
-#include <e820.h>
-#define E820_MAP_NR ((unsigned char *)E820_MAP_PAGE + E820_MAP_NR_OFFSET)
-#define E820_MAP    ((struct e820entry *)(E820_MAP_PAGE + E820_MAP_OFFSET))
-
 #define        offsetof(type, member)  ((unsigned) &((type *)0)->member)
 
 struct vmx_assist_context;
 
+#include "e820.h"
+
+extern void cpuid_addr_value(uint64_t addr, uint64_t *value);
 extern void hexdump(unsigned char *, int);
 extern void dump_regs(struct regs *);
 extern void dump_vmx_context(struct vmx_assist_context *);
index 8c620a4..55b6905 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * vm86.c: A vm86 emulator. The main purpose of this emulator is to do as
- * little work as possible. 
+ * little work as possible.
  *
  * Leendert van Doorn, leendert@watson.ibm.com
  * Copyright (c) 2005-2006, International Business Machines Corporation.
@@ -33,6 +33,7 @@
 #define        SEG_SS          0x0020
 #define        SEG_FS          0x0040
 #define        SEG_GS          0x0080
+#define REP            0x0100
 
 static unsigned prev_eip = 0;
 enum vm86_mode mode = 0;
@@ -52,12 +53,12 @@ char *states[] = {
 static char *rnames[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
 #endif /* DEBUG */
 
-#define PDE_PS           (1 << 7)
-#define PT_ENTRY_PRESENT 0x1
+#define PDE_PS                         (1 << 7)
+#define PT_ENTRY_PRESENT       0x1
 
 /* We only support access to <=4G physical memory due to 1:1 mapping */
-static unsigned
-guest_linear_to_real(uint32_t base)
+static uint64_t
+guest_linear_to_phys(uint32_t base)
 {
        uint32_t gcr3 = oldctx.cr3;
        uint64_t l2_mfn;
@@ -89,23 +90,32 @@ guest_linear_to_real(uint32_t base)
                l2_mfn = ((uint64_t *)(long)gcr3)[(base >> 30) & 0x3];
                if (!(l2_mfn & PT_ENTRY_PRESENT))
                        panic("l3 entry not present\n");
-               l2_mfn &= 0x3fffff000ULL;
+               l2_mfn &= 0xffffff000ULL;
 
-               l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff];
+               if (l2_mfn & 0xf00000000ULL) {
+                       printf("l2 page above 4G\n");
+                       cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), &l1_mfn);
+               } else
+                       l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff];
                if (!(l1_mfn & PT_ENTRY_PRESENT))
                        panic("l2 entry not present\n");
 
                if (l1_mfn & PDE_PS) { /* CR4.PSE is ignored in PAE mode */
-                       l0_mfn = l1_mfn & 0x3ffe00000ULL;
+                       l0_mfn = l1_mfn & 0xfffe00000ULL;
                        return l0_mfn + (base & 0x1fffff);
                }
 
-               l1_mfn &= 0x3fffff000ULL;
+               l1_mfn &= 0xffffff000ULL;
 
-               l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff];
+               if (l1_mfn & 0xf00000000ULL) {
+                       printf("l1 page above 4G\n");
+                       cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), &l0_mfn);
+               } else
+                       l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff];
                if (!(l0_mfn & PT_ENTRY_PRESENT))
                        panic("l1 entry not present\n");
-               l0_mfn &= 0x3fffff000ULL;
+
+               l0_mfn &= 0xffffff000ULL;
 
                return l0_mfn + (base & 0xfff);
        }
@@ -114,6 +124,7 @@ guest_linear_to_real(uint32_t base)
 static unsigned
 address(struct regs *regs, unsigned seg, unsigned off)
 {
+       uint64_t gdt_phys_base;
        unsigned long long entry;
        unsigned seg_base, seg_limit;
        unsigned entry_low, entry_high;
@@ -126,11 +137,16 @@ address(struct regs *regs, unsigned seg, unsigned off)
        }
 
        if (mode == VM86_REAL || seg > oldctx.gdtr_limit ||
-           (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg))
+               (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg))
                return ((seg & 0xFFFF) << 4) + off;
 
-       entry = ((unsigned long long *)
-                 guest_linear_to_real(oldctx.gdtr_base))[seg >> 3];
+       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
+       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+               printf("gdt base address above 4G\n");
+               cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3), &entry);
+       } else
+               entry = ((unsigned long long *)(long)gdt_phys_base)[seg >> 3];
+
        entry_high = entry >> 32;
        entry_low = entry & 0xFFFFFFFF;
 
@@ -138,13 +154,13 @@ address(struct regs *regs, unsigned seg, unsigned off)
        seg_limit = (entry_high & 0xF0000) | (entry_low & 0xFFFF);
 
        if (entry_high & 0x8000 &&
-           ((entry_high & 0x800000 && off >> 12 <= seg_limit) ||
-           (!(entry_high & 0x800000) && off <= seg_limit)))
+               ((entry_high & 0x800000 && off >> 12 <= seg_limit) ||
+               (!(entry_high & 0x800000) && off <= seg_limit)))
                return seg_base + off;
 
        panic("should never reach here in function address():\n\t"
-             "entry=0x%08x%08x, mode=%d, seg=0x%08x, offset=0x%08x\n",
-             entry_high, entry_low, mode, seg, off);
+                 "entry=0x%08x%08x, mode=%d, seg=0x%08x, offset=0x%08x\n",
+                 entry_high, entry_low, mode, seg, off);
 
        return 0;
 }
@@ -157,7 +173,7 @@ trace(struct regs *regs, int adjust, char *fmt, ...)
        va_list ap;
 
        if ((traceset & (1 << mode)) &&
-          (mode == VM86_REAL_TO_PROTECTED || mode == VM86_REAL)) {
+               (mode == VM86_REAL_TO_PROTECTED || mode == VM86_REAL)) {
                /* 16-bit, seg:off addressing */
                unsigned addr = address(regs, regs->cs, off);
                printf("0x%08x: 0x%x:0x%04x ", addr, regs->cs, off);
@@ -168,7 +184,7 @@ trace(struct regs *regs, int adjust, char *fmt, ...)
                printf("\n");
        }
        if ((traceset & (1 << mode)) &&
-          (mode == VM86_PROTECTED_TO_REAL || mode == VM86_PROTECTED)) {
+               (mode == VM86_PROTECTED_TO_REAL || mode == VM86_PROTECTED)) {
                /* 16-bit, gdt addressing */
                unsigned addr = address(regs, regs->cs, off);
                printf("0x%08x: 0x%x:0x%08x ", addr, regs->cs, off);
@@ -282,7 +298,7 @@ getreg32(struct regs *regs, int r)
        case 1: return regs->ecx;
        case 2: return regs->edx;
        case 3: return regs->ebx;
-       case 4: return regs->esp;
+       case 4: return regs->uesp;
        case 5: return regs->ebp;
        case 6: return regs->esi;
        case 7: return regs->edi;
@@ -304,10 +320,10 @@ getreg8(struct regs *regs, int r)
        case 1: return regs->ecx & 0xFF; /* cl */
        case 2: return regs->edx & 0xFF; /* dl */
        case 3: return regs->ebx & 0xFF; /* bl */
-       case 4: return (regs->esp >> 8) & 0xFF; /* ah */
-       case 5: return (regs->ebp >> 8) & 0xFF; /* ch */
-       case 6: return (regs->esi >> 8) & 0xFF; /* dh */
-       case 7: return (regs->edi >> 8) & 0xFF; /* bh */
+       case 4: return (regs->eax >> 8) & 0xFF; /* ah */
+       case 5: return (regs->ecx >> 8) & 0xFF; /* ch */
+       case 6: return (regs->edx >> 8) & 0xFF; /* dh */
+       case 7: return (regs->ebx >> 8) & 0xFF; /* bh */
        }
        return ~0;
 }
@@ -320,7 +336,7 @@ setreg32(struct regs *regs, int r, unsigned v)
        case 1: regs->ecx = v; break;
        case 2: regs->edx = v; break;
        case 3: regs->ebx = v; break;
-       case 4: regs->esp = v; break;
+       case 4: regs->uesp = v; break;
        case 5: regs->ebp = v; break;
        case 6: regs->esi = v; break;
        case 7: regs->edi = v; break;
@@ -342,10 +358,10 @@ setreg8(struct regs *regs, int r, unsigned v)
        case 1: regs->ecx = (regs->ecx & ~0xFF) | v; break;
        case 2: regs->edx = (regs->edx & ~0xFF) | v; break;
        case 3: regs->ebx = (regs->ebx & ~0xFF) | v; break;
-       case 4: regs->esp = (regs->esp & ~0xFF00) | (v << 8); break;
-       case 5: regs->ebp = (regs->ebp & ~0xFF00) | (v << 8); break;
-       case 6: regs->esi = (regs->esi & ~0xFF00) | (v << 8); break;
-       case 7: regs->edi = (regs->edi & ~0xFF00) | (v << 8); break;
+       case 4: regs->eax = (regs->eax & ~0xFF00) | (v << 8); break;
+       case 5: regs->ecx = (regs->ecx & ~0xFF00) | (v << 8); break;
+       case 6: regs->edx = (regs->edx & ~0xFF00) | (v << 8); break;
+       case 7: regs->ebx = (regs->ebx & ~0xFF00) | (v << 8); break;
        }
 }
 
@@ -361,9 +377,9 @@ segment(unsigned prefix, struct regs *regs, unsigned seg)
        if (prefix & SEG_SS)
                seg = regs->uss;
        if (prefix & SEG_FS)
-               seg = regs->fs;
+               seg = regs->vfs;
        if (prefix & SEG_GS)
-               seg = regs->gs;
+               seg = regs->vgs;
        return seg;
 }
 
@@ -415,7 +431,7 @@ operand(unsigned prefix, struct regs *regs, unsigned modrm)
                        case 2: return address(regs, seg, regs->edx);
                        case 3: return address(regs, seg, regs->ebx);
                        case 4: return address(regs, seg,
-                                              sib(regs, mod, fetch8(regs)));
+                                                  sib(regs, mod, fetch8(regs)));
                        case 5: return address(regs, seg, fetch32(regs));
                        case 6: return address(regs, seg, regs->esi);
                        case 7: return address(regs, seg, regs->edi);
@@ -435,7 +451,7 @@ operand(unsigned prefix, struct regs *regs, unsigned modrm)
                        case 2: return address(regs, seg, regs->edx + disp);
                        case 3: return address(regs, seg, regs->ebx + disp);
                        case 4: return address(regs, seg,
-                                              sib(regs, mod, fetch8(regs)));
+                                                  sib(regs, mod, fetch8(regs)));
                        case 5: return address(regs, seg, regs->ebp + disp);
                        case 6: return address(regs, seg, regs->esi + disp);
                        case 7: return address(regs, seg, regs->edi + disp);
@@ -492,7 +508,7 @@ operand(unsigned prefix, struct regs *regs, unsigned modrm)
                }
        }
 
-       return 0; 
+       return 0;
 }
 
 /*
@@ -546,11 +562,7 @@ lmsw(struct regs *regs, unsigned prefix, unsigned modrm)
        unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;
 
        TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
-#ifndef TEST
        oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
-#else
-       oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
-#endif
        if (cr0 & CR0_PE)
                set_mode(regs, VM86_REAL_TO_PROTECTED);
 
@@ -569,8 +581,13 @@ movr(struct regs *regs, unsigned prefix, unsigned opc)
        unsigned addr = operand(prefix, regs, modrm);
        unsigned val, r = (modrm >> 3) & 7;
 
-       if ((modrm & 0xC0) == 0xC0) /* no registers */
-               return 0;
+       if ((modrm & 0xC0) == 0xC0) {
+               /*
+                * Emulate all guest instructions in protected to real mode.
+                */
+               if (mode != VM86_PROTECTED_TO_REAL)
+                       return 0;
+       }
 
        switch (opc) {
        case 0x88: /* addr32 mov r8, r/m8 */
@@ -578,16 +595,24 @@ movr(struct regs *regs, unsigned prefix, unsigned opc)
                TRACE((regs, regs->eip - eip,
                        "movb %%e%s, *0x%x", rnames[r], addr));
                write8(addr, val);
-               break;
+               return 1;
 
        case 0x8A: /* addr32 mov r/m8, r8 */
                TRACE((regs, regs->eip - eip,
                        "movb *0x%x, %%%s", addr, rnames[r]));
                setreg8(regs, r, read8(addr));
-               break;
+               return 1;
 
        case 0x89: /* addr32 mov r16, r/m16 */
                val = getreg32(regs, r);
+               if ((modrm & 0xC0) == 0xC0) {
+                       if (prefix & DATA32)
+                               setreg32(regs, modrm & 7, val);
+                       else
+                               setreg16(regs, modrm & 7, MASK16(val));
+                       return 1;
+               }
+
                if (prefix & DATA32) {
                        TRACE((regs, regs->eip - eip,
                                "movl %%e%s, *0x%x", rnames[r], addr));
@@ -597,9 +622,17 @@ movr(struct regs *regs, unsigned prefix, unsigned opc)
                                "movw %%%s, *0x%x", rnames[r], addr));
                        write16(addr, MASK16(val));
                }
-               break;
+               return 1;
+
+       case 0x8B: /* mov r/m16, r16 */
+               if ((modrm & 0xC0) == 0xC0) {
+                       if (prefix & DATA32)
+                               setreg32(regs, r, addr);
+                       else
+                               setreg16(regs, r, MASK16(addr));
+                       return 1;
+               }
 
-       case 0x8B: /* addr32 mov r/m16, r16 */
                if (prefix & DATA32) {
                        TRACE((regs, regs->eip - eip,
                                "movl *0x%x, %%e%s", addr, rnames[r]));
@@ -609,7 +642,7 @@ movr(struct regs *regs, unsigned prefix, unsigned opc)
                                "movw *0x%x, %%%s", addr, rnames[r]));
                        setreg16(regs, r, read16(addr));
                }
-               break;
+               return 1;
 
        case 0xC6: /* addr32 movb $imm, r/m8 */
                if ((modrm >> 3) & 7)
@@ -618,11 +651,113 @@ movr(struct regs *regs, unsigned prefix, unsigned opc)
                write8(addr, val);
                TRACE((regs, regs->eip - eip, "movb $0x%x, *0x%x",
                                                        val, addr));
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * We need to handle string moves that address memory beyond the 64KB segment
+ * limit that VM8086 mode enforces.
+ *
+ * Emulates movsb (0xA4) and movsw (0xA5; 32-bit elements when DATA32 is
+ * set).  The source address is built from %esi and the segment returned by
+ * segment() (called with %ds), the destination from %edi and %es.  With a
+ * REP prefix the whole %ecx count is copied at once and %ecx is zeroed.
+ * EFLAGS.DF selects the direction.  Returns 1 for every opcode.
+ */
+static inline int
+movs(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned eip = regs->eip - 1;
+       unsigned sseg = segment(prefix, regs, regs->vds);
+       unsigned dseg = regs->ves;
+       unsigned saddr, daddr;
+       unsigned count = 1;
+       int incr = ((regs->eflags & EFLAGS_DF) == 0) ? 1 : -1;
+
+       saddr = address(regs, sseg, regs->esi);
+       daddr = address(regs, dseg, regs->edi);
+
+       if ((prefix & REP) != 0) {
+               count = regs->ecx;
+               regs->ecx = 0;
+       }
+
+       switch (opc) {
+       case 0xA4: /* movsb */
+               /* publish the final index registers before count is consumed */
+               regs->esi += (incr * count);
+               regs->edi += (incr * count);
+
+               while (count-- != 0) {
+                       write8(daddr, read8(saddr));
+                       daddr += incr;
+                       saddr += incr;
+               }
+               TRACE((regs, regs->eip - eip, "movsb (%%esi),%%es:(%%edi)"));
+               break;
+
+       case 0xA5: /* movsw */
+               if ((prefix & DATA32) == 0) {
+                       incr = 2 * incr;
+                       regs->esi += (incr * count);
+                       regs->edi += (incr * count);
+
+                       while (count-- != 0) {
+                               write16(daddr, read16(saddr));
+                               daddr += incr;
+                               saddr += incr;
+                       }
+               } else {
+                       incr = 4 * incr;
+                       regs->esi += (incr * count);
+                       regs->edi += (incr * count);
+
+                       while (count-- != 0) {
+                               write32(daddr, read32(saddr));
+                               daddr += incr;
+                               saddr += incr;
+                       }
+               }
+               /* no conversion specifiers: no extra arguments are passed */
+               TRACE((regs, regs->eip - eip, "movsw (%%esi),%%es:(%%edi)"));
                 break;
         }
+
         return 1;
 }
 
+/*
+ * Emulate lodsw (opcode 0xAD): load %ax, or %eax when DATA32 is set, from
+ * the address formed from %esi and the segment picked by segment() (%ds is
+ * passed in).  %esi is advanced by the operand size times the repeat
+ * count, in the direction given by EFLAGS.DF.  With REP, %ecx is consumed
+ * up front and every element is loaded in turn, so the register ends up
+ * holding the last one read.  Opcodes other than 0xAD fall through the
+ * switch untouched; the function returns 1 in every case.
+ */
+static inline int
+lods(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned eip = regs->eip - 1;
+       unsigned seg = segment(prefix, regs, regs->vds);
+       unsigned addr = address(regs, seg, regs->esi);
+       unsigned count = 1;
+       int incr = ((regs->eflags & EFLAGS_DF) == 0) ? 1 : -1;
+
+       if ((prefix & REP) != 0) {
+               count = regs->ecx;
+               regs->ecx = 0;
+       }
+
+       switch (opc) {
+       case 0xAD: /* lodsw */
+               if ((prefix & DATA32) == 0) {
+                       incr = 2 * incr;
+                       /* final %esi is computed before count is consumed */
+                       regs->esi += (incr * count);
+                       while (count-- != 0) {
+                               setreg16(regs, 0, read16(addr));
+                               addr += incr;
+                       }
+
+                       TRACE((regs, regs->eip - eip, "lodsw (%%esi),%%ax"));
+               } else {
+                       incr = 4 * incr;
+                       regs->esi += (incr * count);
+                       while (count-- != 0) {
+                               setreg32(regs, 0, read32(addr));
+                               addr += incr;
+                       }
+                       TRACE((regs, regs->eip - eip, "lodsw (%%esi),%%eax"));
+               }
+               break;
+       }
+       return 1;
+}
 /*
  * Move to and from a control register.
  */
@@ -641,13 +776,8 @@ movcr(struct regs *regs, unsigned prefix, unsigned opc)
                TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
                switch (cr) {
                case 0:
-#ifndef TEST
                        setreg32(regs, modrm,
                                oldctx.cr0 & ~(CR0_PE | CR0_NE));
-#else
-                       setreg32(regs, modrm,
-                               oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
-#endif
                        break;
                case 2:
                        setreg32(regs, modrm, get_cr2());
@@ -665,13 +795,10 @@ movcr(struct regs *regs, unsigned prefix, unsigned opc)
                switch (cr) {
                case 0:
                        oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
-#ifdef TEST
-                       oldctx.cr0 |= CR0_PG;
-#endif
                        if (getreg32(regs, modrm) & CR0_PE)
                                set_mode(regs, VM86_REAL_TO_PROTECTED);
-                       else
-                               set_mode(regs, VM86_REAL);
+                       //else
+                       //      set_mode(regs, VM86_REAL);
                        break;
                case 3:
                        oldctx.cr3 = getreg32(regs, modrm);
@@ -694,6 +821,55 @@ static inline void set_eflags_ZF(unsigned mask, unsigned v1, struct regs *regs)
                regs->eflags &= ~EFLAGS_ZF;
 }
 
+/*
+ * Update the arithmetic flags in regs->eflags after emulating an ADD.
+ *
+ * hi_bit_mask  mask with only the operand's most significant bit set
+ *              (bit 7, 15 or 31, as passed by add())
+ * v1, v2       the two addends
+ * result       v1 + v2; bits above hi_bit_mask are ignored
+ *
+ * CF, PF, AF, ZF, SF and OF are each set or cleared.
+ */
+static void set_eflags_add(unsigned hi_bit_mask, unsigned v1, unsigned v2,
+                               unsigned result, struct regs *regs)
+{
+       int bit_count;
+       unsigned tmp;
+       unsigned full_mask;
+
+       /*
+        * Carry out of high order bit?  Either both addends have it set,
+        * or one of them does and the result does not (a carry arrived
+        * from below and rippled out).
+        */
+       if ( ((v1 & v2) | ((v1 | v2) & ~result)) & hi_bit_mask )
+               regs->eflags |= EFLAGS_CF;
+       else
+               regs->eflags &= ~EFLAGS_CF;
+
+       /* Even parity in least significant byte? */
+       tmp = result & 0xff;
+       for (bit_count = 0; tmp != 0; bit_count++)
+               tmp &= (tmp - 1);       /* clears the lowest set bit */
+
+       if (bit_count & 1)
+               regs->eflags &= ~EFLAGS_PF;
+       else
+               regs->eflags |= EFLAGS_PF;
+
+       /*
+        * Carry out of least significant BCD digit?  v1 ^ v2 ^ result
+        * yields the carry into each bit position; bit 4 is the carry
+        * out of bit 3.
+        */
+       if ( (v1 ^ v2 ^ result) & (1<<4) )
+               regs->eflags |= EFLAGS_AF;
+       else
+               regs->eflags &= ~EFLAGS_AF;
+
+       /* Result is zero? */
+       full_mask = (hi_bit_mask - 1) | hi_bit_mask;
+       set_eflags_ZF(full_mask, result, regs);
+
+       /* Sign of result? */
+       if ( result & hi_bit_mask )
+               regs->eflags |= EFLAGS_SF;
+       else
+               regs->eflags &= ~EFLAGS_SF;
+
+       /* Signed overflow?  Addends share a sign that the result lacks. */
+       if ( ~(v1 ^ v2) & (v1 ^ result) & hi_bit_mask )
+               regs->eflags |= EFLAGS_OF;
+       else
+               regs->eflags &= ~EFLAGS_OF;
+
+}
+
 /*
  * We need to handle cmp opcodes that address memory beyond the 64KB
  * segment limit that VM8086 mode enforces.
@@ -768,6 +944,82 @@ test(struct regs *regs, unsigned prefix, unsigned opc)
 }
 
 /*
+ * We need to handle add opcodes that address memory beyond the 64KB
+ * segment limit that VM8086 mode enforces.
+ */
+static int
+add(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned eip = regs->eip - 1;
+       unsigned modrm = fetch8(regs);
+       unsigned addr = operand(prefix, regs, modrm);
+       unsigned r = (modrm >> 3) & 7;
+
+       unsigned val1 = 0;
+       unsigned val2 = 0;
+       unsigned result = 0;
+       unsigned hi_bit;
+
+       if ((modrm & 0xC0) == 0xC0) /* no registers */
+               return 0;
+
+       switch (opc) {
+       case 0x00: /* addr32 add r8, r/m8 */
+               val1 = getreg8(regs, r);
+               val2 = read8(addr);
+               result = val1 + val2;
+               write8(addr, result);
+               TRACE((regs, regs->eip - eip,
+                       "addb %%e%s, *0x%x", rnames[r], addr));
+               break;
+               
+       case 0x01: /* addr32 add r16, r/m16 */
+               if (prefix & DATA32) {
+                       val1 = getreg32(regs, r);
+                       val2 = read32(addr);
+                       result = val1 + val2;
+                       write32(addr, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addl %%e%s, *0x%x", rnames[r], addr));
+               } else {
+                       val1 = getreg16(regs, r);
+                       val2 = read16(addr);
+                       result = val1 + val2;
+                       write16(addr, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addw %%e%s, *0x%x", rnames[r], addr));
+               }
+               break;
+               
+       case 0x03: /* addr32 add r/m16, r16 */
+               if (prefix & DATA32) {
+                       val1 = getreg32(regs, r);
+                       val2 = read32(addr);
+                       result = val1 + val2;
+                       setreg32(regs, r, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addl *0x%x, %%e%s", addr, rnames[r]));
+               } else {
+                       val1 = getreg16(regs, r);
+                       val2 = read16(addr);
+                       result = val1 + val2;
+                       setreg16(regs, r, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addw *0x%x, %%%s", addr, rnames[r]));
+               }
+               break;
+       }
+
+       if (opc == 0x00)
+               hi_bit = (1<<7);
+       else
+               hi_bit = (prefix & DATA32) ? (1<<31) : (1<<15);
+       set_eflags_add(hi_bit, val1, val2, result, regs);
+
+       return 1;
+}
+
+/*
  * We need to handle pop opcodes that address memory beyond the 64KB
  * segment limit that VM8086 mode enforces.
  */
@@ -798,12 +1050,78 @@ pop(struct regs *regs, unsigned prefix, unsigned opc)
        return 1;
 }
 
+/*
+ * Emulate a register-to-segment-register move while a mode switch is in
+ * progress.  The modrm byte is always fetched.  Returns 1 when the load was
+ * emulated and 0 when it was not: wrong mode, a memory source operand, or
+ * a segment register other than es/ss/ds/fs/gs (the last two cases also
+ * print a "missed opcode" message).
+ */
+static int
+mov_to_seg(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned modrm = fetch8(regs);
+
+       /*
+        * Emulate segment loads in:
+        * 1) real->protected mode.
+        * 2) protected->real mode.
+        */
+       if (mode != VM86_REAL_TO_PROTECTED &&
+           mode != VM86_PROTECTED_TO_REAL)
+               return 0;
+
+       /* Register source only. */
+       if ((modrm & 0xC0) != 0xC0)
+               goto fail;
+
+       /*
+        * Bits 5:3 of modrm select the segment register.  Each case stores
+        * the new value in regs.  In protected->real mode that is all; in
+        * real->protected mode the saved real-mode copy is zeroed and the
+        * value is recorded as the selector in oldctx.
+        * NOTE(review): getreg16() is handed the whole modrm byte, so it
+        * presumably masks out the r/m field itself -- confirm.
+        */
+       switch ((modrm & 0x38) >> 3) {
+       case 0: /* es */
+               regs->ves = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.ves = 0;
+               oldctx.es_sel = regs->ves;
+               return 1;
+
+       /* case 1: cs */
+
+       case 2: /* ss */
+               regs->uss = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.uss = 0;
+               oldctx.ss_sel = regs->uss;
+               return 1;
+       case 3: /* ds */
+               regs->vds = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.vds = 0;
+               oldctx.ds_sel = regs->vds;
+               return 1;
+       case 4: /* fs */
+               regs->vfs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.vfs = 0;
+               oldctx.fs_sel = regs->vfs;
+               return 1;
+       case 5: /* gs */
+               regs->vgs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.vgs = 0;
+               oldctx.gs_sel = regs->vgs;
+               return 1;
+       }
+
+       /* reached for a memory source or an unhandled segment register */
+ fail:
+       printf("%s:%d: missed opcode %02x %02x\n",
+                  __FUNCTION__, __LINE__, opc, modrm);
+       return 0;
+}
+
 /*
  * Emulate a segment load in protected mode
  */
 static int
 load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes)
 {
+       uint64_t gdt_phys_base;
        unsigned long long entry;
 
        /* protected mode: use seg as index into gdt */
@@ -815,8 +1133,12 @@ load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes
                return 1;
        }
 
-       entry = ((unsigned long long *)
-                 guest_linear_to_real(oldctx.gdtr_base))[sel >> 3];
+       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
+       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+               printf("gdt base address above 4G\n");
+               cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3), &entry);
+       } else
+               entry = ((unsigned long long *)(long)gdt_phys_base)[sel >> 3];
 
        /* Check the P bit first */
        if (!((entry >> (15+32)) & 0x1) && sel != 0)
@@ -826,11 +1148,11 @@ load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes
                  ((entry >> (32-16)) & 0x00FF0000) |
                  ((entry >> (   16)) & 0x0000FFFF));
        *limit = (((entry >> (48-16)) & 0x000F0000) |
-                 ((entry           ) & 0x0000FFFF));
+                 (entry & 0x0000FFFF));
 
        arbytes->bytes = 0;
        arbytes->fields.seg_type = (entry >> (8+32)) & 0xF; /* TYPE */
-       arbytes->fields.s =  (entry >> (12+32)) & 0x1; /* S */
+       arbytes->fields.s = (entry >> (12+32)) & 0x1; /* S */
        if (arbytes->fields.s)
                arbytes->fields.seg_type |= 1; /* accessed */
        arbytes->fields.dpl = (entry >> (13+32)) & 0x3; /* DPL */
@@ -847,82 +1169,57 @@ load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes
 }
 
 /*
+ * Emulate a protected mode segment load, falling back to clearing it if
+ * the descriptor was invalid.
+ *
+ * "Clearing" means calling load_seg() a second time with selector 0, so
+ * base, limit and arbytes receive whatever load_seg() produces for that
+ * selector.
+ *
+ * NOTE(review): only base/limit/arbytes are passed in, so the caller's
+ * *_sel field keeps the rejected selector after a fallback -- confirm
+ * that this is intended.
+ */
+static void
+load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes)
+{
+       if (!load_seg(sel, base, limit, arbytes))
+               load_seg(0, base, limit, arbytes);
+}
+
+/*
+ * ICW2 values the guest wrote to the interrupt controllers, captured by
+ * outbyte() before it substitutes its own: [0] = master, [1] = slave.
+ */
+static unsigned char rm_irqbase[2];
+
+/*
  * Transition to protected mode
  */
 static void
 protected_mode(struct regs *regs)
 {
+       extern char stack_top[];
+
+       oldctx.rm_irqbase[0] = rm_irqbase[0];
+       oldctx.rm_irqbase[1] = rm_irqbase[1];
+
        regs->eflags &= ~(EFLAGS_TF|EFLAGS_VM);
 
        oldctx.eip = regs->eip;
        oldctx.esp = regs->uesp;
        oldctx.eflags = regs->eflags;
 
-       memset(&saved_rm_regs, 0, sizeof(struct regs));
-
        /* reload all segment registers */
        if (!load_seg(regs->cs, &oldctx.cs_base,
                                &oldctx.cs_limit, &oldctx.cs_arbytes))
                panic("Invalid %%cs=0x%x for protected mode\n", regs->cs);
        oldctx.cs_sel = regs->cs;
 
-       if (load_seg(regs->ves, &oldctx.es_base,
-                               &oldctx.es_limit, &oldctx.es_arbytes))
-               oldctx.es_sel = regs->ves;
-       else {
-               load_seg(0, &oldctx.es_base,
-                           &oldctx.es_limit, &oldctx.es_arbytes);
-               oldctx.es_sel = 0;
-               saved_rm_regs.ves = regs->ves;
-       }
-
-       if (load_seg(regs->uss, &oldctx.ss_base,
-                               &oldctx.ss_limit, &oldctx.ss_arbytes))
-               oldctx.ss_sel = regs->uss;
-       else {
-               load_seg(0, &oldctx.ss_base,
-                           &oldctx.ss_limit, &oldctx.ss_arbytes);
-               oldctx.ss_sel = 0;
-               saved_rm_regs.uss = regs->uss;
-       }
-
-       if (load_seg(regs->vds, &oldctx.ds_base,
-                               &oldctx.ds_limit, &oldctx.ds_arbytes))
-               oldctx.ds_sel = regs->vds;
-       else {
-               load_seg(0, &oldctx.ds_base,
-                           &oldctx.ds_limit, &oldctx.ds_arbytes);
-               oldctx.ds_sel = 0;
-               saved_rm_regs.vds = regs->vds;
-       }
-
-       if (load_seg(regs->vfs, &oldctx.fs_base,
-                               &oldctx.fs_limit, &oldctx.fs_arbytes))
-               oldctx.fs_sel = regs->vfs;
-       else {
-               load_seg(0, &oldctx.fs_base,
-                           &oldctx.fs_limit, &oldctx.fs_arbytes);
-               oldctx.fs_sel = 0;
-               saved_rm_regs.vfs = regs->vfs;
-       }
-
-       if (load_seg(regs->vgs, &oldctx.gs_base,
-                               &oldctx.gs_limit, &oldctx.gs_arbytes))
-               oldctx.gs_sel = regs->vgs;
-       else {
-               load_seg(0, &oldctx.gs_base,
-                           &oldctx.gs_limit, &oldctx.gs_arbytes);
-               oldctx.gs_sel = 0;
-               saved_rm_regs.vgs = regs->vgs;
-       }
+       load_or_clear_seg(oldctx.es_sel, &oldctx.es_base,
+                         &oldctx.es_limit, &oldctx.es_arbytes);
+       load_or_clear_seg(oldctx.ss_sel, &oldctx.ss_base,
+                         &oldctx.ss_limit, &oldctx.ss_arbytes);
+       load_or_clear_seg(oldctx.ds_sel, &oldctx.ds_base,
+                         &oldctx.ds_limit, &oldctx.ds_arbytes);
+       load_or_clear_seg(oldctx.fs_sel, &oldctx.fs_base,
+                         &oldctx.fs_limit, &oldctx.fs_arbytes);
+       load_or_clear_seg(oldctx.gs_sel, &oldctx.gs_base,
+                         &oldctx.gs_limit, &oldctx.gs_arbytes);
 
        /* initialize jump environment to warp back to protected mode */
+       regs->uss = DATA_SELECTOR;
+       regs->uesp = (unsigned long)stack_top;
        regs->cs = CODE_SELECTOR;
-       regs->ds = DATA_SELECTOR;
-       regs->es = DATA_SELECTOR;
-       regs->fs = DATA_SELECTOR;
-       regs->gs = DATA_SELECTOR;
-       regs->eip = (unsigned) &switch_to_protected_mode;
+       regs->eip = (unsigned long)switch_to_protected_mode;
 
        /* this should get us into 32-bit mode */
 }
@@ -934,10 +1231,6 @@ static void
 real_mode(struct regs *regs)
 {
        regs->eflags |= EFLAGS_VM | 0x02;
-       regs->ds = DATA_SELECTOR;
-       regs->es = DATA_SELECTOR;
-       regs->fs = DATA_SELECTOR;
-       regs->gs = DATA_SELECTOR;
 
        /*
         * When we transition from protected to real-mode and we
@@ -951,21 +1244,21 @@ real_mode(struct regs *regs)
                        panic("%%ss 0x%lx higher than 1MB", regs->uss);
                regs->uss = address(regs, regs->uss, 0) >> 4;
        } else {
-         regs->uss = saved_rm_regs.uss;
+               regs->uss = saved_rm_regs.uss;
        }
        if (regs->vds != 0) {
                if (regs->vds >= HIGHMEM)
                        panic("%%ds 0x%lx higher than 1MB", regs->vds);
                regs->vds = address(regs, regs->vds, 0) >> 4;
        } else {
-         regs->vds = saved_rm_regs.vds;
+               regs->vds = saved_rm_regs.vds;
        }
        if (regs->ves != 0) {
                if (regs->ves >= HIGHMEM)
                        panic("%%es 0x%lx higher than 1MB", regs->ves);
                regs->ves = address(regs, regs->ves, 0) >> 4;
        } else {
-         regs->ves = saved_rm_regs.ves;
+               regs->ves = saved_rm_regs.ves;
        }
 
        /* this should get us into 16-bit mode */
@@ -988,47 +1281,46 @@ set_mode(struct regs *regs, enum vm86_mode newmode)
 {
        switch (newmode) {
        case VM86_REAL:
-               if ((mode == VM86_PROTECTED_TO_REAL) ||
-                   (mode == VM86_REAL_TO_PROTECTED)) {
+               if (mode == VM86_PROTECTED_TO_REAL ||
+                   mode == VM86_REAL_TO_PROTECTED) {
                        regs->eflags &= ~EFLAGS_TF;
                        real_mode(regs);
-                       break;
-               } else if (mode == VM86_REAL) {
-                       break;
-               } else
+               } else if (mode != VM86_REAL)
                        panic("unexpected real mode transition");
                break;
 
        case VM86_REAL_TO_PROTECTED:
                if (mode == VM86_REAL) {
                        regs->eflags |= EFLAGS_TF;
-                       break;
-               } else if (mode == VM86_REAL_TO_PROTECTED) {
-                       break;
-               } else
+                       saved_rm_regs.vds = regs->vds;
+                       saved_rm_regs.ves = regs->ves;
+                       saved_rm_regs.vfs = regs->vfs;
+                       saved_rm_regs.vgs = regs->vgs;
+                       saved_rm_regs.uss = regs->uss;
+                       oldctx.ds_sel = 0;
+                       oldctx.es_sel = 0;
+                       oldctx.fs_sel = 0;
+                       oldctx.gs_sel = 0;
+                       oldctx.ss_sel = 0;
+               } else if (mode != VM86_REAL_TO_PROTECTED)
                        panic("unexpected real-to-protected mode transition");
                break;
 
        case VM86_PROTECTED_TO_REAL:
-               if (mode == VM86_PROTECTED) {
-                       break;
-               } else
+               if (mode != VM86_PROTECTED)
                        panic("unexpected protected-to-real mode transition");
                break;
 
        case VM86_PROTECTED:
-               if (mode == VM86_REAL_TO_PROTECTED) {
-                       protected_mode(regs);
-//                     printf("<VM86_PROTECTED>\n");
-                       mode = newmode;
-                       return;
-               } else
+               if (mode != VM86_REAL_TO_PROTECTED)
                        panic("unexpected protected mode transition");
+               protected_mode(regs);
                break;
        }
 
        mode = newmode;
-       TRACE((regs, 0, states[mode]));
+       if (mode != VM86_PROTECTED)
+               TRACE((regs, 0, states[mode]));
 }
 
 static void
@@ -1037,25 +1329,19 @@ jmpl(struct regs *regs, int prefix)
        unsigned n = regs->eip;
        unsigned cs, eip;
 
-       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
-               eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
-               cs = fetch16(regs);
+       eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
+       cs = fetch16(regs);
 
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
-
-                regs->cs = cs;
-                regs->eip = eip;
-               set_mode(regs, VM86_PROTECTED);
-       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
-               eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
-               cs = fetch16(regs);
+       TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
 
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+       regs->cs = cs;
+       regs->eip = eip;
 
-                regs->cs = cs;
-                regs->eip = eip;
+       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
+               set_mode(regs, VM86_PROTECTED);
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
-       } else
+       else
                panic("jmpl");
 }
 
@@ -1066,29 +1352,22 @@ jmpl_indirect(struct regs *regs, int prefix, unsigned modrm)
        unsigned cs, eip;
        unsigned addr;
 
-       addr  = operand(prefix, regs, modrm);
+       addr = operand(prefix, regs, modrm);
 
-       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
-               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
-               addr += (prefix & DATA32) ? 4 : 2;
-               cs = read16(addr);
+       eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+       addr += (prefix & DATA32) ? 4 : 2;
+       cs = read16(addr);
 
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+       TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
 
-                regs->cs = cs;
-                regs->eip = eip;
-               set_mode(regs, VM86_PROTECTED);
-       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
-               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
-               addr += (prefix & DATA32) ? 4 : 2;
-               cs = read16(addr);
-
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+       regs->cs = cs;
+       regs->eip = eip;
 
-                regs->cs = cs;
-                regs->eip = eip;
+       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
+               set_mode(regs, VM86_PROTECTED);
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
-       } else
+       else
                panic("jmpl");
 }
 
@@ -1107,15 +1386,14 @@ retl(struct regs *regs, int prefix)
 
        TRACE((regs, 1, "retl (to 0x%x:0x%x)", cs, eip));
 
-       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
-                regs->cs = cs;
-                regs->eip = eip;
+       regs->cs = cs;
+       regs->eip = eip;
+
+       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
                set_mode(regs, VM86_PROTECTED);
-       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
-                regs->cs = cs;
-                regs->eip = eip;
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
-       } else
+       else
                panic("retl");
 }
 
@@ -1166,6 +1444,7 @@ outbyte(struct regs *regs, unsigned prefix, unsigned opc)
                        icw2[0] = 0;
                        printf("Remapping master: ICW2 0x%x -> 0x%x\n",
                                al, NR_EXCEPTION_HANDLER);
+                       rm_irqbase[0] = al;
                        al = NR_EXCEPTION_HANDLER;
                }
                break;
@@ -1179,6 +1458,7 @@ outbyte(struct regs *regs, unsigned prefix, unsigned opc)
                        icw2[1] = 0;
                        printf("Remapping slave: ICW2 0x%x -> 0x%x\n",
                                al, NR_EXCEPTION_HANDLER+8);
+                       rm_irqbase[1] = al;
                        al = NR_EXCEPTION_HANDLER+8;
                }
                break;
@@ -1215,8 +1495,8 @@ pushrm(struct regs *regs, int prefix, unsigned modrm)
        unsigned addr;
        unsigned data;
 
-       addr  = operand(prefix, regs, modrm);
-       
+       addr = operand(prefix, regs, modrm);
+
        if (prefix & DATA32) {
                data = read32(addr);
                push32(regs, data);
@@ -1254,14 +1534,34 @@ opcode(struct regs *regs)
        unsigned opc, modrm, disp;
        unsigned prefix = 0;
 
+       if (mode == VM86_PROTECTED_TO_REAL &&
+               oldctx.cs_arbytes.fields.default_ops_size) {
+               prefix |= DATA32;
+               prefix |= ADDR32;
+       }
+
        for (;;) {
                switch ((opc = fetch8(regs))) {
-               case 0x07:
-                       if (prefix & DATA32)
-                               regs->ves = pop32(regs);
-                       else
-                               regs->ves = pop16(regs);
+
+               case 0x00: /* addr32 add r8, r/m8 */
+               case 0x01: /* addr32 add r16, r/m16 */
+               case 0x03: /* addr32 add r/m16, r16 */
+                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                               goto invalid;
+                       if ((prefix & ADDR32) == 0)
+                               goto invalid;
+                       if (!add(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+                       
+               case 0x07: /* pop %es */
+                       regs->ves = (prefix & DATA32) ?
+                               pop32(regs) : pop16(regs);
                        TRACE((regs, regs->eip - eip, "pop %%es"));
+                       if (mode == VM86_REAL_TO_PROTECTED) {
+                               saved_rm_regs.ves = 0;
+                               oldctx.es_sel = regs->ves;
+                       }
                        return OPC_EMULATED;
 
                case 0x0F: /* two byte opcode */
@@ -1293,6 +1593,9 @@ opcode(struct regs *regs)
                                        goto invalid;
                                }
                                break;
+                       case 0x06: /* clts */
+                               oldctx.cr0 &= ~CR0_TS;
+                               return OPC_EMULATED;
                        case 0x09: /* wbinvd */
                                return OPC_EMULATED;
                        case 0x20: /* mov Rd, Cd (1h) */
@@ -1311,6 +1614,16 @@ opcode(struct regs *regs)
                        }
                        goto invalid;
 
+               case 0x1F: /* pop %ds */
+                       regs->vds = (prefix & DATA32) ?
+                               pop32(regs) : pop16(regs);
+                       TRACE((regs, regs->eip - eip, "pop %%ds"));
+                       if (mode == VM86_REAL_TO_PROTECTED) {
+                               saved_rm_regs.vds = 0;
+                               oldctx.ds_sel = regs->vds;
+                       }
+                       return OPC_EMULATED;
+
                case 0x26:
                        TRACE((regs, regs->eip - eip, "%%es:"));
                        prefix |= SEG_ES;
@@ -1328,13 +1641,11 @@ opcode(struct regs *regs)
 
                case 0x39: /* addr32 cmp r16, r/m16 */
                case 0x3B: /* addr32 cmp r/m16, r16 */
-                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                       if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32))
                                goto invalid;
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!cmp(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
+                       if (!cmp(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
 
                case 0x3E:
                        TRACE((regs, regs->eip - eip, "%%ds:"));
@@ -1352,57 +1663,54 @@ opcode(struct regs *regs)
                        continue;
 
                case 0x66:
-                       TRACE((regs, regs->eip - eip, "data32"));
-                       prefix |= DATA32;
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "data16"));
+                               prefix &= ~DATA32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "data32"));
+                               prefix |= DATA32;
+                       }
                        continue;
 
-               case 0x67: 
-                       TRACE((regs, regs->eip - eip, "addr32"));
-                       prefix |= ADDR32;
+               case 0x67:
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "addr16"));
+                               prefix &= ~ADDR32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "addr32"));
+                               prefix |= ADDR32;
+                       }
                        continue;
 
                case 0x88: /* addr32 mov r8, r/m8 */
                case 0x8A: /* addr32 mov r/m8, r8 */
-                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                       if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32))
                                goto invalid;
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!movr(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
-
-               case 0x89: /* addr32 mov r16, r/m16 */
-                       if (mode == VM86_PROTECTED_TO_REAL) {
-                               unsigned modrm = fetch8(regs);
-                               unsigned addr = operand(prefix, regs, modrm);
-                               unsigned val, r = (modrm >> 3) & 7;
-                               
-                               if (prefix & DATA32) {
-                                       val = getreg16(regs, r);
-                                       write32(addr, val);
-                               } else {
-                                       val = getreg32(regs, r);
-                                       write16(addr, MASK16(val));
-                               }
-                               TRACE((regs, regs->eip - eip,
-                                       "mov %%%s, *0x%x", rnames[r], addr));
-                               return OPC_EMULATED;
-                       }
-               case 0x8B: /* addr32 mov r/m16, r16 */
-                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                       if (!movr(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0x89: /* mov r16, r/m16 */
+               case 0x8B: /* mov r/m16, r16 */
+                       if (mode != VM86_PROTECTED_TO_REAL && !(prefix & ADDR32))
+                               goto invalid;
+                       if (!movr(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0x8E: /* mov r16, sreg */
+                       if (!mov_to_seg(regs, prefix, opc))
                                goto invalid;
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!movr(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
+                       return OPC_EMULATED;
 
                case 0x8F: /* addr32 pop r/m16 */
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!pop(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!pop(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
 
                case 0x90: /* nop */
                        TRACE((regs, regs->eip - eip, "nop"));
@@ -1426,49 +1734,64 @@ opcode(struct regs *regs)
                        regs->eflags |= EFLAGS_VM;
                        return OPC_EMULATED;
 
-               case 0xA1: /* mov ax, r/m16 */ 
-                       {
-                               int addr, data;
-                               int seg = segment(prefix, regs, regs->vds);
-                               int offset = prefix & ADDR32? fetch32(regs) : fetch16(regs);
-
-                               if (prefix & DATA32) {
-                                       addr = address(regs, seg, offset);
-                                       data = read32(addr);
-                                       setreg32(regs, 0, data);
-                               } else {
-                                       addr = address(regs, seg, offset);
-                                       data = read16(addr);
-                                       setreg16(regs, 0, data);
-                               }
-                               TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr));
+               case 0xA1: /* mov ax, r/m16 */
+               {
+                       int addr, data;
+                       int seg = segment(prefix, regs, regs->vds);
+                       int offset = prefix & ADDR32 ? fetch32(regs) : fetch16(regs);
+
+                       if (prefix & DATA32) {
+                               addr = address(regs, seg, offset);
+                               data = read32(addr);
+                               setreg32(regs, 0, data);
+                       } else {
+                               addr = address(regs, seg, offset);
+                               data = read16(addr);
+                               setreg16(regs, 0, data);
                        }
+                       TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr));
+                       return OPC_EMULATED;
+               }
+
+               case 0xA4: /* movsb */
+               case 0xA5: /* movsw */
+                       if ((prefix & ADDR32) == 0)
+                               goto invalid;
+                       if (!movs(regs, prefix, opc))
+                               goto invalid;
                        return OPC_EMULATED;
 
+               case 0xAD: /* lodsw */
+                       if ((prefix & ADDR32) == 0)
+                               goto invalid;
+                       if (!lods(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+                       
                case 0xBB: /* mov bx, imm16 */
-                       {
-                               int data;
-                               if (prefix & DATA32) {
-                                       data = fetch32(regs);
-                                       setreg32(regs, 3, data);
-                               } else {
-                                       data = fetch16(regs);
-                                       setreg16(regs, 3, data);
-                               }
-                               TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data));
+               {
+                       int data;
+                       if (prefix & DATA32) {
+                               data = fetch32(regs);
+                               setreg32(regs, 3, data);
+                       } else {
+                               data = fetch16(regs);
+                               setreg16(regs, 3, data);
                        }
+                       TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data));
                        return OPC_EMULATED;
+               }
 
                case 0xC6: /* addr32 movb $imm, r/m8 */
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!movr(regs, prefix, opc))
-                                goto invalid;
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!movr(regs, prefix, opc))
+                               goto invalid;
                        return OPC_EMULATED;
 
                case 0xCB: /* retl */
-                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                           (mode == VM86_PROTECTED_TO_REAL)) {
+                       if (mode == VM86_REAL_TO_PROTECTED ||
+                               mode == VM86_PROTECTED_TO_REAL) {
                                retl(regs, prefix);
                                return OPC_INVALID;
                        }
@@ -1505,37 +1828,37 @@ opcode(struct regs *regs)
                        return OPC_EMULATED;
 
                case 0xEA: /* jmpl */
-                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                           (mode == VM86_PROTECTED_TO_REAL)) {
+                       if (mode == VM86_REAL_TO_PROTECTED ||
+                               mode == VM86_PROTECTED_TO_REAL) {
                                jmpl(regs, prefix);
                                return OPC_INVALID;
                        }
                        goto invalid;
 
-               case 0xFF: /* jmpl (indirect) */
-                       {
-                               unsigned modrm = fetch8(regs);
-                               switch((modrm >> 3) & 7) {
-                               case 5: /* jmpl (indirect) */
-                                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                                           (mode == VM86_PROTECTED_TO_REAL)) {
-                                               jmpl_indirect(regs, prefix, modrm);
-                                               return OPC_INVALID;
-                                       }
-                                       goto invalid;
+               case 0xFF:
+               {
+                       unsigned modrm = fetch8(regs);
+                       switch((modrm >> 3) & 7) {
+                       case 5: /* jmpl (indirect) */
+                               if (mode == VM86_REAL_TO_PROTECTED ||
+                                       mode == VM86_PROTECTED_TO_REAL) {
+                                       jmpl_indirect(regs, prefix, modrm);
+                                       return OPC_INVALID;
+                               }
+                               goto invalid;
 
-                               case 6: /* push r/m16 */
-                                       pushrm(regs, prefix, modrm);
-                                       return OPC_EMULATED;
+                       case 6: /* push r/m16 */
+                               pushrm(regs, prefix, modrm);
+                               return OPC_EMULATED;
 
-                               default:
-                                       goto invalid;
-                               }
+                       default:
+                               goto invalid;
                        }
+               }
 
                case 0xEB: /* short jump */
-                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                           (mode == VM86_PROTECTED_TO_REAL)) {
+                       if (mode == VM86_REAL_TO_PROTECTED ||
+                               mode == VM86_PROTECTED_TO_REAL) {
                                disp = (char) fetch8(regs);
                                TRACE((regs, 2, "jmp 0x%x", regs->eip + disp));
                                regs->eip += disp;
@@ -1557,11 +1880,21 @@ opcode(struct regs *regs)
                        TRACE((regs, regs->eip - eip, "lock"));
                        continue;
 
+               case 0xF4: /* hlt */
+                       TRACE((regs, regs->eip - eip, "hlt"));
+                       /* Do something power-saving here! */
+                       return OPC_EMULATED;
+
+               case 0xF3: /* rep/repe/repz */
+                       TRACE((regs, regs->eip - eip, "rep"));
+                       prefix |= REP;
+                       continue;
+
                case 0xF6: /* addr32 testb $imm, r/m8 */
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!test(regs, prefix, opc))
-                                goto invalid;
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!test(regs, prefix, opc))
+                               goto invalid;
                        return OPC_EMULATED;
 
                case 0xFA: /* cli */
@@ -1590,6 +1923,7 @@ emulate(struct regs *regs)
 {
        unsigned flteip;
        int nemul = 0;
+       unsigned ip;
 
        /* emulate as many instructions as possible */
        while (opcode(regs) != OPC_INVALID)
@@ -1598,6 +1932,12 @@ emulate(struct regs *regs)
        /* detect the case where we are not making progress */
        if (nemul == 0 && prev_eip == regs->eip) {
                flteip = address(regs, MASK16(regs->cs), regs->eip);
+
+               printf("Undecoded sequence: \n");
+               for (ip=flteip; ip < flteip+16; ip++)
+                       printf("0x%02x ", read8(ip));
+               printf("\n");
+
                panic("Unknown opcode at %04x:%04x=0x%x",
                        MASK16(regs->cs), regs->eip, flteip);
        } else
@@ -1621,6 +1961,8 @@ trap(int trapno, int errno, struct regs *regs)
        case 1: /* Debug */
                if (regs->eflags & EFLAGS_VM) {
                        /* emulate any 8086 instructions  */
+                       if (mode == VM86_REAL)
+                               return;
                        if (mode != VM86_REAL_TO_PROTECTED)
                                panic("not in real-to-protected mode");
                        emulate(regs);
@@ -1641,7 +1983,7 @@ trap(int trapno, int errno, struct regs *regs)
        default:
        invalid:
                printf("Trap (0x%x) while in %s mode\n",
-                   trapno, regs->eflags & EFLAGS_VM ? "real" : "protected");
+                       trapno, regs->eflags & EFLAGS_VM ? "real" : "protected");
                if (trapno == 14)
                        printf("Page fault address 0x%x\n", get_cr2());
                dump_regs(regs);
index e0997e4..64c24aa 100644 (file)
 
 #include "vmx_assist.h"
 
-#define        NR_EXCEPTION_HANDLER    32
-#define        NR_INTERRUPT_HANDLERS   16
-#define        NR_TRAPS                (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
-
 #ifndef __ASSEMBLY__
 
 struct regs {
-        unsigned       edi, esi, ebp, esp, ebx, edx, ecx, eax;
-        unsigned       ds, es, fs, gs;
-        unsigned       trapno, errno;
-        unsigned       eip, cs, eflags, uesp, uss;
-        unsigned       ves, vds, vfs, vgs;
+       unsigned        edi, esi, ebp, esp, ebx, edx, ecx, eax;
+       unsigned        trapno, errno;
+       unsigned        eip, cs, eflags, uesp, uss;
+       unsigned        ves, vds, vfs, vgs;
 };
 
 enum vm86_mode {
@@ -55,7 +50,6 @@ enum vm86_mode {
 
 extern enum vm86_mode prevmode, mode;
 extern struct vmx_assist_context oldctx;
-extern struct vmx_assist_context newctx;
 
 extern void emulate(struct regs *);
 extern void dump_regs(struct regs *);
index f987b0f..4ef17fe 100644 (file)
@@ -1,6 +1,24 @@
 /*
  * vmx_assist.h: Context definitions for the VMXASSIST world switch.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Leendert van Doorn, leendert@watson.ibm.com
  * Copyright (c) 2005, International Business Machines Corporation.
  */
 
 #ifndef __ASSEMBLY__
 
+#define NR_EXCEPTION_HANDLER    32
+#define NR_INTERRUPT_HANDLERS   16
+#define NR_TRAPS        (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
+
 union vmcs_arbytes {
     struct arbyte_fields {
         unsigned int seg_type : 4,
@@ -80,6 +102,8 @@ struct vmx_assist_context {
     uint32_t  ldtr_limit;
     uint32_t  ldtr_base;
     union vmcs_arbytes ldtr_arbytes;
+
+    unsigned char rm_irqbase[2];
 };
 typedef struct vmx_assist_context vmx_assist_context_t;
 
index 1cafb02..e93a308 100644 (file)
Binary files a/bios/vmxassist/vmxassist.bin and b/bios/vmxassist/vmxassist.bin differ
index e5fe949..eebd289 100644 (file)
@@ -70,10 +70,8 @@ struct vmx_basic_msr {
 }  __attribute__((packed));
 
 typedef enum { 
-    VMXASSIST_STARTUP,
-    VMXASSIST_V8086_BIOS,
-    VMXASSIST_V8086,
-    NORMAL 
+    VMXASSIST_DISABLED,
+    VMXASSIST_ENABLED
 } vmx_state_t;
 
 struct tss_descriptor {
diff --git a/palacios/include/palacios/vmx_assist.h b/palacios/include/palacios/vmx_assist.h
new file mode 100644 (file)
index 0000000..31b9a37
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ */
+
+#ifndef _VMX_ASSIST_H_
+#define _VMX_ASSIST_H_
+
+#include <palacios/vm_guest.h>
+
+#define VMXASSIST_BASE         0xD0000 /* guest-physical base of the vmxassist image; same value as TEXTADDR in the vmxassist Makefile */
+#define VMXASSIST_MAGIC        0x17101966 /* signature value for the magic field that v3_vmxassist_ctx_switch() validates */
+#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8) /* despite the name, an absolute guest-physical address (base + 8), read via read_guest_pa_memory() */
+
+#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12) /* NOTE(review): presumably a 32-bit slot holding the guest-physical address of the context to switch to -- confirm against head.S */
+#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4) /* NOTE(review): presumably the adjacent 32-bit slot pointing at the saved (old) context -- confirm */
+
+#ifndef __ASSEMBLY__
+
+#define NR_EXCEPTION_HANDLER    32
+#define NR_INTERRUPT_HANDLERS   16
+#define NR_TRAPS        (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
+
+union vmcs_arbytes { /* segment access-rights word: bit-field view (fields) overlaid on the raw value (bytes) */
+    struct arbyte_fields { /* NOTE(review): names appear to follow the Intel VMCS access-rights layout; bit order assumes LSB-first bit-field allocation -- confirm */
+        unsigned int seg_type : 4, /* segment type */
+            s         : 1, /* presumably the descriptor S flag (system vs. code/data) */
+            dpl       : 2, /* descriptor privilege level */
+            p         : 1, /* present */
+            reserved0 : 4,
+            avl       : 1, /* available for software use */
+            reserved1 : 1,
+            default_ops_size: 1, /* default operand size; vm86.c tests this on cs to decide whether 0x66/0x67 mean 16-bit overrides */
+            g         : 1, /* granularity */
+            null_bit  : 1, /* NOTE(review): presumably the VMCS "segment unusable" flag -- confirm */
+            reserved2 : 15; /* widths total 32 bits, matching 'bytes' when unsigned int is 32 bits wide */
+    } fields;
+    unsigned int bytes; /* whole access-rights value as one word */
+};
+
+/*
+ * World switch state: CPU context record used for the VMXASSIST world switch (also declared in bios/vmxassist/vmx_assist.h; keep both in sync)
+ */
+struct vmx_assist_context {
+    uint32_t  eip;        /* execution pointer */
+    uint32_t  esp;        /* stack pointer */
+    uint32_t  eflags;     /* flags register */
+    uint32_t  cr0;        /* control register 0 */
+    uint32_t  cr3;        /* page table directory */
+    uint32_t  cr4;        /* control register 4 */
+    uint32_t  idtr_limit; /* idt */
+    uint32_t  idtr_base;
+    uint32_t  gdtr_limit; /* gdt */
+    uint32_t  gdtr_base;
+    uint32_t  cs_sel;     /* cs selector */
+    uint32_t  cs_limit;   /* cs segment limit */
+    uint32_t  cs_base;    /* cs segment base */
+    union vmcs_arbytes cs_arbytes; /* cs access rights */
+    uint32_t  ds_sel;     /* ds selector; every segment below repeats the sel/limit/base/arbytes group */
+    uint32_t  ds_limit;
+    uint32_t  ds_base;
+    union vmcs_arbytes ds_arbytes;
+    uint32_t  es_sel;     /* es selector */
+    uint32_t  es_limit;
+    uint32_t  es_base;
+    union vmcs_arbytes es_arbytes;
+    uint32_t  ss_sel;     /* ss selector */
+    uint32_t  ss_limit;
+    uint32_t  ss_base;
+    union vmcs_arbytes ss_arbytes;
+    uint32_t  fs_sel;     /* fs selector */
+    uint32_t  fs_limit;
+    uint32_t  fs_base;
+    union vmcs_arbytes fs_arbytes;
+    uint32_t  gs_sel;     /* gs selector */
+    uint32_t  gs_limit;
+    uint32_t  gs_base;
+    union vmcs_arbytes gs_arbytes;
+    uint32_t  tr_sel;     /* task selector */
+    uint32_t  tr_limit;
+    uint32_t  tr_base;
+    union vmcs_arbytes tr_arbytes;
+    uint32_t  ldtr_sel;   /* ldtr selector */
+    uint32_t  ldtr_limit;
+    uint32_t  ldtr_base;
+    union vmcs_arbytes ldtr_arbytes;
+
+    unsigned char rm_irqbase[2]; /* NOTE(review): presumably the real-mode IRQ vector bases (two entries) -- confirm; the BIOS-side header gains the same field */
+};
+typedef struct vmx_assist_context vmx_assist_context_t; /* convenience alias for the struct */
+
+int v3_vmxassist_ctx_switch(struct guest_info * info);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _VMX_ASSIST_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/palacios/include/palacios/vmx_ctrl_regs.h b/palacios/include/palacios/vmx_ctrl_regs.h
new file mode 100644 (file)
index 0000000..f35e878
--- /dev/null
@@ -0,0 +1,5 @@
+
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_ctrl_regs.h>
+
+int v3_vmx_handle_cr0_write(struct guest_info * info, v3_reg_t new_val);
index 7525edb..1507e4a 100644 (file)
@@ -85,7 +85,7 @@ struct vmexit_io_qual {
     uint32_t access_size : 3; // (0: 1 Byte ;; 1: 2 Bytes ;; 3: 4 Bytes)
     uint32_t dir        : 1; // (0: Out ;; 1: In)
     uint32_t string     : 1; // (0: not string ;; 1: string)
-    uint32_t REP        : 1; // (0: not REP ;; 1: REP)
+    uint32_t rep        : 1; // (0: not REP ;; 1: REP)
     uint32_t op_enc      : 1; // (0: DX ;; 1: immediate)
     uint32_t rsvd       : 9; // Set to 0
     uint32_t port       : 16; // IO Port Number
index 2530a7f..3c2ebbd 100644 (file)
@@ -1,3 +1,25 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+#ifndef __VMX_IO_H__
+#define __VMX_IO_H__
+
+#ifdef __V3VEE__
 
 #include <palacios/vm_guest.h>
 
@@ -8,3 +30,6 @@ int v3_handle_vmx_io_ins(struct guest_info * info);
 int v3_handle_vmx_io_out(struct guest_info * info);
 int v3_handle_vmx_io_outs(struct guest_info * info);
 
+
+#endif
+#endif
index b654414..81872e0 100644 (file)
@@ -22,6 +22,7 @@
 
 #ifdef __V3VEE__
 
+#include <palacios/vmcs.h>
 
 #define VMX_SUCCESS         0 
 #define VMX_FAIL_INVALID    1
@@ -134,10 +135,10 @@ static inline int vmcs_read(vmcs_field_t vmcs_field, void * dst) {
     __asm__ __volatile__ (  
                 VMREAD_OPCODE
                 EAX_ECX_MODRM
-                "seteb %0;" // fail valid
-                "setnaeb %1;" // fail invalid
-                : "=q"(ret_valid), "=q"(ret_invalid), "=c"(val) // Use ECX
-                : "a" (vmcs_field), "0"(ret_valid), "1"(ret_invalid)
+                "seteb %1;" // fail valid
+                "setnaeb %2;" // fail invalid
+                :  "=&c"(val), "=q"(ret_valid), "=q"(ret_invalid) // Use ECX
+                : "a" (vmcs_field), "1"(ret_valid), "2"(ret_invalid)
                 : "memory"
                 );
 
index 4b89491..ad1d7a9 100644 (file)
@@ -1,4 +1,30 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2009, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMX_MSR_H__
+#define __VMX_MSR_H__
+
+#ifdef __V3VEE__
 
 #include <palacios/vm_guest.h>
 
 int v3_init_vmx_msr_map(struct guest_info * info);
+
+#endif
+#endif
index 3138c64..e11944f 100644 (file)
@@ -1157,7 +1157,7 @@ static int ide_read_data_port(ushort_t port, void * dst, uint_t length, struct v
     struct ide_channel * channel = get_selected_channel(ide, port);
     struct ide_drive * drive = get_selected_drive(channel);
 
-    //    PrintDebug("IDE: Reading Data Port %x (len=%d)\n", port, length);
+       PrintDebug("IDE: Reading Data Port %x (len=%d)\n", port, length);
 
     if ((channel->cmd_reg == 0xec) ||
        (channel->cmd_reg == 0xa1)) {
index 87670c7..4a3baac 100644 (file)
@@ -93,7 +93,7 @@ static int cd_init(struct guest_info * vm, void * cfg_data) {
 
     cd = (struct cd_state *)V3_Malloc(sizeof(struct cd_state));
 
-    PrintDebug("Registering Ram CD at %p (size=%d)\n", (void *)ramdisk, size);
+    PrintDebug("Registering Ram CD at %p (size=%d)\n", (void *)cfg->ramdisk, cfg->size);
 
   
     cd->disk_image = cfg->ramdisk;
index 6a5730c..c90e5be 100644 (file)
@@ -108,7 +108,7 @@ static int hd_init(struct guest_info * vm, void * cfg_data) {
 
     hd = (struct hd_state *)V3_Malloc(sizeof(struct hd_state));
 
-    PrintDebug("Registering Ram HDD at %p (size=%d)\n", (void *)ramdisk, size);
+    PrintDebug("Registering Ram HDD at %p (size=%d)\n", (void *)cfg->ramdisk, cfg->size);
 
     hd->disk_image = cfg->ramdisk;
     hd->capacity = cfg->size;
index 87ee4d8..fb2623e 100644 (file)
@@ -46,7 +46,9 @@ obj-$(CONFIG_VMX) +=          vmx.o \
                        vmx_io.o \
                        vmx_lowlevel.o \
                        vmx_msr.o \
-                       vmcs.o
+                       vmcs.o \
+                       vmx_ctrl_regs.o \
+                       vmx_assist.o
 
 
 
index 632daab..226839c 100644 (file)
@@ -21,7 +21,6 @@
 
 
 #include <palacios/vmx.h>
-#include <palacios/vmcs.h>
 #include <palacios/vmm.h>
 #include <palacios/vmx_lowlevel.h>
 #include <palacios/vmm_lowlevel.h>
@@ -355,48 +354,6 @@ static addr_t allocate_vmcs()
     return (addr_t)V3_PAddr((void *)vmcs_page);
 }
 
-#if 0
-
-#endif
-
-#if 0
-static int init_vmcs_bios(struct guest_info * vm_info) 
-{
-#if 0
-
-    setup_v8086_mode_for_boot(vm_info);
-
-
-    // Setup guest state 
-    // TODO: This is not 32-bit safe!
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_RIP, vm_info->rip);
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_RSP, vm_info->vm_regs.rsp);
-    
-
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_CR0, vm_info->ctrl_regs.cr0);
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_CR4, vm_info->ctrl_regs.cr4);
-
-    vmx_ret |= vmcs_write_guest_segments(vm_info);
-
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_RFLAGS, vm_info->ctrl_regs.rflags);
-#define DEBUGCTL_MSR 0x1d9
-
-    v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
-
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_DR7, 0x400);
-
-    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, 0xffffffffffffffff);
-
-    if (vmx_ret != 0) {
-       PrintError("Could not initialize VMCS segments\n");
-        return -1;
-    }
-
-#endif
-    return 0;
-}
-#endif
 
 static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
     v3_pre_config_guest(info, config_ptr);
@@ -492,6 +449,10 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config
   
 
     /********** Setup and VMX Control Fields from MSR ***********/
+    /* Setup IO map */
+    (void) v3_init_vmx_io_map(info);
+    (void) v3_init_vmx_msr_map(info);
+
     struct v3_msr tmp_msr;
 
     v3_get_msr(VMX_PINBASED_CTLS_MSR,&(tmp_msr.hi),&(tmp_msr.lo));
@@ -499,7 +460,15 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config
     vmx_data->pinbased_ctrls =  tmp_msr.lo | NMI_EXIT;
 
     v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
-    vmx_data->pri_procbased_ctrls = tmp_msr.lo;
+
+    PrintDebug("MSR High: 0x%x\n", tmp_msr.hi);
+    vmx_data->pri_procbased_ctrls = tmp_msr.lo | USE_IO_BITMAPS ;
+
+    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
+    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
+            (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB); 
+
+    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));
 
     v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
     vmx_data->exit_ctrls = tmp_msr.lo ;
@@ -509,6 +478,7 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config
 
     struct vmx_exception_bitmap excp_bmap;
     excp_bmap.value = 0xffffffff;
+    excp_bmap.gp = 0;
     vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
 
 
@@ -552,10 +522,9 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config
 
         // vmx_data->pinbased_ctrls |= NMI_EXIT;
 
-        /* Add unconditional I/O and CR exits */
-        vmx_data->pri_procbased_ctrls |= UNCOND_IO_EXIT  
-                                        | CR3_LOAD_EXIT  
-                                        | CR3_STORE_EXIT;
+        /* Add CR exits */
+        vmx_data->pri_procbased_ctrls |= CR3_LOAD_EXIT  
+                                      | CR3_STORE_EXIT;
  
         vmx_data->exit_ctrls |= HOST_ADDR_SPACE_SIZE;
     }
@@ -589,10 +558,7 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config
     info->segments.ldtr.present = 1;
     info->segments.ldtr.granularity = 0;
     
-    /* Setup IO map */
-    (void) v3_init_vmx_io_map(info);
-    (void) v3_init_vmx_msr_map(info);
-
+    
     /************* Map in GDT and vmxassist *************/
 
     uint64_t  gdt[] __attribute__ ((aligned(32))) = {
@@ -671,7 +637,7 @@ static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config
 
     v3_print_vmcs();
 
-    vmx_data->state = VMXASSIST_STARTUP;
+    vmx_data->state = VMXASSIST_DISABLED;
 
     v3_post_config_guest(info, config_ptr);
 
diff --git a/palacios/src/palacios/vmx_assist.c b/palacios/src/palacios/vmx_assist.c
new file mode 100644 (file)
index 0000000..8a12fe7
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmx_assist.h>
+#include <palacios/vmx_lowlevel.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmx.h>
+
+static int vmx_save_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx);
+static int vmx_restore_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx);
+
+/*
+ * Switch the guest between its own world and the VMXASSIST world.
+ *
+ * The VMXASSIST magic value is read from guest physical memory and checked,
+ * then the guest-physical pointers to the "old" and "new" context structures
+ * are read and translated to host virtual addresses.
+ *
+ *  - state == VMXASSIST_DISABLED: the current VMCS guest state is saved into
+ *    the old context and the new context is loaded.
+ *  - state == VMXASSIST_ENABLED:  the old context is loaded back.
+ *  - any other state:             nothing is saved or restored.
+ *
+ * This function does not modify vmx_data->state itself.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+int v3_vmxassist_ctx_switch(struct guest_info * info) {
+    uint32_t vmx_magic = 0; // Magic number to check for vmxassist
+    struct vmx_assist_context * old_ctx = NULL;
+    struct vmx_assist_context * new_ctx = NULL;
+    uint32_t old_ctx_gpa = 0;
+    uint32_t new_ctx_gpa = 0;
+    vmx_state_t state = ((struct vmx_data *)info->vmm_data)->state;
+
+    /* Check validity of VMXASSIST_MAGIC field */
+    if (read_guest_pa_memory(info, VMXASSIST_MAGIC_OFFSET, sizeof(vmx_magic), (uint8_t *)&vmx_magic) != sizeof(vmx_magic)) {
+        PrintError("Could not read guest VMXASSIST magic field\n");
+        return -1;
+    }
+
+    if (vmx_magic != VMXASSIST_MAGIC) {
+        PrintError("VMXASSIST_MAGIC field is invalid\n");
+        return -1;
+    }
+
+
+    /* Retrieve the pointer to the Old Context struct */
+    if (read_guest_pa_memory(info, VMXASSIST_OLD_CONTEXT, sizeof(old_ctx_gpa), (uint8_t *)&old_ctx_gpa) != sizeof(old_ctx_gpa)) {
+        PrintError("Could not read Old Context pointer field\n");
+        return -1;
+    }
+
+    /* A failed translation would leave old_ctx NULL and be dereferenced below */
+    if (guest_pa_to_host_va(info, (addr_t)old_ctx_gpa, (addr_t *)&(old_ctx)) == -1) {
+        PrintError("Could not translate Old Context address\n");
+        return -1;
+    }
+
+
+    /* Retrieve the pointer to the New Context struct */
+    if (read_guest_pa_memory(info, VMXASSIST_NEW_CONTEXT, sizeof(new_ctx_gpa), (uint8_t *)&new_ctx_gpa) != sizeof(new_ctx_gpa)) {
+        PrintError("Could not read New Context pointer field\n");
+        return -1;
+    }
+
+    /* Same check for the new context pointer */
+    if (guest_pa_to_host_va(info, (addr_t)new_ctx_gpa, (addr_t *)&(new_ctx)) == -1) {
+        PrintError("Could not translate New Context address\n");
+        return -1;
+    }
+
+
+    if (state == VMXASSIST_DISABLED) {
+
+        /* Save the old Context */
+        if (vmx_save_world_ctx(info, old_ctx) != 0) {
+            PrintError("Could not save VMXASSIST world context\n");
+            return -1;
+        }
+
+        /* restore new context, vmxassist should launch the bios the first time */
+        if (vmx_restore_world_ctx(info, new_ctx) != 0) {
+            PrintError("VMXASSIST could not restore new context\n");
+            return -1;
+        }
+
+    } else if (state == VMXASSIST_ENABLED) {
+        /* restore old context */
+        if (vmx_restore_world_ctx(info, old_ctx) != 0) {
+            PrintError("VMXASSIST could not restore old context\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+        
+/*
+ * Copy the current guest state into a VMXASSIST context structure.
+ *
+ * RIP, RSP, RFLAGS, the CR0/CR4 read shadows, IDTR, GDTR and all segment
+ * registers are read from the VMCS; CR3 is taken from
+ * info->shdw_pg_state.guest_cr3.
+ *
+ * Returns the bitwise OR of every vmcs_read() return value.
+ */
+int vmx_save_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx) {
+    int error = 0;
+
+/* Read selector, limit, base and access bytes of one segment register into ctx */
+#define SAVE_SEG(SEG, seg)                                                          \
+    do {                                                                            \
+        error |= vmcs_read(VMCS_GUEST_##SEG##_SELECTOR, &(ctx->seg##_sel));         \
+        error |= vmcs_read(VMCS_GUEST_##SEG##_LIMIT, &(ctx->seg##_limit));          \
+        error |= vmcs_read(VMCS_GUEST_##SEG##_BASE, &(ctx->seg##_base));            \
+        error |= vmcs_read(VMCS_GUEST_##SEG##_ACCESS, &(ctx->seg##_arbytes.bytes)); \
+    } while (0)
+
+    PrintDebug("Writing from RIP: 0x%p\n", (void *)info->rip);
+
+    /* Instruction pointer, stack pointer and flags */
+    error |= vmcs_read(VMCS_GUEST_RIP, &(ctx->eip));
+    error |= vmcs_read(VMCS_GUEST_RSP, &(ctx->esp));
+    error |= vmcs_read(VMCS_GUEST_RFLAGS, &(ctx->eflags));
+
+    /* Control registers: CR0/CR4 come from the read shadows, CR3 from the shadow paging state */
+    error |= vmcs_read(VMCS_CR0_READ_SHDW, &(ctx->cr0));
+    ctx->cr3 = info->shdw_pg_state.guest_cr3;
+    error |= vmcs_read(VMCS_CR4_READ_SHDW, &(ctx->cr4));
+
+    /* Descriptor table registers */
+    error |= vmcs_read(VMCS_GUEST_IDTR_LIMIT, &(ctx->idtr_limit));
+    error |= vmcs_read(VMCS_GUEST_IDTR_BASE, &(ctx->idtr_base));
+
+    error |= vmcs_read(VMCS_GUEST_GDTR_LIMIT, &(ctx->gdtr_limit));
+    error |= vmcs_read(VMCS_GUEST_GDTR_BASE, &(ctx->gdtr_base));
+
+    /* Segment registers, in the same order as before */
+    SAVE_SEG(CS, cs);
+    SAVE_SEG(DS, ds);
+    SAVE_SEG(ES, es);
+    SAVE_SEG(SS, ss);
+    SAVE_SEG(FS, fs);
+    SAVE_SEG(GS, gs);
+    SAVE_SEG(TR, tr);
+    SAVE_SEG(LDTR, ldtr);
+
+#undef SAVE_SEG
+
+    return error;
+}
+
+/*
+ * Load guest state from a VMXASSIST context structure.
+ *
+ * RIP, RSP, RFLAGS, the CR0/CR4 read shadows, IDTR, GDTR and all segment
+ * registers are written to the VMCS; CR3 is stored in
+ * info->shdw_pg_state.guest_cr3.
+ *
+ * Returns the bitwise OR of every vmcs_write() return value.
+ */
+int vmx_restore_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx) {
+    int error = 0;
+
+/* Write selector, limit, base and access bytes of one segment register from ctx */
+#define RESTORE_SEG(SEG, seg)                                                      \
+    do {                                                                           \
+        error |= vmcs_write(VMCS_GUEST_##SEG##_SELECTOR, ctx->seg##_sel);          \
+        error |= vmcs_write(VMCS_GUEST_##SEG##_LIMIT, ctx->seg##_limit);           \
+        error |= vmcs_write(VMCS_GUEST_##SEG##_BASE, ctx->seg##_base);             \
+        error |= vmcs_write(VMCS_GUEST_##SEG##_ACCESS, ctx->seg##_arbytes.bytes);  \
+    } while (0)
+
+    PrintDebug("ctx rip: %p\n", (void *)(addr_t)ctx->eip);
+
+    /* Instruction pointer, stack pointer and flags */
+    error |= vmcs_write(VMCS_GUEST_RIP, ctx->eip);
+    error |= vmcs_write(VMCS_GUEST_RSP, ctx->esp);
+    error |= vmcs_write(VMCS_GUEST_RFLAGS, ctx->eflags);
+
+    /* Control registers: CR0/CR4 go to the read shadows, CR3 to the shadow paging state */
+    error |= vmcs_write(VMCS_CR0_READ_SHDW, ctx->cr0);
+    info->shdw_pg_state.guest_cr3 = ctx->cr3;
+    error |= vmcs_write(VMCS_CR4_READ_SHDW, ctx->cr4);
+
+    /* Descriptor table registers */
+    error |= vmcs_write(VMCS_GUEST_IDTR_LIMIT, ctx->idtr_limit);
+    error |= vmcs_write(VMCS_GUEST_IDTR_BASE, ctx->idtr_base);
+
+    error |= vmcs_write(VMCS_GUEST_GDTR_LIMIT, ctx->gdtr_limit);
+    error |= vmcs_write(VMCS_GUEST_GDTR_BASE, ctx->gdtr_base);
+
+    /* Segment registers, in the same order as before */
+    RESTORE_SEG(CS, cs);
+    RESTORE_SEG(DS, ds);
+    RESTORE_SEG(ES, es);
+    RESTORE_SEG(SS, ss);
+    RESTORE_SEG(FS, fs);
+    RESTORE_SEG(GS, gs);
+    RESTORE_SEG(TR, tr);
+    RESTORE_SEG(LDTR, ldtr);
+
+#undef RESTORE_SEG
+
+    return error;
+}
+
+
diff --git a/palacios/src/palacios/vmx_ctrl_regs.c b/palacios/src/palacios/vmx_ctrl_regs.c
new file mode 100644 (file)
index 0000000..10503b4
--- /dev/null
@@ -0,0 +1,65 @@
+
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmx_ctrl_regs.h>
+#include <palacios/vmm.h>
+#include <palacios/vmx_lowlevel.h>
+#include <palacios/vmx.h>
+#include <palacios/vmx_assist.h>
+#include <palacios/vm_guest_mem.h>
+
+static int handle_mov_to_cr0(struct guest_info * info, v3_reg_t new_val);
+
+/*
+ * Public entry point for a guest write to CR0 under VMX.
+ * Forwards to handle_mov_to_cr0() and returns its result unchanged.
+ */
+int v3_vmx_handle_cr0_write(struct guest_info * info, v3_reg_t new_val) {
+    return handle_mov_to_cr0(info, new_val);
+}
+
+/*
+ * Handle a guest write of new_val to CR0.
+ *
+ * Only values with the PE bit set are handled: they trigger a VMXASSIST
+ * context switch, after which VMXASSIST is marked as disabled.
+ *
+ * Returns 0 on success, -1 if PE is clear or the context switch fails.
+ */
+static int handle_mov_to_cr0(struct guest_info * info, v3_reg_t new_val) {
+    struct cr0_32 * cur_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
+    struct cr0_32 * req_cr0 = (struct cr0_32 *)&new_val;
+    struct cr0_32 * shdw_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
+
+    PrintDebug("CR0 RIP: %p\n", (void *)info->rip);
+    PrintDebug("Old CR0: 0x%x\n", *(uint32_t *)cur_cr0);
+    PrintDebug("Old shadow CR0: 0x%x\n", *(uint32_t *)shdw_cr0);
+    PrintDebug("New CR0: 0x%x\n", *(uint32_t *)req_cr0);
+
+    /* Writes that leave PE clear are not handled here */
+    if (!req_cr0->pe) {
+        return -1;
+    }
+
+    if (v3_vmxassist_ctx_switch(info) != 0) {
+        PrintError("Unable to execute VMXASSIST context switch!\n");
+        return -1;
+    }
+
+    ((struct vmx_data *)info->vmm_data)->state = VMXASSIST_DISABLED;
+
+    PrintDebug("New Shadow: 0x%x\n", *(uint32_t *)shdw_cr0);
+    PrintDebug("mem_mode: %s\n", v3_mem_mode_to_str(v3_get_vm_mem_mode(info))); 
+
+    return 0;
+}
+
index d6eebdc..f872aee 100644 (file)
 #include <palacios/vmx_io.h>
 #include <palacios/vmx.h>
 #include <palacios/vmm_ctrl_regs.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vmx_ctrl_regs.h>
+#include <palacios/vmx_assist.h>
 
 
+/*
+ * Write val to the given VMCS field and log a PrintError on failure.
+ * Returns the vmcs_write() result unchanged (VMX_SUCCESS on success), so
+ * callers can OR several results together.
+ */
 static int inline check_vmcs_write(vmcs_field_t field, addr_t val)
 {
     int ret = 0;
-    ret = vmcs_write(field,val);
+    ret = vmcs_write(field, val);
 
     if (ret != VMX_SUCCESS) {
         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
-        return 1;
     }
 
-    return 0;
+    return ret;
 }
 
+/*
+ * Read the given VMCS field into *val and log a PrintError on failure.
+ * Returns the vmcs_read() result unchanged (VMX_SUCCESS on success).
+ */
 static int inline check_vmcs_read(vmcs_field_t field, void * val)
 {
     int ret = 0;
-    ret = vmcs_read(field,val);
+    ret = vmcs_read(field, val);
 
-    if(ret != VMX_SUCCESS) {
+    if (ret != VMX_SUCCESS) {
         PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
-        return ret;
     }
 
-    return 0;
+    return ret;
 }
 
 static void inline translate_access_to_v3_seg(struct vmcs_segment_access * access, 
-        struct v3_segment * v3_seg)
-{
+                                             struct v3_segment * v3_seg) {
     v3_seg->type = access->type;
     v3_seg->system = access->desc_type;
     v3_seg->dpl = access->dpl;
@@ -66,15 +66,13 @@ static void inline translate_access_to_v3_seg(struct vmcs_segment_access * acces
     v3_seg->granularity = access->granularity;
 }
 
-static void load_vmcs_guest_state(struct guest_info * info)
+static int load_vmcs_guest_state(struct guest_info * info)
 {
-    check_vmcs_read(VMCS_GUEST_RIP, &(info->rip));
-    check_vmcs_read(VMCS_GUEST_RSP, &(info->vm_regs.rsp));
-    check_vmcs_read(VMCS_GUEST_CR0, &(info->ctrl_regs.cr0));
-    check_vmcs_read(VMCS_GUEST_CR3, &(info->ctrl_regs.cr3));
-    check_vmcs_read(VMCS_GUEST_CR4, &(info->ctrl_regs.cr4));
 
     struct vmcs_segment_access access;
+    int ret = 0;
+
+    // JRL: Add error checking
 
     memset(&access, 0, sizeof(access));
 
@@ -150,9 +148,23 @@ static void load_vmcs_guest_state(struct guest_info * info)
     /* IDTR Segment */
     check_vmcs_read(VMCS_GUEST_IDTR_BASE, &(info->segments.idtr.base));
     check_vmcs_read(VMCS_GUEST_IDTR_LIMIT, &(info->segments.idtr.limit));
+
+
+    /* 
+     *  Read the control state
+     */
+    check_vmcs_read(VMCS_GUEST_RIP, &(info->rip));
+    check_vmcs_read(VMCS_GUEST_RSP, &(info->vm_regs.rsp));
+    check_vmcs_read(VMCS_GUEST_CR0, &(info->ctrl_regs.cr0));
+    check_vmcs_read(VMCS_CR0_READ_SHDW, &(info->shdw_pg_state.guest_cr0));
+    check_vmcs_read(VMCS_GUEST_CR3, &(info->ctrl_regs.cr3));
+    check_vmcs_read(VMCS_GUEST_CR4, &(info->ctrl_regs.cr4));
+
+    return ret;
 }
 
 
+#if 0
 static void setup_v8086_mode_for_boot(struct guest_info * info)
 {
 
@@ -163,7 +175,6 @@ static void setup_v8086_mode_for_boot(struct guest_info * info)
     flags->iopl = 3;
 
     info->rip = 0xfff0;
-    //info->vm_regs.rsp = 0x0;
    
     /* Zero the segment registers */
     memset(&(info->segments), 0, sizeof(struct v3_segment)*6);
@@ -193,32 +204,19 @@ static void setup_v8086_mode_for_boot(struct guest_info * info)
         seg_ptr[i].granularity = 0;
     }
 
-    PrintDebug("END INFO!\n");
-#if 0
-    for(i = 6; i < 10; i++) {
-        seg_ptr[i].base = 0x0;
-        seg_ptr[i].limit = 0xffff;
-    }
-
-    info->segments.ldtr.type = 2;
-    info->segments.ldtr.system = 0;
-    info->segments.ldtr.present = 1;
-    info->segments.ldtr.granularity = 0;
-
-    info->segments.tr.type = 3;
-    info->segments.tr.system = 0;
-    info->segments.tr.present = 1;
-    info->segments.tr.granularity = 0;
-#endif
 }
 
-static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual)
-{
+#endif
+    
+static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual) {
     struct vmexit_cr_qual * cr_qual = (struct vmexit_cr_qual *)&exit_qual;
 
-    if(cr_qual->access_type < 2) {
-        ulong_t reg = 0;
-        switch(cr_qual->gpr) {
+    PrintDebug("Control register: %d\n", cr_qual->access_type);
+
+    if (cr_qual->access_type < 2) {
+        v3_reg_t reg = 0;
+       
+       switch(cr_qual->gpr) {
             case 0:
                 reg = info->vm_regs.rax;
                 break;
@@ -268,97 +266,138 @@ static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual)
                 reg = info->vm_regs.r15;
                 break;
         }
-        PrintDebug("RAX: %p\n", (void *)info->vm_regs.rax);
-
-        if(cr_qual->cr_id == 0
-                && (~reg & CR0_PE)
-                && ((struct vmx_data*)info->vmm_data)->state == VMXASSIST_STARTUP) {
-            setup_v8086_mode_for_boot(info);
-            info->shdw_pg_state.guest_cr0 = 0x0;
-            v3_update_vmcs_guest_state(info);
+
+        if (cr_qual->cr_id == 0) {
+            uint32_t instr_len;
+
+            vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
+
+            if ( ~reg & CR0_PE ) {
+
+                if (v3_vmxassist_ctx_switch(info) != 0) {
+                    PrintError("Unable to execute VMXASSIST context switch!\n");
+                    return -1;
+                }
+
+                load_vmcs_guest_state(info);
+
+                ((struct vmx_data *)info->vmm_data)->state = VMXASSIST_ENABLED;
+
+                PrintDebug("Loading vmxassist at RIP: 0x%p\n", (void *)info->rip);
+                return 0;
+            } else if (v3_vmx_handle_cr0_write(info, reg) != 0) {
+               PrintError("Could not handle CR0 Write\n");
+                return -1;
+            }
+
+            load_vmcs_guest_state(info);
+
+            PrintDebug("Leaving VMXASSIST and entering protected mode at RIP: 0x%p\n", (void *)info->rip);
+
             return 0;
         }
     }
+
     PrintError("Unhandled CR access\n");
     return -1;
 }
 
 
-int v3_handle_vmx_exit(struct v3_gprs * gprs, struct guest_info * info)
-{
+/* At this point the GPRs are already copied into the guest_info state */
+int v3_handle_vmx_exit(struct v3_gprs * gprs, struct guest_info * info) {
     uint32_t exit_reason;
     ulong_t exit_qual;
 
     check_vmcs_read(VMCS_EXIT_REASON, &exit_reason);
     check_vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
 
-    PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_reason, exit_qual);
+    // PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_reason, exit_qual);
 
     /* Update guest state */
     load_vmcs_guest_state(info);
   
-    switch(exit_reason)
-    {
-        case VMEXIT_INFO_EXCEPTION_OR_NMI:
-        {
-            uint32_t int_info;
-            pf_error_t error_code;
-            check_vmcs_read(VMCS_EXIT_INT_INFO, &int_info);
-            check_vmcs_read(VMCS_EXIT_INT_ERR, &error_code);
-
-            if((uint8_t)int_info == 0x0e) {
-                PrintDebug("Page Fault at %p\n", (void*)exit_qual);
-                if(info->shdw_pg_mode == SHADOW_PAGING) {
-                    if(v3_handle_shadow_pagefault(info, (addr_t)exit_qual, error_code) == -1) {
-                        return -1;
-                    }
-                } else {
-                    PrintError("Page fault in unimplemented paging mode\n");
-                    return -1;
-                }
-            } else {
-                PrintDebug("Unknown exception: 0x%x\n", (uint8_t)int_info);
-                v3_print_GPRs(info);
-                return -1;
-            }
-            break;
-        }
-
-        case VMEXIT_IO_INSTR: 
-        {
-            struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
-
-            if(io_qual->dir == 0) {
-                if(io_qual->string) {
-                    if(v3_handle_vmx_io_outs(info) == -1) {
-                        return -1;
-                    }
-                } else {
-                    if(v3_handle_vmx_io_out(info) == -1) {
-                        return -1;
-                    }
-                }
-            } else {
-                if(io_qual->string) {
-                    if(v3_handle_vmx_io_ins(info) == -1) {
-                        return -1;
-                    }
-                } else {
-                    if(v3_handle_vmx_io_in(info) == -1) {
-                        return -1;
-                    }
-                }
-            }
-            break;
-        }
-
+    switch (exit_reason) {
+        case VMEXIT_INFO_EXCEPTION_OR_NMI: {
+           uint32_t int_info;
+           pf_error_t error_code;
+
+           check_vmcs_read(VMCS_EXIT_INT_INFO, &int_info);
+           check_vmcs_read(VMCS_EXIT_INT_ERR, &error_code);
+           
+           // JRL: Change "0x0e" to a macro value
+           if ((uint8_t)int_info == 0x0e) {
+               PrintDebug("Page Fault at %p\n", (void *)exit_qual);
+               
+               if (info->shdw_pg_mode == SHADOW_PAGING) {
+                   if (v3_handle_shadow_pagefault(info, (addr_t)exit_qual, error_code) == -1) {
+                       PrintError("Error handling shadow page fault\n");
+                       return -1;
+                   }
+               } else {
+                   PrintError("Page fault in unimplemented paging mode\n");
+                   return -1;
+               }
+           } else {
+               PrintDebug("Unknown exception: 0x%x\n", (uint8_t)int_info);
+               v3_print_GPRs(info);
+               return -1;
+           }
+           break;
+       }
+           
+        case VMEXIT_CPUID: {
+           int instr_len;
+
+           v3_cpuid(info->vm_regs.rax, (addr_t *)&(info->vm_regs.rax), (addr_t *)&(info->vm_regs.rbx), 
+                    (addr_t *)&(info->vm_regs.rcx), (addr_t *)&(info->vm_regs.rdx));
+
+           check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
+
+           info->rip += instr_len;
+           break;
+       }
+           
+        case VMEXIT_IO_INSTR: {
+           struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
+           
+           if (io_qual->dir == 0) {
+               if (io_qual->string) {
+                   if (v3_handle_vmx_io_outs(info) == -1) {
+                       PrintError("Error in outs IO handler\n");
+                       return -1;
+                   }
+               } else {
+                   if (v3_handle_vmx_io_out(info) == -1) {
+                       PrintError("Error in out IO handler\n");
+                       return -1;
+                   }
+               }
+           } else {
+               if (io_qual->string) {
+                   if(v3_handle_vmx_io_ins(info) == -1) {
+                       PrintError("Error in ins IO handler\n");
+                       return -1;
+                   }
+               } else {
+                   if (v3_handle_vmx_io_in(info) == -1) {
+                       PrintError("Error in in IO handler\n");
+                       return -1;
+                   }
+               }
+           }
+           break;
+       }
+           
         case VMEXIT_CR_REG_ACCESSES:
-            if(handle_cr_access(info,exit_qual) != 0)
+            if (handle_cr_access(info,exit_qual) != 0) {
+               PrintError("Error handling CR access\n");
                 return -1;
+           }
+
             break;
 
         default:
-            PrintError("Unhandled VMEXIT\n");
+            PrintError("Unhandled VMEXIT: %u (0x%x), %lu (0x%lx)\n", exit_reason, exit_reason, exit_qual, exit_qual);
             return -1;
     }
 
index 2f2596d..0f841e4 100644 (file)
@@ -1,3 +1,21 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
 
 #include <palacios/vmx_io.h>
 #include <palacios/vmm_io.h>
@@ -5,6 +23,15 @@
 #include <palacios/vmx_lowlevel.h>
 #include <palacios/vmm.h>
 #include <palacios/vmx_handler.h>
+#include <palacios/vmm_ctrl_regs.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmm_decoder.h>
+
+#ifndef CONFIG_DEBUG_IO
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
 
 /* Same as SVM */
 static int update_map(struct guest_info * info, uint16_t port, int hook_read, int hook_write)
@@ -32,34 +59,34 @@ int v3_init_vmx_io_map(struct guest_info * info)
     return 0;
 }
 
-int v3_handle_vmx_io_in(struct guest_info * info)
-{
+int v3_handle_vmx_io_in(struct guest_info * info) {
     ulong_t exit_qual;
+    uint32_t instr_length = 0;
 
     vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
 
     struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
 
-    struct v3_io_hook * hook = v3_get_io_hook(info,io_qual->port);
+    struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
     int read_size = 0;
 
-    if(hook == NULL) {
+    if (hook == NULL) {
         PrintError("Hook not present for IN on port %x\n", io_qual->port);
         return -1;
     }
 
-    read_size = 1<<(io_qual->access_size);
+    read_size = io_qual->access_size + 1;
 
     PrintDebug("IN of %d bytes on port %d (0x%x)\n", read_size, io_qual->port, io_qual->port);
 
-    if(hook->read(io_qual->port, &(info->vm_regs.rax), read_size, hook->priv_data) != read_size) {
+    if (hook->read(io_qual->port, &(info->vm_regs.rax), read_size, hook->priv_data) != read_size) {
         PrintError("Read failure for IN on port %x\n", io_qual->port);
         return -1;
     }
 
-    uint32_t instr_length = 0;
 
-    if(vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
+
+    if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
         PrintError("Could not read instruction length\n");
         return -1;
     }
@@ -71,12 +98,74 @@ int v3_handle_vmx_io_in(struct guest_info * info)
 
+/*
+ * Handle a VM exit caused by an INS / REP INS instruction.
+ *
+ * The port, access size and REP flag are taken from the exit qualification.
+ * For each iteration the port's read hook is called with a pointer into
+ * guest memory (the guest linear address from the VMCS, translated to a host
+ * address), then RDI is stepped by the access size in the direction given by
+ * RFLAGS.DF and, for REP, RCX is decremented. Finally RIP is advanced by the
+ * exit instruction length.
+ *
+ * A REP count that masks to zero performs no transfers.
+ *
+ * NOTE(review): the destination is translated once and then stepped linearly
+ * in host address space; a transfer crossing a guest page boundary
+ * presumably needs re-translation -- confirm.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
 int v3_handle_vmx_io_ins(struct guest_info * info)
 {
-    PrintDebug("INS not implemented\n");
-    return -1;
+    ulong_t exit_qual;
+
+    vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
+
+    struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
+    struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
+    int read_size;
+    addr_t guest_va = 0;
+    addr_t host_addr = 0;
+    int rdi_change;
+    ulong_t rep_num = 1;
+    uint32_t instr_len = 0;
+
+    if (hook == NULL) {
+        PrintError("Hook not present for INS on port 0x%x\n", io_qual->port);
+        return -1;
+    }
+
+    PrintDebug("INS on port 0x%x\n", io_qual->port);
+
+    read_size = io_qual->access_size + 1;
+
+    if (io_qual->rep) {
+        // Grab the address sized bits of rcx
+        rep_num = info->vm_regs.rcx & get_gpr_mask(info);
+    }
+    
+    if ( ((struct rflags *)&(info->ctrl_regs.rflags))->df ) {
+        rdi_change = -read_size;
+    } else {
+        rdi_change = read_size;
+    }
+
+    PrintDebug("INS size=%d for %ld steps\n", read_size, rep_num);
+
+    /* Only touch guest memory when there is at least one element to transfer */
+    if (rep_num > 0) {
+        if (vmcs_read(VMCS_GUEST_LINEAR_ADDR, &guest_va) != VMX_SUCCESS) {
+            PrintError("Could not read guest linear address\n");
+            return -1;
+        }
+
+        if (guest_va_to_host_va(info, guest_va, &host_addr) == -1) {
+            PrintError("Could not convert Guest VA to host VA\n");
+            return -1;
+        }
+    }
+
+    /* while (not do/while): a zero count must not underflow the unsigned counter */
+    while (rep_num > 0) {
+        if (hook->read(io_qual->port, (char *)host_addr, read_size, hook->priv_data) != read_size) {
+            PrintError("Read Failure for INS on port 0x%x\n", io_qual->port);
+            return -1;
+        }
+
+        host_addr += rdi_change;
+        info->vm_regs.rdi += rdi_change;
+
+        if (io_qual->rep) {
+            --info->vm_regs.rcx;
+        }
+        --rep_num;
+    }
+
+    if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len) != VMX_SUCCESS) {
+        PrintError("Could not read instruction length\n");
+        return -1;
+    }
+
+    info->rip += instr_len;
+
+    return 0;
 }
 
-int v3_handle_vmx_io_out(struct guest_info * info)
-{
+
+
+int v3_handle_vmx_io_out(struct guest_info * info) {
     ulong_t exit_qual;
 
     vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
@@ -85,24 +174,24 @@ int v3_handle_vmx_io_out(struct guest_info * info)
 
     struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
 
-    if(hook == NULL) {
+    if (hook == NULL) {
         PrintError("Hook not present for out on port %x\n", io_qual->port);
         return -1;
     }
 
-    int write_size = 1<<(io_qual->access_size);
+    int write_size = io_qual->access_size + 1;
     
     PrintDebug("OUT of %d bytes on port %d (0x%x)\n", write_size, io_qual->port, io_qual->port);
 
 
-    if(hook->write(io_qual->port, &(info->vm_regs.rax), write_size, hook->priv_data) != write_size) {
+    if (hook->write(io_qual->port, &(info->vm_regs.rax), write_size, hook->priv_data) != write_size) {
         PrintError("Write failure for out on port %x\n",io_qual->port);
         return -1;
     }
 
     uint32_t instr_length = 0;
 
-    if(vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
+    if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
         PrintError("Could not read instruction length\n");
         return -1;
     } 
@@ -112,14 +201,72 @@ int v3_handle_vmx_io_out(struct guest_info * info)
     return 0;
 }
 
-int v3_handle_vmx_io_outs(struct guest_info * info)
-{
+
+
+/*
+ * Handle a VM exit caused by an OUTS / REP OUTS instruction.
+ *
+ * The port, access size and REP flag are taken from the exit qualification.
+ * For each iteration the port's write hook is called with a pointer into
+ * guest memory (the guest linear address from the VMCS, translated to a host
+ * address), then RSI is stepped by the access size in the direction given by
+ * RFLAGS.DF and, for REP, RCX is decremented. Finally RIP is advanced by the
+ * exit instruction length.
+ *
+ * A REP count that masks to zero performs no transfers.
+ *
+ * NOTE(review): the source is translated once and then stepped linearly in
+ * host address space; a transfer crossing a guest page boundary presumably
+ * needs re-translation -- confirm.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+int v3_handle_vmx_io_outs(struct guest_info * info) {
     ulong_t exit_qual;
 
     vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
 
     struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
+    struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
+    int write_size;
+    addr_t guest_va = 0;
+    addr_t host_addr = 0;
+    int rsi_change;
+    ulong_t rep_num = 1;
+    uint32_t instr_len = 0;
+
+    if (hook == NULL) {
+        PrintError("Hook not present for OUTS on port 0x%x\n", io_qual->port);
+        return -1;
+    }
+
+    PrintDebug("OUTS on port 0x%x\n", io_qual->port);
+
+    write_size = io_qual->access_size + 1;
+
+    if (io_qual->rep) {
+        // Grab the address sized bits of rcx
+        rep_num = info->vm_regs.rcx & get_gpr_mask(info);
+    }
 
-    PrintDebug("OUTS on port %d, (0x%x)\n", io_qual->port, io_qual->port);
-    return -1;
+    if ( ((struct rflags *)&(info->ctrl_regs.rflags))->df ) {
+        rsi_change = -write_size;
+    } else {
+        rsi_change = write_size;
+    }
+
+    PrintDebug("OUTS size=%d for %ld steps\n", write_size, rep_num);
+
+    /* Only touch guest memory when there is at least one element to transfer */
+    if (rep_num > 0) {
+        if (vmcs_read(VMCS_GUEST_LINEAR_ADDR, &guest_va) != VMX_SUCCESS) {
+            PrintError("Could not read guest linear address\n");
+            return -1;
+        }
+
+        if (guest_va_to_host_va(info, guest_va, &host_addr) == -1) {
+            PrintError("Could not convert guest VA to host VA\n");
+            return -1;
+        }
+    }
+
+    /* while (not do/while): a zero count must not underflow the unsigned counter */
+    while (rep_num > 0) {
+        if (hook->write(io_qual->port, (char *)host_addr, write_size, hook->priv_data) != write_size) {
+            PrintError("Write failure for OUTS on port 0x%x\n", io_qual->port);
+            return -1;
+        }
+
+        host_addr += rsi_change;
+        info->vm_regs.rsi += rsi_change;
+
+        if (io_qual->rep) {
+            --info->vm_regs.rcx;
+        }
+        --rep_num;
+    }
+
+    if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len) != VMX_SUCCESS) {
+        PrintError("Could not read instruction length\n");
+        return -1;
+    }
+
+    info->rip += instr_len;
+
+    return 0;
 }
+
index 0b46b88..fa53ffc 100644 (file)
@@ -1,10 +1,27 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
 
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 
 /* Same as SVM */
-static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int hook_writes)
-{
+static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int hook_writes) {
 
 #if 0
     int index = get_bitmap_index(msr);
@@ -29,8 +46,7 @@ static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int
     return 0;
 }
 
-int v3_init_vmx_msr_map(struct guest_info * info)
-{
+int v3_init_vmx_msr_map(struct guest_info * info) {
    struct v3_msr_map * msr_map = &(info->msr_map);
 
    msr_map->update_map = update_map;