DEFINES=-DDEBUG -DTEXTADDR=$(TEXTADDR)
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS  += $(call test-gcc-flag,$(CC),-nopie)
-CFLAGS  += $(call test-gcc-flag,$(CC),-fno-stack-protector)
-CFLAGS  += $(call test-gcc-flag,$(CC),-fno-stack-protector-all)
+CFLAGS  += $(call cc-option,$(CC),-nopie,)
+CFLAGS  += $(call cc-option,$(CC),-fno-stack-protector,)
+CFLAGS  += $(call cc-option,$(CC),-fno-stack-protector-all,)
 
 CPP      = cpp -P
 OBJCOPY  = objcopy -p -O binary -R .note -R .comment -R .bss -S --gap-fill=0
 
-#ifndef __XEN_PUBLIC_HVM_E820_H__
-#define __XEN_PUBLIC_HVM_E820_H__
+#ifndef __HVMLOADER_E820_H__
+#define __HVMLOADER_E820_H__
 
-/* PC BIOS standard E820 types. */
+/* E820 location in HVM virtual address space. */
+#define HVM_E820_PAGE        0x00090000
+#define HVM_E820_NR_OFFSET   0x000001E8
+#define HVM_E820_OFFSET      0x000002D0
+
+#define HVM_BELOW_4G_RAM_END        0xF0000000
+#define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
+#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
+
+
+/*
+ * PC BIOS standard E820 types and structure.
+ */
 #define E820_RAM          1
 #define E820_RESERVED     2
 #define E820_ACPI         3
 #define E820_NVS          4
 
-/* Xen HVM extended E820 types. */
-#define E820_IO          16
-#define E820_SHARED_PAGE 17
-#define E820_XENSTORE    18
-#define E820_BUFFERED_IO 19
-
-/* E820 location in HVM virtual address space. */
-#define E820_MAP_PAGE        0x00090000
-#define E820_MAP_NR_OFFSET   0x000001E8
-#define E820_MAP_OFFSET      0x000002D0
-
 struct e820entry {
     uint64_t addr;
     uint64_t size;
     uint32_t type;
 } __attribute__((packed));
 
-#define HVM_BELOW_4G_RAM_END        0xF0000000
-
-#define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
-#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
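+/* Accessors for the E820 entry count and table at the addresses above. */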
+#define HVM_E820_NR ((unsigned char *)HVM_E820_PAGE + HVM_E820_NR_OFFSET)
+#define HVM_E820    ((struct e820entry *)(HVM_E820_PAGE + HVM_E820_OFFSET))
 
-#endif /* __XEN_PUBLIC_HVM_E820_H__ */
+#endif /* __HVMLOADER_E820_H__ */
 
  * switch happens to the environment below. The magic indicates
  * that this is a valid context.
  */
-#ifdef TEST
-       .byte 0x55, 0xaa
-       .byte 0x80
-       .code16
-       jmp     _start16
-#else
        jmp     _start
-#endif
 
        .align  8
        .long   VMXASSIST_MAGIC
        .long   newctx                  /* new context */
        .long   oldctx                  /* old context */
 
-#ifdef TEST
-/*
- * We are running in 16-bit. Get into the protected mode as soon as
- * possible. We use our own (minimal) GDT to get started.
- *
- * ROM is a misnomer as this code isn't really rommable (although it
- * only requires a few changes) but it does live in a BIOS ROM segment.
- * This code allows me to debug vmxassists under (a modified version of)
- * Bochs and load it as a "optromimage1".
- */
-       .code16
-       .globl  _start16
-_start16:
-        cli
-
-        /* load our own global descriptor table */
-        data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
-
-        /* go to protected mode */
-        movl    %cr0, %eax
-        orl     $CR0_PE, %eax
-        movl    %eax, %cr0
-        data32  ljmp $0x08, $1f
-
-        .align  32
-        .globl  rom_gdt
-rom_gdt:
-        .word   0, 0            /* 0x00: reserved */
-        .byte   0, 0, 0, 0
-
-        .word   0xFFFF, 0       /* 0x08: CS 32-bit */
-        .byte   0, 0x9A, 0xCF, 0
-
-        .word   0xFFFF, 0       /* 0x10: CS 32-bit */
-        .byte   0, 0x92, 0xCF, 0
-rom_gdt_end:
-
-        .align  4
-        .globl  rom_gdtr
-rom_gdtr:
-        .word   rom_gdt_end - rom_gdt - 1
-        .long   rom_gdt
-
-        .code32
-1:
-        /* welcome to the 32-bit world */
-        movw    $0x10, %ax
-        movw    %ax, %ds
-        movw    %ax, %es
-        movw    %ax, %ss
-        movw    %ax, %fs
-        movw    %ax, %gs
-
-        /* enable Bochs debug facilities */
-        movw    $0x8A00, %dx
-        movw    $0x8A00, %ax
-        outw    %ax, (%dx)
-
-       jmp     _start
-#endif /* TEST */
-
 /*
 * This is the real start. Control was transferred to this point
  * with CR0_PE set and executing in some 32-bit segment. We call
        cli
 
        /* save register parameters to C land */
-#ifdef TEST
-       xorl    %edx, %edx
-#endif
 
        /* clear bss */
        cld
        clts
 
        /* setup my own stack */
-       movl    $stack_top - 4*4, %esp
+       movl    $stack_top, %esp
        movl    %esp, %ebp
 
        /* go ... */
 halt:
        push    $halt_msg
        call    printf
-#ifdef TEST
-        movw    $0x8A00, %dx
-        movw    $0x8AE0, %ax
-        outw    %ax, (%dx)
-#endif
        cli
        jmp     .
 
 
 #define CR4_PSE                (1 << 4)
 #define CR4_PAE                (1 << 5)
 
+#define EFLAGS_CF      (1 << 0)
+#define EFLAGS_PF      (1 << 2)
+#define EFLAGS_AF      (1 << 4)
 #define EFLAGS_ZF      (1 << 6)
+#define EFLAGS_SF      (1 << 7)
 #define EFLAGS_TF      (1 << 8)
 #define EFLAGS_IF      (1 << 9)
 #define EFLAGS_DF      (1 << 10)
+#define EFLAGS_OF      (1 << 11)
 #define EFLAGS_IOPL    (3 << 12)
 #define EFLAGS_VM      ((1 << 17) | EFLAGS_IOPL)
 #define EFLAGS_VIF     (1 << 19)
 #define        LPGSIZE         (1 << LOG_PDSIZE)       /* large page size */
 #define        LPGMASK         (~(LPGSIZE - 1))        /* large page mask */
 
-#ifdef TEST
-#define        PTE_P           (1 << 0)        /* Present */
-#define        PTE_RW          (1 << 1)        /* Read/Write */
-#define        PTE_US          (1 << 2)        /* User/Supervisor */
-#define        PTE_PS          (1 << 7)        /* Page Size */
-#endif
-
 /* Programmable Interrupt Controller (PIC) defines */
 #define        PIC_MASTER      0x20
 #define        PIC_SLAVE       0xA0
 #ifdef ENABLE_VME
        unsigned long   int_redir[8];
 #endif
-       unsigned char   iomap[8192];
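+       /* I/O permission bitmap plus the trailing 0xff terminator byte
+        * the TSS format requires (set in setup_gdt()). */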
+       unsigned char   iomap[8193];
 };
 
 static inline void
        __asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
 }
 
-#ifdef TEST
-static inline void
-breakpoint(void)
-{
-       outw(0x8A00, 0x8AE0);
-}
-#endif /* TEST */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __MACHINE_H__ */
 
 
 struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
 
-#ifdef TEST
-unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
-
-struct e820entry e820map[] = {
-       { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
-       { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
-       { 0x00000000000A0000ULL, 0x0000000000020000ULL, E820_IO },
-       { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
-       { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
-       { 0x0000000000000000ULL, 0x0000000000001000ULL, E820_SHARED_PAGE },
-       { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
-       { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
-       { 0x00000000FEC00000ULL, 0x0000000001400000ULL, E820_IO },
-};
-#endif /* TEST */
-
 struct vmx_assist_context oldctx;
 struct vmx_assist_context newctx;
 
 unsigned long memory_size;
 int initialize_real_mode;
 
-extern char stack[], stack_top[];
+extern char stack_top[];
 extern unsigned trap_handlers[];
 
 void
                    (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
        memory_size += 0x400 << 10; /* + 1MB */
 
-#ifdef TEST
-       /* Create an SMAP for our debug environment */
-       e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
-       e820map[5].addr = memory_size - PGSIZE;
-       e820map[6].addr = memory_size;
-       e820map[7].addr += memory_size;
-
-       *E820_MAP_NR = sizeof(e820map)/sizeof(e820map[0]);
-       memcpy(E820_MAP, e820map, sizeof(e820map));
-#endif
-
        printf("Memory size %ld MB\n", memory_size >> 20);
        printf("E820 map:\n");
-       print_e820_map(E820_MAP, *E820_MAP_NR);
+       print_e820_map(HVM_E820, *HVM_E820_NR);
        printf("\n");
 }
 
-#ifdef TEST
-void
-setup_paging(void)
-{
-       unsigned long i;
-
-       if (((unsigned)pgd & ~PGMASK) != 0)
-               panic("PGD not page aligned");
-       set_cr4(get_cr4() | CR4_PSE);
-       for (i = 0; i < NR_PGD; i++)
-               pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
-       set_cr3((unsigned) pgd);
-       set_cr0(get_cr0() | (CR0_PE|CR0_PG));
-}
-#endif /* TEST */
-
 void
 setup_gdt(void)
 {
        /* setup task state segment */
        memset(&tss, 0, sizeof(tss));
        tss.ss0 = DATA_SELECTOR;
-       tss.esp0 = (unsigned) stack_top - 4*4;
+       tss.esp0 = (unsigned) stack_top;
        tss.iomap_base = offsetof(struct tss, iomap);
+       tss.iomap[sizeof(tss.iomap)-1] = 0xff;
 
        /* initialize gdt's tss selector */
        gdt[TSS_SELECTOR / sizeof(gdt[0])] |=
 enter_real_mode(struct regs *regs)
 {
        /* mask off TSS busy bit */
-        gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL;
+       gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL;
 
        /* start 8086 emulation of BIOS */
        if (initialize_real_mode) {
                regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
                if (booting_cpu == 0) {
                        regs->cs = 0xF000; /* ROM BIOS POST entry point */
-#ifdef TEST
-                       regs->eip = 0xFFE0;
-#else
                        regs->eip = 0xFFF0;
-#endif
                } else {
                        regs->cs = booting_vector << 8; /* AP entry point */
                        regs->eip = 0;
                }
-               regs->uesp = 0;
-               regs->uss = 0;
+
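+               /* Hand the BIOS a clean register state at its entry point. */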
+               regs->uesp = regs->uss = 0;
+               regs->eax = regs->ecx = regs->edx = regs->ebx = 0;
+               regs->esp = regs->ebp = regs->esi = regs->edi = 0;
 
                /* intercept accesses to the PIC */
                setiomap(PIC_MASTER+PIC_CMD);
 
                /* this should get us into 16-bit mode */
                return;
-       } else {
-               /* go from protected to real mode */
-               regs->eflags |= EFLAGS_VM;
-
-               set_mode(regs, VM86_PROTECTED_TO_REAL);
-
-               emulate(regs);
        }
+
+       /* go from protected to real mode */
+       set_mode(regs, VM86_PROTECTED_TO_REAL);
+       emulate(regs);
+       if (mode != VM86_REAL)
+               panic("failed to emulate between clear PE and long jump.\n");
 }
 
 /*
 
        memset(c, 0, sizeof(*c));
        c->eip = (unsigned long) switch_to_real_mode;
-       c->esp = (unsigned) stack_top - 4*4;
+       c->esp = (unsigned) stack_top;
        c->eflags = 0x2; /* no interrupts, please */
 
        /*
         * more natural to enable CR0.PE to cause a world switch to
         * protected mode rather than disabling it.
         */
-#ifdef TEST
-       c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
-       c->cr3 = (unsigned long) pgd;
-#else
        c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
        c->cr3 = 0;
-#endif
        c->cr4 = get_cr4();
 
        c->idtr_limit = sizeof(idt)-1;
 int
 main(void)
 {
-    printf("Hello from VMXAssist\n");
-
        if (booting_cpu == 0)
                banner();
 
-#ifdef TEST
-       setup_paging();
-#endif
-
        setup_gdt();
        setup_idt();
 
-#ifndef        TEST
        set_cr4(get_cr4() | CR4_VME);
-#endif
 
        setup_ctx();
 
 
        .code32
        .align  16
 common_trap:                           /* common trap handler */
-       pushl   %gs
-       pushl   %fs
-       pushl   %ds
-       pushl   %es
        pushal
 
-       movl    $DATA_SELECTOR, %eax    /* make sure these are sane */
+       movl    $(DATA_SELECTOR), %eax  /* make sure these are sane */
        movl    %eax, %ds
        movl    %eax, %es
        movl    %eax, %fs
        movl    %esp, %ebp
 
        pushl   %ebp
-       pushl   52(%ebp)
-       pushl   48(%ebp)
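+       /* trapno/errno sit 16 bytes closer to %ebp now that the segment
+        * registers are no longer saved on the trap frame */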
+       pushl   36(%ebp)
+       pushl   32(%ebp)
        call    trap                    /* trap(trapno, errno, regs) */
        addl    $12, %esp
 
 trap_return:
        popal
-       popl    %es
-       popl    %ds
-       popl    %fs
-       popl    %gs
        addl    $8, %esp                /* skip trapno, errno */
        iret
        /* NOT REACHED */
        pushl   oldctx+VMX_ASSIST_CTX_EIP
        pushl   $-1                     /* trapno, errno */
        pushl   $-1
-       pushl   %gs
-       pushl   %fs
-       pushl   %ds
-       pushl   %es
        pushal
 
        movl    %esp, %ebp
 
 
 static void putchar(int);
 static char *printnum(char *, unsigned long, int);
-static void _doprint(void (*)(int), char const *, va_list);
+static void _doprint(void (*)(int), const char *, va_list);
 
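+/*
+ * Read the 64-bit value at physical address 'addr' (which may lie above 4G)
+ * by issuing CPUID on a hypervisor leaf (one above the value leaf 0x40000000
+ * returns in EAX); the address is passed in ECX:EDX (low:high) and the value
+ * comes back in ECX:EDX.
+ */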
+void
+cpuid_addr_value(uint64_t addr, uint64_t *value)
+{
+       uint32_t addr_low   = (uint32_t)addr;
+       uint32_t addr_high  = (uint32_t)(addr >> 32);
+       uint32_t value_low, value_high;
+       static unsigned int addr_leaf;
+
+       if (!addr_leaf) {
+               unsigned int eax, ebx, ecx, edx;
+               __asm__ __volatile__(
+                       "cpuid"
+                       : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+                       : "0" (0x40000000));
+               addr_leaf = eax + 1;
+       }
+
+       __asm__ __volatile__(
+               "cpuid"
+               : "=c" (value_low), "=d" (value_high)
+               : "a" (addr_leaf), "0" (addr_low), "1" (addr_high)
+               : "ebx");
+
+       *value = (uint64_t)value_high << 32 | value_low;
+}
 
 void
 dump_regs(struct regs *regs)
                regs->eax, regs->ecx, regs->edx, regs->ebx);
        printf("esp    %8x ebp    %8x esi    %8x edi    %8x\n",
                regs->esp, regs->ebp, regs->esi, regs->edi);
-       printf("eip    %8x eflags %8x cs     %8x ds     %8x\n",
-               regs->eip, regs->eflags, regs->cs, regs->ds);
-       printf("es     %8x fs     %8x uss    %8x uesp   %8x\n",
-               regs->es, regs->fs, regs->uss, regs->uesp);
+       printf("trapno %8x errno  %8x\n", regs->trapno, regs->errno);
+       printf("eip    %8x cs     %8x eflags %8x\n",
+               regs->eip, regs->cs, regs->eflags);
+       printf("uesp   %8x uss    %8x\n",
+               regs->uesp, regs->uss);
        printf("ves    %8x vds    %8x vfs    %8x vgs    %8x\n",
                regs->ves, regs->vds, regs->vfs, regs->vgs);
-       if (regs->trapno != -1 || regs->errno != -1)
-               printf("trapno %8x errno  %8x\n", regs->trapno, regs->errno);
 
-       printf("cr0    %8lx cr2    %8x cr3    %8lx cr4    %8lx\n",
+       printf("cr0    %8lx cr2    %8x cr3    %8lx cr4    %8lx\n\n",
                (long)oldctx.cr0, get_cr2(),
                (long)oldctx.cr3, (long)oldctx.cr4);
 }
  * but still powerful enough for most tasks.
  */
 static void
-_doprint(void (*put)(int), char const *fmt, va_list ap)
+_doprint(void (*put)(int), const char *fmt, va_list ap)
 {
        register char *str, c;
        int lflag, zflag, nflag;
 
 #include <stdarg.h>
 #include <vm86.h>
 
-#include <e820.h>
-#define E820_MAP_NR ((unsigned char *)E820_MAP_PAGE + E820_MAP_NR_OFFSET)
-#define E820_MAP    ((struct e820entry *)(E820_MAP_PAGE + E820_MAP_OFFSET))
-
 #define        offsetof(type, member)  ((unsigned) &((type *)0)->member)
 
 struct vmx_assist_context;
 
+#include "e820.h"
+
+extern void cpuid_addr_value(uint64_t addr, uint64_t *value);
 extern void hexdump(unsigned char *, int);
 extern void dump_regs(struct regs *);
 extern void dump_vmx_context(struct vmx_assist_context *);
 
 /*
  * vm86.c: A vm86 emulator. The main purpose of this emulator is to do as
- * little work as possible. 
+ * little work as possible.
  *
  * Leendert van Doorn, leendert@watson.ibm.com
  * Copyright (c) 2005-2006, International Business Machines Corporation.
 #define        SEG_SS          0x0020
 #define        SEG_FS          0x0040
 #define        SEG_GS          0x0080
+#define REP            0x0100
 
 static unsigned prev_eip = 0;
 enum vm86_mode mode = 0;
 static char *rnames[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
 #endif /* DEBUG */
 
-#define PDE_PS           (1 << 7)
-#define PT_ENTRY_PRESENT 0x1
+#define PDE_PS                         (1 << 7)
+#define PT_ENTRY_PRESENT       0x1
 
 /* We only support access to <=4G physical memory due to 1:1 mapping */
-static unsigned
-guest_linear_to_real(uint32_t base)
+static uint64_t
+guest_linear_to_phys(uint32_t base)
 {
        uint32_t gcr3 = oldctx.cr3;
        uint64_t l2_mfn;
                l2_mfn = ((uint64_t *)(long)gcr3)[(base >> 30) & 0x3];
                if (!(l2_mfn & PT_ENTRY_PRESENT))
                        panic("l3 entry not present\n");
-               l2_mfn &= 0x3fffff000ULL;
+               l2_mfn &= 0xffffff000ULL;
 
-               l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff];
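+               /* Page-table pages above 4G cannot be dereferenced from this
+                * 32-bit environment, so read the entry via cpuid_addr_value(). */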
+               if (l2_mfn & 0xf00000000ULL) {
+                       printf("l2 page above 4G\n");
+                       cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), &l1_mfn);
+               } else
+                       l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff];
                if (!(l1_mfn & PT_ENTRY_PRESENT))
                        panic("l2 entry not present\n");
 
                if (l1_mfn & PDE_PS) { /* CR4.PSE is ignored in PAE mode */
-                       l0_mfn = l1_mfn & 0x3ffe00000ULL;
+                       l0_mfn = l1_mfn & 0xfffe00000ULL;
                        return l0_mfn + (base & 0x1fffff);
                }
 
-               l1_mfn &= 0x3fffff000ULL;
+               l1_mfn &= 0xffffff000ULL;
 
-               l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff];
+               if (l1_mfn & 0xf00000000ULL) {
+                       printf("l1 page above 4G\n");
+                       cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), &l0_mfn);
+               } else
+                       l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff];
                if (!(l0_mfn & PT_ENTRY_PRESENT))
                        panic("l1 entry not present\n");
-               l0_mfn &= 0x3fffff000ULL;
+
+               l0_mfn &= 0xffffff000ULL;
 
                return l0_mfn + (base & 0xfff);
        }
 static unsigned
 address(struct regs *regs, unsigned seg, unsigned off)
 {
+       uint64_t gdt_phys_base;
        unsigned long long entry;
        unsigned seg_base, seg_limit;
        unsigned entry_low, entry_high;
        }
 
        if (mode == VM86_REAL || seg > oldctx.gdtr_limit ||
-           (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg))
+               (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg))
                return ((seg & 0xFFFF) << 4) + off;
 
-       entry = ((unsigned long long *)
-                 guest_linear_to_real(oldctx.gdtr_base))[seg >> 3];
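+       /* Read the descriptor via cpuid_addr_value() if the GDT's physical
+        * address does not fit in 32 bits. */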
+       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
+       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+               printf("gdt base address above 4G\n");
+               cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3), &entry);
+       } else
+               entry = ((unsigned long long *)(long)gdt_phys_base)[seg >> 3];
+
        entry_high = entry >> 32;
        entry_low = entry & 0xFFFFFFFF;
 
        seg_limit = (entry_high & 0xF0000) | (entry_low & 0xFFFF);
 
        if (entry_high & 0x8000 &&
-           ((entry_high & 0x800000 && off >> 12 <= seg_limit) ||
-           (!(entry_high & 0x800000) && off <= seg_limit)))
+               ((entry_high & 0x800000 && off >> 12 <= seg_limit) ||
+               (!(entry_high & 0x800000) && off <= seg_limit)))
                return seg_base + off;
 
        panic("should never reach here in function address():\n\t"
-             "entry=0x%08x%08x, mode=%d, seg=0x%08x, offset=0x%08x\n",
-             entry_high, entry_low, mode, seg, off);
+                 "entry=0x%08x%08x, mode=%d, seg=0x%08x, offset=0x%08x\n",
+                 entry_high, entry_low, mode, seg, off);
 
        return 0;
 }
        va_list ap;
 
        if ((traceset & (1 << mode)) &&
-          (mode == VM86_REAL_TO_PROTECTED || mode == VM86_REAL)) {
+               (mode == VM86_REAL_TO_PROTECTED || mode == VM86_REAL)) {
                /* 16-bit, seg:off addressing */
                unsigned addr = address(regs, regs->cs, off);
                printf("0x%08x: 0x%x:0x%04x ", addr, regs->cs, off);
                printf("\n");
        }
        if ((traceset & (1 << mode)) &&
-          (mode == VM86_PROTECTED_TO_REAL || mode == VM86_PROTECTED)) {
+               (mode == VM86_PROTECTED_TO_REAL || mode == VM86_PROTECTED)) {
                /* 16-bit, gdt addressing */
                unsigned addr = address(regs, regs->cs, off);
                printf("0x%08x: 0x%x:0x%08x ", addr, regs->cs, off);
        case 1: return regs->ecx;
        case 2: return regs->edx;
        case 3: return regs->ebx;
-       case 4: return regs->esp;
+       case 4: return regs->uesp;
        case 5: return regs->ebp;
        case 6: return regs->esi;
        case 7: return regs->edi;
        case 1: return regs->ecx & 0xFF; /* cl */
        case 2: return regs->edx & 0xFF; /* dl */
        case 3: return regs->ebx & 0xFF; /* bl */
-       case 4: return (regs->esp >> 8) & 0xFF; /* ah */
-       case 5: return (regs->ebp >> 8) & 0xFF; /* ch */
-       case 6: return (regs->esi >> 8) & 0xFF; /* dh */
-       case 7: return (regs->edi >> 8) & 0xFF; /* bh */
+       case 4: return (regs->eax >> 8) & 0xFF; /* ah */
+       case 5: return (regs->ecx >> 8) & 0xFF; /* ch */
+       case 6: return (regs->edx >> 8) & 0xFF; /* dh */
+       case 7: return (regs->ebx >> 8) & 0xFF; /* bh */
        }
        return ~0;
 }
        case 1: regs->ecx = v; break;
        case 2: regs->edx = v; break;
        case 3: regs->ebx = v; break;
-       case 4: regs->esp = v; break;
+       case 4: regs->uesp = v; break;
        case 5: regs->ebp = v; break;
        case 6: regs->esi = v; break;
        case 7: regs->edi = v; break;
        case 1: regs->ecx = (regs->ecx & ~0xFF) | v; break;
        case 2: regs->edx = (regs->edx & ~0xFF) | v; break;
        case 3: regs->ebx = (regs->ebx & ~0xFF) | v; break;
-       case 4: regs->esp = (regs->esp & ~0xFF00) | (v << 8); break;
-       case 5: regs->ebp = (regs->ebp & ~0xFF00) | (v << 8); break;
-       case 6: regs->esi = (regs->esi & ~0xFF00) | (v << 8); break;
-       case 7: regs->edi = (regs->edi & ~0xFF00) | (v << 8); break;
+       case 4: regs->eax = (regs->eax & ~0xFF00) | (v << 8); break;
+       case 5: regs->ecx = (regs->ecx & ~0xFF00) | (v << 8); break;
+       case 6: regs->edx = (regs->edx & ~0xFF00) | (v << 8); break;
+       case 7: regs->ebx = (regs->ebx & ~0xFF00) | (v << 8); break;
        }
 }
 
        if (prefix & SEG_SS)
                seg = regs->uss;
        if (prefix & SEG_FS)
-               seg = regs->fs;
+               seg = regs->vfs;
        if (prefix & SEG_GS)
-               seg = regs->gs;
+               seg = regs->vgs;
        return seg;
 }
 
                        case 2: return address(regs, seg, regs->edx);
                        case 3: return address(regs, seg, regs->ebx);
                        case 4: return address(regs, seg,
-                                              sib(regs, mod, fetch8(regs)));
+                                                  sib(regs, mod, fetch8(regs)));
                        case 5: return address(regs, seg, fetch32(regs));
                        case 6: return address(regs, seg, regs->esi);
                        case 7: return address(regs, seg, regs->edi);
                        case 2: return address(regs, seg, regs->edx + disp);
                        case 3: return address(regs, seg, regs->ebx + disp);
                        case 4: return address(regs, seg,
-                                              sib(regs, mod, fetch8(regs)));
+                                                  sib(regs, mod, fetch8(regs)));
                        case 5: return address(regs, seg, regs->ebp + disp);
                        case 6: return address(regs, seg, regs->esi + disp);
                        case 7: return address(regs, seg, regs->edi + disp);
                }
        }
 
-       return 0; 
+       return 0;
 }
 
 /*
        unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;
 
        TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
-#ifndef TEST
        oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
-#else
-       oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
-#endif
        if (cr0 & CR0_PE)
                set_mode(regs, VM86_REAL_TO_PROTECTED);
 
        unsigned addr = operand(prefix, regs, modrm);
        unsigned val, r = (modrm >> 3) & 7;
 
-       if ((modrm & 0xC0) == 0xC0) /* no registers */
-               return 0;
+       if ((modrm & 0xC0) == 0xC0) {
+               /*
+                * Register-only forms are emulated only during the
+                * protected-to-real transition, where all guest
+                * instructions must be emulated.
+                */
+               if (mode != VM86_PROTECTED_TO_REAL)
+                       return 0;
+       }
 
        switch (opc) {
        case 0x88: /* addr32 mov r8, r/m8 */
                TRACE((regs, regs->eip - eip,
                        "movb %%e%s, *0x%x", rnames[r], addr));
                write8(addr, val);
-               break;
+               return 1;
 
        case 0x8A: /* addr32 mov r/m8, r8 */
                TRACE((regs, regs->eip - eip,
                        "movb *0x%x, %%%s", addr, rnames[r]));
                setreg8(regs, r, read8(addr));
-               break;
+               return 1;
 
        case 0x89: /* addr32 mov r16, r/m16 */
                val = getreg32(regs, r);
+               if ((modrm & 0xC0) == 0xC0) {
+                       if (prefix & DATA32)
+                               setreg32(regs, modrm & 7, val);
+                       else
+                               setreg16(regs, modrm & 7, MASK16(val));
+                       return 1;
+               }
+
                if (prefix & DATA32) {
                        TRACE((regs, regs->eip - eip,
                                "movl %%e%s, *0x%x", rnames[r], addr));
                                "movw %%%s, *0x%x", rnames[r], addr));
                        write16(addr, MASK16(val));
                }
-               break;
+               return 1;
+
+       case 0x8B: /* mov r/m16, r16 */
+               if ((modrm & 0xC0) == 0xC0) {
+                       if (prefix & DATA32)
+                               setreg32(regs, r, addr);
+                       else
+                               setreg16(regs, r, MASK16(addr));
+                       return 1;
+               }
 
-       case 0x8B: /* addr32 mov r/m16, r16 */
                if (prefix & DATA32) {
                        TRACE((regs, regs->eip - eip,
                                "movl *0x%x, %%e%s", addr, rnames[r]));
                                "movw *0x%x, %%%s", addr, rnames[r]));
                        setreg16(regs, r, read16(addr));
                }
-               break;
+               return 1;
 
        case 0xC6: /* addr32 movb $imm, r/m8 */
                if ((modrm >> 3) & 7)
                write8(addr, val);
                TRACE((regs, regs->eip - eip, "movb $0x%x, *0x%x",
                                                        val, addr));
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * We need to handle string moves that address memory beyond the 64KB segment
+ * limit that VM8086 mode enforces.
+ */
+static inline int
+movs(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned eip = regs->eip - 1;
+       unsigned sseg = segment(prefix, regs, regs->vds);
+       unsigned dseg = regs->ves;
+       unsigned saddr, daddr;
+       unsigned count = 1;
+       int incr = ((regs->eflags & EFLAGS_DF) == 0) ? 1 : -1;
+
+       saddr = address(regs, sseg, regs->esi);
+       daddr = address(regs, dseg, regs->edi);
+
+       if ((prefix & REP) != 0) {
+               count = regs->ecx;
+               regs->ecx = 0;
+       }
+
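+       /* %esi/%edi (and %ecx under rep) are advanced for the whole transfer
+        * up front; the loops below walk the precomputed linear addresses. */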
+       switch (opc) {
+       case 0xA4: /* movsb */
+               regs->esi += (incr * count);
+               regs->edi += (incr * count);
+
+               while (count-- != 0) {
+                       write8(daddr, read8(saddr));
+                       daddr += incr;
+                       saddr += incr;
+               }
+               TRACE((regs, regs->eip - eip, "movsb (%%esi),%%es:(%%edi)"));
+               break;
+
+       case 0xA5: /* movsw */
+               if ((prefix & DATA32) == 0) {
+                       incr = 2 * incr;
+                       regs->esi += (incr * count);
+                       regs->edi += (incr * count);
+
+                       while (count-- != 0) {
+                               write16(daddr, read16(saddr));
+                               daddr += incr;
+                               saddr += incr;
+                       }
+               } else {
+                       incr = 4 * incr;
+                       regs->esi += (incr * count);
+                       regs->edi += (incr * count);
+
+                       while (count-- != 0) {
+                               write32(daddr, read32(saddr));
+                               daddr += incr;
+                               saddr += incr;
+                       }
+               }
+               TRACE((regs, regs->eip - eip, "movsw (%%esi),%%es:(%%edi)"));
                break;
        }
+
        return 1;
 }
 
+static inline int
+lods(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned eip = regs->eip - 1;
+       unsigned seg = segment(prefix, regs, regs->vds);
+       unsigned addr = address(regs, seg, regs->esi);
+       unsigned count = 1;
+       int incr = ((regs->eflags & EFLAGS_DF) == 0) ? 1 : -1;
+
+       if ((prefix & REP) != 0) {
+               count = regs->ecx;
+               regs->ecx = 0;
+       }
+
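+       /* Only lodsw/lodsl (opcode 0xAD) is handled; %esi is advanced for the
+        * whole transfer and %ax/%eax ends up holding the last element read. */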
+       switch (opc) {
+       case 0xAD: /* lodsw */
+               if ((prefix & DATA32) == 0) {
+                       incr = 2 * incr;
+                       regs->esi += (incr * count);
+                       while (count-- != 0) {
+                               setreg16(regs, 0, read16(addr));
+                               addr += incr;
+                       }
+
+                       TRACE((regs, regs->eip - eip, "lodsw (%%esi),%%ax"));
+               } else {
+                       incr = 4 * incr;
+                       regs->esi += (incr * count);
+                       while (count-- != 0) {
+                               setreg32(regs, 0, read32(addr));
+                               addr += incr;
+                       }
+                       TRACE((regs, regs->eip - eip, "lodsw (%%esi),%%eax"));
+               }
+               break;
+       }
+       return 1;
+}
+
 /*
  * Move to and from a control register.
  */
                TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
                switch (cr) {
                case 0:
-#ifndef TEST
                        setreg32(regs, modrm,
                                oldctx.cr0 & ~(CR0_PE | CR0_NE));
-#else
-                       setreg32(regs, modrm,
-                               oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
-#endif
                        break;
                case 2:
                        setreg32(regs, modrm, get_cr2());
                switch (cr) {
                case 0:
                        oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
-#ifdef TEST
-                       oldctx.cr0 |= CR0_PG;
-#endif
                        if (getreg32(regs, modrm) & CR0_PE)
                                set_mode(regs, VM86_REAL_TO_PROTECTED);
-                       else
-                               set_mode(regs, VM86_REAL);
+                       //else
+                       //      set_mode(regs, VM86_REAL);
                        break;
                case 3:
                        oldctx.cr3 = getreg32(regs, modrm);
                regs->eflags &= ~EFLAGS_ZF;
 }
 
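+/*
+ * Recompute the arithmetic flags (CF, PF, AF, ZF, SF, OF) after emulating an
+ * add of v1 and v2 yielding 'result'; hi_bit_mask is the sign-bit mask for
+ * the operand width (1<<7, 1<<15 or 1<<31).
+ */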
+static void set_eflags_add(unsigned hi_bit_mask, unsigned v1, unsigned v2,
+                               unsigned result, struct regs *regs)
+{
+       int bit_count;
+       unsigned tmp;
+       unsigned full_mask;
+       unsigned nonsign_mask;
+
+       /* Carry out of high order bit? */
+       if ( v1 & v2 & hi_bit_mask )
+               regs->eflags |= EFLAGS_CF;
+       else
+               regs->eflags &= ~EFLAGS_CF;
+
+       /* Even parity in least significant byte? */
+       tmp = result & 0xff;
+       for (bit_count = 0; tmp != 0; bit_count++)
+               tmp &= (tmp - 1);
+
+       if (bit_count & 1)
+               regs->eflags &= ~EFLAGS_PF;
+       else
+               regs->eflags |= EFLAGS_PF;
+
+       /* Carry out of least significant BCD digit? */
+       if ( v1 & v2 & (1<<3) )
+               regs->eflags |= EFLAGS_AF;
+       else
+               regs->eflags &= ~EFLAGS_AF;
+
+       /* Result is zero? */
+       full_mask = (hi_bit_mask - 1) | hi_bit_mask;
+       set_eflags_ZF(full_mask, result, regs);
+
+       /* Sign of result? */
+       if ( result & hi_bit_mask )
+               regs->eflags |= EFLAGS_SF;
+       else
+               regs->eflags &= ~EFLAGS_SF;
+
+       /* Carry out of highest non-sign bit? */
+       nonsign_mask = (hi_bit_mask >> 1) & ~hi_bit_mask;
+       if ( v1 & v2 & nonsign_mask )
+               regs->eflags |= EFLAGS_OF;
+       else
+               regs->eflags &= ~EFLAGS_OF;
+}
+
 /*
  * We need to handle cmp opcodes that address memory beyond the 64KB
  * segment limit that VM8086 mode enforces.
 }
 
 /*
+ * We need to handle add opcodes that address memory beyond the 64KB
+ * segment limit that VM8086 mode enforces.
+ */
+static int
+add(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned eip = regs->eip - 1;
+       unsigned modrm = fetch8(regs);
+       unsigned addr = operand(prefix, regs, modrm);
+       unsigned r = (modrm >> 3) & 7;
+
+       unsigned val1 = 0;
+       unsigned val2 = 0;
+       unsigned result = 0;
+       unsigned hi_bit;
+
+       if ((modrm & 0xC0) == 0xC0) /* no registers */
+               return 0;
+
+       switch (opc) {
+       case 0x00: /* addr32 add r8, r/m8 */
+               val1 = getreg8(regs, r);
+               val2 = read8(addr);
+               result = val1 + val2;
+               write8(addr, result);
+               TRACE((regs, regs->eip - eip,
+                       "addb %%e%s, *0x%x", rnames[r], addr));
+               break;
+
+       case 0x01: /* addr32 add r16, r/m16 */
+               if (prefix & DATA32) {
+                       val1 = getreg32(regs, r);
+                       val2 = read32(addr);
+                       result = val1 + val2;
+                       write32(addr, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addl %%e%s, *0x%x", rnames[r], addr));
+               } else {
+                       val1 = getreg16(regs, r);
+                       val2 = read16(addr);
+                       result = val1 + val2;
+                       write16(addr, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addw %%e%s, *0x%x", rnames[r], addr));
+               }
+               break;
+
+       case 0x03: /* addr32 add r/m16, r16 */
+               if (prefix & DATA32) {
+                       val1 = getreg32(regs, r);
+                       val2 = read32(addr);
+                       result = val1 + val2;
+                       setreg32(regs, r, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addl *0x%x, %%e%s", addr, rnames[r]));
+               } else {
+                       val1 = getreg16(regs, r);
+                       val2 = read16(addr);
+                       result = val1 + val2;
+                       setreg16(regs, r, result);
+                       TRACE((regs, regs->eip - eip,
+                               "addw *0x%x, %%%s", addr, rnames[r]));
+               }
+               break;
+       }
+
+       if (opc == 0x00)
+               hi_bit = (1<<7);
+       else
+               hi_bit = (prefix & DATA32) ? (1<<31) : (1<<15);
+       set_eflags_add(hi_bit, val1, val2, result, regs);
+
+       return 1;
+}
+
+/*
  * We need to handle pop opcodes that address memory beyond the 64KB
  * segment limit that VM8086 mode enforces.
  */
        return 1;
 }
 
+static int
+mov_to_seg(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       unsigned modrm = fetch8(regs);
+
+       /*
+        * Emulate segment loads in:
+        * 1) real->protected mode.
+        * 2) protected->real mode.
+        */
+       if (mode != VM86_REAL_TO_PROTECTED &&
+           mode != VM86_PROTECTED_TO_REAL)
+               return 0;
+
+       /* Register source only. */
+       if ((modrm & 0xC0) != 0xC0)
+               goto fail;
+
+       switch ((modrm & 0x38) >> 3) {
+       case 0: /* es */
+               regs->ves = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.ves = 0;
+               oldctx.es_sel = regs->ves;
+               return 1;
+
+       /* case 1: cs */
+
+       case 2: /* ss */
+               regs->uss = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.uss = 0;
+               oldctx.ss_sel = regs->uss;
+               return 1;
+       case 3: /* ds */
+               regs->vds = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.vds = 0;
+               oldctx.ds_sel = regs->vds;
+               return 1;
+       case 4: /* fs */
+               regs->vfs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.vfs = 0;
+               oldctx.fs_sel = regs->vfs;
+               return 1;
+       case 5: /* gs */
+               regs->vgs = getreg16(regs, modrm);
+               if (mode == VM86_PROTECTED_TO_REAL)
+                       return 1;
+               saved_rm_regs.vgs = 0;
+               oldctx.gs_sel = regs->vgs;
+               return 1;
+       }
+
+ fail:
+       printf("%s:%d: missed opcode %02x %02x\n",
+                  __FUNCTION__, __LINE__, opc, modrm);
+       return 0;
+}
+
 /*
  * Emulate a segment load in protected mode
  */
 static int
 load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes)
 {
+       uint64_t gdt_phys_base;
        unsigned long long entry;
 
        /* protected mode: use seg as index into gdt */
                return 1;
        }
 
-       entry = ((unsigned long long *)
-                 guest_linear_to_real(oldctx.gdtr_base))[sel >> 3];
+       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
+       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+               printf("gdt base address above 4G\n");
+               cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3), &entry);
+       } else
+               entry = ((unsigned long long *)(long)gdt_phys_base)[sel >> 3];
 
        /* Check the P bit first */
        if (!((entry >> (15+32)) & 0x1) && sel != 0)
                  ((entry >> (32-16)) & 0x00FF0000) |
                  ((entry >> (   16)) & 0x0000FFFF));
        *limit = (((entry >> (48-16)) & 0x000F0000) |
-                 ((entry           ) & 0x0000FFFF));
+                 (entry & 0x0000FFFF));
 
        arbytes->bytes = 0;
        arbytes->fields.seg_type = (entry >> (8+32)) & 0xF; /* TYPE */
-       arbytes->fields.s =  (entry >> (12+32)) & 0x1; /* S */
+       arbytes->fields.s = (entry >> (12+32)) & 0x1; /* S */
        if (arbytes->fields.s)
                arbytes->fields.seg_type |= 1; /* accessed */
        arbytes->fields.dpl = (entry >> (13+32)) & 0x3; /* DPL */
 }
 
 /*
+ * Emulate a protected mode segment load, falling back to clearing it if
+ * the descriptor is invalid.
+ */
+static void
+load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes)
+{
+       if (!load_seg(sel, base, limit, arbytes))
+               load_seg(0, base, limit, arbytes);
+}
+
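+/*
+ * ICW2 vector bases the guest programmed into the master/slave PICs while in
+ * real mode; handed to the protected-mode context in protected_mode().
+ */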
+static unsigned char rm_irqbase[2];
+
+/*
  * Transition to protected mode
  */
 static void
 protected_mode(struct regs *regs)
 {
+       extern char stack_top[];
+
+       oldctx.rm_irqbase[0] = rm_irqbase[0];
+       oldctx.rm_irqbase[1] = rm_irqbase[1];
+
        regs->eflags &= ~(EFLAGS_TF|EFLAGS_VM);
 
        oldctx.eip = regs->eip;
        oldctx.esp = regs->uesp;
        oldctx.eflags = regs->eflags;
 
-       memset(&saved_rm_regs, 0, sizeof(struct regs));
-
        /* reload all segment registers */
        if (!load_seg(regs->cs, &oldctx.cs_base,
                                &oldctx.cs_limit, &oldctx.cs_arbytes))
                panic("Invalid %%cs=0x%x for protected mode\n", regs->cs);
        oldctx.cs_sel = regs->cs;
 
-       if (load_seg(regs->ves, &oldctx.es_base,
-                               &oldctx.es_limit, &oldctx.es_arbytes))
-               oldctx.es_sel = regs->ves;
-       else {
-               load_seg(0, &oldctx.es_base,
-                           &oldctx.es_limit, &oldctx.es_arbytes);
-               oldctx.es_sel = 0;
-               saved_rm_regs.ves = regs->ves;
-       }
-
-       if (load_seg(regs->uss, &oldctx.ss_base,
-                               &oldctx.ss_limit, &oldctx.ss_arbytes))
-               oldctx.ss_sel = regs->uss;
-       else {
-               load_seg(0, &oldctx.ss_base,
-                           &oldctx.ss_limit, &oldctx.ss_arbytes);
-               oldctx.ss_sel = 0;
-               saved_rm_regs.uss = regs->uss;
-       }
-
-       if (load_seg(regs->vds, &oldctx.ds_base,
-                               &oldctx.ds_limit, &oldctx.ds_arbytes))
-               oldctx.ds_sel = regs->vds;
-       else {
-               load_seg(0, &oldctx.ds_base,
-                           &oldctx.ds_limit, &oldctx.ds_arbytes);
-               oldctx.ds_sel = 0;
-               saved_rm_regs.vds = regs->vds;
-       }
-
-       if (load_seg(regs->vfs, &oldctx.fs_base,
-                               &oldctx.fs_limit, &oldctx.fs_arbytes))
-               oldctx.fs_sel = regs->vfs;
-       else {
-               load_seg(0, &oldctx.fs_base,
-                           &oldctx.fs_limit, &oldctx.fs_arbytes);
-               oldctx.fs_sel = 0;
-               saved_rm_regs.vfs = regs->vfs;
-       }
-
-       if (load_seg(regs->vgs, &oldctx.gs_base,
-                               &oldctx.gs_limit, &oldctx.gs_arbytes))
-               oldctx.gs_sel = regs->vgs;
-       else {
-               load_seg(0, &oldctx.gs_base,
-                           &oldctx.gs_limit, &oldctx.gs_arbytes);
-               oldctx.gs_sel = 0;
-               saved_rm_regs.vgs = regs->vgs;
-       }
+       load_or_clear_seg(oldctx.es_sel, &oldctx.es_base,
+                         &oldctx.es_limit, &oldctx.es_arbytes);
+       load_or_clear_seg(oldctx.ss_sel, &oldctx.ss_base,
+                         &oldctx.ss_limit, &oldctx.ss_arbytes);
+       load_or_clear_seg(oldctx.ds_sel, &oldctx.ds_base,
+                         &oldctx.ds_limit, &oldctx.ds_arbytes);
+       load_or_clear_seg(oldctx.fs_sel, &oldctx.fs_base,
+                         &oldctx.fs_limit, &oldctx.fs_arbytes);
+       load_or_clear_seg(oldctx.gs_sel, &oldctx.gs_base,
+                         &oldctx.gs_limit, &oldctx.gs_arbytes);
 
        /* initialize jump environment to warp back to protected mode */
+       regs->uss = DATA_SELECTOR;
+       regs->uesp = (unsigned long)stack_top;
        regs->cs = CODE_SELECTOR;
-       regs->ds = DATA_SELECTOR;
-       regs->es = DATA_SELECTOR;
-       regs->fs = DATA_SELECTOR;
-       regs->gs = DATA_SELECTOR;
-       regs->eip = (unsigned) &switch_to_protected_mode;
+       regs->eip = (unsigned long)switch_to_protected_mode;
 
        /* this should get us into 32-bit mode */
 }
 real_mode(struct regs *regs)
 {
        regs->eflags |= EFLAGS_VM | 0x02;
-       regs->ds = DATA_SELECTOR;
-       regs->es = DATA_SELECTOR;
-       regs->fs = DATA_SELECTOR;
-       regs->gs = DATA_SELECTOR;
 
        /*
         * When we transition from protected to real-mode and we
                        panic("%%ss 0x%lx higher than 1MB", regs->uss);
                regs->uss = address(regs, regs->uss, 0) >> 4;
        } else {
-         regs->uss = saved_rm_regs.uss;
+               regs->uss = saved_rm_regs.uss;
        }
        if (regs->vds != 0) {
                if (regs->vds >= HIGHMEM)
                        panic("%%ds 0x%lx higher than 1MB", regs->vds);
                regs->vds = address(regs, regs->vds, 0) >> 4;
        } else {
-         regs->vds = saved_rm_regs.vds;
+               regs->vds = saved_rm_regs.vds;
        }
        if (regs->ves != 0) {
                if (regs->ves >= HIGHMEM)
                        panic("%%es 0x%lx higher than 1MB", regs->ves);
                regs->ves = address(regs, regs->ves, 0) >> 4;
        } else {
-         regs->ves = saved_rm_regs.ves;
+               regs->ves = saved_rm_regs.ves;
        }
 
        /* this should get us into 16-bit mode */
 {
        switch (newmode) {
        case VM86_REAL:
-               if ((mode == VM86_PROTECTED_TO_REAL) ||
-                   (mode == VM86_REAL_TO_PROTECTED)) {
+               if (mode == VM86_PROTECTED_TO_REAL ||
+                   mode == VM86_REAL_TO_PROTECTED) {
                        regs->eflags &= ~EFLAGS_TF;
                        real_mode(regs);
-                       break;
-               } else if (mode == VM86_REAL) {
-                       break;
-               } else
+               } else if (mode != VM86_REAL)
                        panic("unexpected real mode transition");
                break;
 
        case VM86_REAL_TO_PROTECTED:
                if (mode == VM86_REAL) {
                        regs->eflags |= EFLAGS_TF;
-                       break;
-               } else if (mode == VM86_REAL_TO_PROTECTED) {
-                       break;
-               } else
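+                       /* Stash the real-mode segment registers so real_mode()
+                        * can restore them later; the protected-mode selectors
+                        * start out null. */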
+                       saved_rm_regs.vds = regs->vds;
+                       saved_rm_regs.ves = regs->ves;
+                       saved_rm_regs.vfs = regs->vfs;
+                       saved_rm_regs.vgs = regs->vgs;
+                       saved_rm_regs.uss = regs->uss;
+                       oldctx.ds_sel = 0;
+                       oldctx.es_sel = 0;
+                       oldctx.fs_sel = 0;
+                       oldctx.gs_sel = 0;
+                       oldctx.ss_sel = 0;
+               } else if (mode != VM86_REAL_TO_PROTECTED)
                        panic("unexpected real-to-protected mode transition");
                break;
 
        case VM86_PROTECTED_TO_REAL:
-               if (mode == VM86_PROTECTED) {
-                       break;
-               } else
+               if (mode != VM86_PROTECTED)
                        panic("unexpected protected-to-real mode transition");
                break;
 
        case VM86_PROTECTED:
-               if (mode == VM86_REAL_TO_PROTECTED) {
-                       protected_mode(regs);
-//                     printf("<VM86_PROTECTED>\n");
-                       mode = newmode;
-                       return;
-               } else
+               if (mode != VM86_REAL_TO_PROTECTED)
                        panic("unexpected protected mode transition");
+               protected_mode(regs);
                break;
        }
 
        mode = newmode;
-       TRACE((regs, 0, states[mode]));
+       if (mode != VM86_PROTECTED)
+               TRACE((regs, 0, states[mode]));
 }
 
 static void
        unsigned n = regs->eip;
        unsigned cs, eip;
 
-       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
-               eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
-               cs = fetch16(regs);
+       eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
+       cs = fetch16(regs);
 
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
-
-                regs->cs = cs;
-                regs->eip = eip;
-               set_mode(regs, VM86_PROTECTED);
-       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
-               eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
-               cs = fetch16(regs);
+       TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
 
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+       regs->cs = cs;
+       regs->eip = eip;
 
-                regs->cs = cs;
-                regs->eip = eip;
+       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
+               set_mode(regs, VM86_PROTECTED);
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
-       } else
+       else
                panic("jmpl");
 }
 
        unsigned cs, eip;
        unsigned addr;
 
-       addr  = operand(prefix, regs, modrm);
+       addr = operand(prefix, regs, modrm);
 
-       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
-               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
-               addr += (prefix & DATA32) ? 4 : 2;
-               cs = read16(addr);
+       eip = (prefix & DATA32) ? read32(addr) : read16(addr);
+       addr += (prefix & DATA32) ? 4 : 2;
+       cs = read16(addr);
 
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+       TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
 
-                regs->cs = cs;
-                regs->eip = eip;
-               set_mode(regs, VM86_PROTECTED);
-       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
-               eip = (prefix & DATA32) ? read32(addr) : read16(addr);
-               addr += (prefix & DATA32) ? 4 : 2;
-               cs = read16(addr);
-
-               TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+       regs->cs = cs;
+       regs->eip = eip;
 
-                regs->cs = cs;
-                regs->eip = eip;
+       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
+               set_mode(regs, VM86_PROTECTED);
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
-       } else
+       else
                panic("jmpl");
 }
 
 
        TRACE((regs, 1, "retl (to 0x%x:0x%x)", cs, eip));
 
-       if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
-                regs->cs = cs;
-                regs->eip = eip;
+       regs->cs = cs;
+       regs->eip = eip;
+
+       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
                set_mode(regs, VM86_PROTECTED);
-       } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
-                regs->cs = cs;
-                regs->eip = eip;
+       else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
-       } else
+       else
                panic("retl");
 }
 
                        icw2[0] = 0;
                        printf("Remapping master: ICW2 0x%x -> 0x%x\n",
                                al, NR_EXCEPTION_HANDLER);
+                       rm_irqbase[0] = al;
                        al = NR_EXCEPTION_HANDLER;
                }
                break;
                        icw2[1] = 0;
                        printf("Remapping slave: ICW2 0x%x -> 0x%x\n",
                                al, NR_EXCEPTION_HANDLER+8);
+                       rm_irqbase[1] = al;
                        al = NR_EXCEPTION_HANDLER+8;
                }
                break;
        unsigned addr;
        unsigned data;
 
-       addr  = operand(prefix, regs, modrm);
-       
+       addr = operand(prefix, regs, modrm);
+
        if (prefix & DATA32) {
                data = read32(addr);
                push32(regs, data);
        unsigned opc, modrm, disp;
        unsigned prefix = 0;
 
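+       /*
+        * During the protected-to-real transition on a 32-bit code segment
+        * the default operand/address size is 32 bits, so start with
+        * DATA32/ADDR32 set; the 0x66/0x67 prefixes below then switch back
+        * to 16-bit operands/addresses.
+        */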
+       if (mode == VM86_PROTECTED_TO_REAL &&
+               oldctx.cs_arbytes.fields.default_ops_size) {
+               prefix |= DATA32;
+               prefix |= ADDR32;
+       }
+
        for (;;) {
                switch ((opc = fetch8(regs))) {
-               case 0x07:
-                       if (prefix & DATA32)
-                               regs->ves = pop32(regs);
-                       else
-                               regs->ves = pop16(regs);
+
+               case 0x00: /* addr32 add r8, r/m8 */
+               case 0x01: /* addr32 add r16, r/m16 */
+               case 0x03: /* addr32 add r/m16, r16 */
+                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                               goto invalid;
+                       if ((prefix & ADDR32) == 0)
+                               goto invalid;
+                       if (!add(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0x07: /* pop %es */
+                       regs->ves = (prefix & DATA32) ?
+                               pop32(regs) : pop16(regs);
                        TRACE((regs, regs->eip - eip, "pop %%es"));
+                       if (mode == VM86_REAL_TO_PROTECTED) {
+                               saved_rm_regs.ves = 0;
+                               oldctx.es_sel = regs->ves;
+                       }
                        return OPC_EMULATED;
 
                case 0x0F: /* two byte opcode */
                                        goto invalid;
                                }
                                break;
+                       case 0x06: /* clts */
+                               oldctx.cr0 &= ~CR0_TS;
+                               return OPC_EMULATED;
                        case 0x09: /* wbinvd */
                                return OPC_EMULATED;
                        case 0x20: /* mov Rd, Cd (1h) */
                        }
                        goto invalid;
 
+               case 0x1F: /* pop %ds */
+                       regs->vds = (prefix & DATA32) ?
+                               pop32(regs) : pop16(regs);
+                       TRACE((regs, regs->eip - eip, "pop %%ds"));
+                       if (mode == VM86_REAL_TO_PROTECTED) {
+                               saved_rm_regs.vds = 0;
+                               oldctx.ds_sel = regs->vds;
+                       }
+                       return OPC_EMULATED;
+
                case 0x26:
                        TRACE((regs, regs->eip - eip, "%%es:"));
                        prefix |= SEG_ES;
 
                case 0x39: /* addr32 cmp r16, r/m16 */
                case 0x3B: /* addr32 cmp r/m16, r16 */
-                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                       if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32))
                                goto invalid;
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!cmp(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
+                       if (!cmp(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
 
                case 0x3E:
                        TRACE((regs, regs->eip - eip, "%%ds:"));
                        continue;
 
                case 0x66:
-                       TRACE((regs, regs->eip - eip, "data32"));
-                       prefix |= DATA32;
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "data16"));
+                               prefix &= ~DATA32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "data32"));
+                               prefix |= DATA32;
+                       }
                        continue;
 
-               case 0x67: 
-                       TRACE((regs, regs->eip - eip, "addr32"));
-                       prefix |= ADDR32;
+               case 0x67:
+                       if (mode == VM86_PROTECTED_TO_REAL &&
+                               oldctx.cs_arbytes.fields.default_ops_size) {
+                               TRACE((regs, regs->eip - eip, "addr16"));
+                               prefix &= ~ADDR32;
+                       } else {
+                               TRACE((regs, regs->eip - eip, "addr32"));
+                               prefix |= ADDR32;
+                       }
                        continue;
 
                case 0x88: /* addr32 mov r8, r/m8 */
                case 0x8A: /* addr32 mov r/m8, r8 */
-                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                       if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32))
                                goto invalid;
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!movr(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
-
-               case 0x89: /* addr32 mov r16, r/m16 */
-                       if (mode == VM86_PROTECTED_TO_REAL) {
-                               unsigned modrm = fetch8(regs);
-                               unsigned addr = operand(prefix, regs, modrm);
-                               unsigned val, r = (modrm >> 3) & 7;
-                               
-                               if (prefix & DATA32) {
-                                       val = getreg16(regs, r);
-                                       write32(addr, val);
-                               } else {
-                                       val = getreg32(regs, r);
-                                       write16(addr, MASK16(val));
-                               }
-                               TRACE((regs, regs->eip - eip,
-                                       "mov %%%s, *0x%x", rnames[r], addr));
-                               return OPC_EMULATED;
-                       }
-               case 0x8B: /* addr32 mov r/m16, r16 */
-                       if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
+                       if (!movr(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0x89: /* mov r16, r/m16 */
+               case 0x8B: /* mov r/m16, r16 */
+                       if (mode != VM86_PROTECTED_TO_REAL && !(prefix & ADDR32))
+                               goto invalid;
+                       if (!movr(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0x8E: /* mov r16, sreg */
+                       if (!mov_to_seg(regs, prefix, opc))
                                goto invalid;
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!movr(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
+                       return OPC_EMULATED;
 
                case 0x8F: /* addr32 pop r/m16 */
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!pop(regs, prefix, opc))
-                                goto invalid;
-                        return OPC_EMULATED;
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!pop(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
 
                case 0x90: /* nop */
                        TRACE((regs, regs->eip - eip, "nop"));
                        regs->eflags |= EFLAGS_VM;
                        return OPC_EMULATED;
 
-               case 0xA1: /* mov ax, r/m16 */ 
-                       {
-                               int addr, data;
-                               int seg = segment(prefix, regs, regs->vds);
-                               int offset = prefix & ADDR32? fetch32(regs) : fetch16(regs);
-
-                               if (prefix & DATA32) {
-                                       addr = address(regs, seg, offset);
-                                       data = read32(addr);
-                                       setreg32(regs, 0, data);
-                               } else {
-                                       addr = address(regs, seg, offset);
-                                       data = read16(addr);
-                                       setreg16(regs, 0, data);
-                               }
-                               TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr));
+               case 0xA1: /* mov ax, r/m16 */
+               {
+                       int addr, data;
+                       int seg = segment(prefix, regs, regs->vds);
+                       int offset = prefix & ADDR32 ? fetch32(regs) : fetch16(regs);
+
+                       if (prefix & DATA32) {
+                               addr = address(regs, seg, offset);
+                               data = read32(addr);
+                               setreg32(regs, 0, data);
+                       } else {
+                               addr = address(regs, seg, offset);
+                               data = read16(addr);
+                               setreg16(regs, 0, data);
                        }
+                       TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr));
+                       return OPC_EMULATED;
+               }
+
+               case 0xA4: /* movsb */
+               case 0xA5: /* movsw */
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!movs(regs, prefix, opc))
+                               goto invalid;
                        return OPC_EMULATED;
 
+               case 0xAD: /* lodsw */
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!lods(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
                case 0xBB: /* mov bx, imm16 */
-                       {
-                               int data;
-                               if (prefix & DATA32) {
-                                       data = fetch32(regs);
-                                       setreg32(regs, 3, data);
-                               } else {
-                                       data = fetch16(regs);
-                                       setreg16(regs, 3, data);
-                               }
-                               TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data));
+               {
+                       int data;
+                       if (prefix & DATA32) {
+                               data = fetch32(regs);
+                               setreg32(regs, 3, data);
+                       } else {
+                               data = fetch16(regs);
+                               setreg16(regs, 3, data);
                        }
+                       TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data));
                        return OPC_EMULATED;
+               }
 
                case 0xC6: /* addr32 movb $imm, r/m8 */
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!movr(regs, prefix, opc))
-                                goto invalid;
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!movr(regs, prefix, opc))
+                               goto invalid;
                        return OPC_EMULATED;
 
                case 0xCB: /* retl */
-                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                           (mode == VM86_PROTECTED_TO_REAL)) {
+                       if (mode == VM86_REAL_TO_PROTECTED ||
+                               mode == VM86_PROTECTED_TO_REAL) {
                                retl(regs, prefix);
                                return OPC_INVALID;
                        }
                        return OPC_EMULATED;
 
                case 0xEA: /* jmpl */
-                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                           (mode == VM86_PROTECTED_TO_REAL)) {
+                       if (mode == VM86_REAL_TO_PROTECTED ||
+                               mode == VM86_PROTECTED_TO_REAL) {
                                jmpl(regs, prefix);
                                return OPC_INVALID;
                        }
                        goto invalid;
 
-               case 0xFF: /* jmpl (indirect) */
-                       {
-                               unsigned modrm = fetch8(regs);
-                               switch((modrm >> 3) & 7) {
-                               case 5: /* jmpl (indirect) */
-                                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                                           (mode == VM86_PROTECTED_TO_REAL)) {
-                                               jmpl_indirect(regs, prefix, modrm);
-                                               return OPC_INVALID;
-                                       }
-                                       goto invalid;
+               case 0xFF:
+               {
+                       unsigned modrm = fetch8(regs);
+                       switch((modrm >> 3) & 7) {
+                       case 5: /* jmpl (indirect) */
+                               if (mode == VM86_REAL_TO_PROTECTED ||
+                                       mode == VM86_PROTECTED_TO_REAL) {
+                                       jmpl_indirect(regs, prefix, modrm);
+                                       return OPC_INVALID;
+                               }
+                               goto invalid;
 
-                               case 6: /* push r/m16 */
-                                       pushrm(regs, prefix, modrm);
-                                       return OPC_EMULATED;
+                       case 6: /* push r/m16 */
+                               pushrm(regs, prefix, modrm);
+                               return OPC_EMULATED;
 
-                               default:
-                                       goto invalid;
-                               }
+                       default:
+                               goto invalid;
                        }
+               }
 
                case 0xEB: /* short jump */
-                       if ((mode == VM86_REAL_TO_PROTECTED) ||
-                           (mode == VM86_PROTECTED_TO_REAL)) {
+                       if (mode == VM86_REAL_TO_PROTECTED ||
+                               mode == VM86_PROTECTED_TO_REAL) {
                                disp = (char) fetch8(regs);
                                TRACE((regs, 2, "jmp 0x%x", regs->eip + disp));
                                regs->eip += disp;
                        TRACE((regs, regs->eip - eip, "lock"));
                        continue;
 
+               case 0xF4: /* hlt */
+                       TRACE((regs, regs->eip - eip, "hlt"));
+                       /* Do something power-saving here! */
+                       return OPC_EMULATED;
+
+               case 0xF3: /* rep/repe/repz */
+                       TRACE((regs, regs->eip - eip, "rep"));
+                       prefix |= REP;
+                       continue;
+
                case 0xF6: /* addr32 testb $imm, r/m8 */
-                        if ((prefix & ADDR32) == 0)
-                                goto invalid;
-                        if (!test(regs, prefix, opc))
-                                goto invalid;
+                       if (!(prefix & ADDR32))
+                               goto invalid;
+                       if (!test(regs, prefix, opc))
+                               goto invalid;
                        return OPC_EMULATED;
 
                case 0xFA: /* cli */
 {
        unsigned flteip;
        int nemul = 0;
+       unsigned ip;
 
        /* emulate as many instructions as possible */
        while (opcode(regs) != OPC_INVALID)
        /* detect the case where we are not making progress */
        if (nemul == 0 && prev_eip == regs->eip) {
                flteip = address(regs, MASK16(regs->cs), regs->eip);
+
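+               /* Dump the bytes at the offending instruction pointer to aid debugging. */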
+               printf("Undecoded sequence: \n");
+               for (ip=flteip; ip < flteip+16; ip++)
+                       printf("0x%02x ", read8(ip));
+               printf("\n");
+
                panic("Unknown opcode at %04x:%04x=0x%x",
                        MASK16(regs->cs), regs->eip, flteip);
        } else
        case 1: /* Debug */
                if (regs->eflags & EFLAGS_VM) {
                        /* emulate any 8086 instructions  */
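+                       /* Plain real mode needs no emulation here; just resume the guest. */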
+                       if (mode == VM86_REAL)
+                               return;
                        if (mode != VM86_REAL_TO_PROTECTED)
                                panic("not in real-to-protected mode");
                        emulate(regs);
        default:
        invalid:
                printf("Trap (0x%x) while in %s mode\n",
-                   trapno, regs->eflags & EFLAGS_VM ? "real" : "protected");
+                       trapno, regs->eflags & EFLAGS_VM ? "real" : "protected");
                if (trapno == 14)
                        printf("Page fault address 0x%x\n", get_cr2());
                dump_regs(regs);
 
 
 #include "vmx_assist.h"
 
-#define        NR_EXCEPTION_HANDLER    32
-#define        NR_INTERRUPT_HANDLERS   16
-#define        NR_TRAPS                (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
-
 #ifndef __ASSEMBLY__
 
 struct regs {
-        unsigned       edi, esi, ebp, esp, ebx, edx, ecx, eax;
-        unsigned       ds, es, fs, gs;
-        unsigned       trapno, errno;
-        unsigned       eip, cs, eflags, uesp, uss;
-        unsigned       ves, vds, vfs, vgs;
+       unsigned        edi, esi, ebp, esp, ebx, edx, ecx, eax;
+       unsigned        trapno, errno;
+       unsigned        eip, cs, eflags, uesp, uss;
+       unsigned        ves, vds, vfs, vgs;
 };
 
 enum vm86_mode {
 
 extern enum vm86_mode prevmode, mode;
 extern struct vmx_assist_context oldctx;
-extern struct vmx_assist_context newctx;
 
 extern void emulate(struct regs *);
 extern void dump_regs(struct regs *);
 
 /*
  * vmx_assist.h: Context definitions for the VMXASSIST world switch.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Leendert van Doorn, leendert@watson.ibm.com
  * Copyright (c) 2005, International Business Machines Corporation.
  */
 
 #ifndef __ASSEMBLY__
 
+#define NR_EXCEPTION_HANDLER    32
+#define NR_INTERRUPT_HANDLERS   16
+#define NR_TRAPS        (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
+
 union vmcs_arbytes {
     struct arbyte_fields {
         unsigned int seg_type : 4,
     uint32_t  ldtr_limit;
     uint32_t  ldtr_base;
     union vmcs_arbytes ldtr_arbytes;
+
+    unsigned char rm_irqbase[2];
 };
 typedef struct vmx_assist_context vmx_assist_context_t;
 
 
 }  __attribute__((packed));
 
 typedef enum { 
-    VMXASSIST_STARTUP,
-    VMXASSIST_V8086_BIOS,
-    VMXASSIST_V8086,
-    NORMAL 
+    VMXASSIST_DISABLED,
+    VMXASSIST_ENABLED
 } vmx_state_t;
 
 struct tss_descriptor {
 
--- /dev/null
+/*
+ * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ */
+
+#ifndef _VMX_ASSIST_H_
+#define _VMX_ASSIST_H_
+
+#include <palacios/vm_guest.h>
+
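+/*
+ * Layout of the vmxassist image header: the magic word sits at offset 8
+ * from VMXASSIST_BASE, followed at offsets 12 and 16 by pointers to the
+ * new and old world-switch contexts.
+ */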
+#define VMXASSIST_BASE         0xD0000
+#define VMXASSIST_MAGIC        0x17101966
+#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
+
+#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
+#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
+
+#ifndef __ASSEMBLY__
+
+#define NR_EXCEPTION_HANDLER    32
+#define NR_INTERRUPT_HANDLERS   16
+#define NR_TRAPS        (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
+
+union vmcs_arbytes {
+    struct arbyte_fields {
+        unsigned int seg_type : 4,
+            s         : 1,
+            dpl       : 2,
+            p         : 1,
+            reserved0 : 4,
+            avl       : 1,
+            reserved1 : 1,
+            default_ops_size: 1,
+            g         : 1,
+            null_bit  : 1,
+            reserved2 : 15;
+    } fields;
+    unsigned int bytes;
+};
+
+/*
+ * World switch state
+ */
+struct vmx_assist_context {
+    uint32_t  eip;        /* execution pointer */
+    uint32_t  esp;        /* stack pointer */
+    uint32_t  eflags;     /* flags register */
+    uint32_t  cr0;
+    uint32_t  cr3;        /* page table directory */
+    uint32_t  cr4;
+    uint32_t  idtr_limit; /* idt */
+    uint32_t  idtr_base;
+    uint32_t  gdtr_limit; /* gdt */
+    uint32_t  gdtr_base;
+    uint32_t  cs_sel;     /* cs selector */
+    uint32_t  cs_limit;
+    uint32_t  cs_base;
+    union vmcs_arbytes cs_arbytes;
+    uint32_t  ds_sel;     /* ds selector */
+    uint32_t  ds_limit;
+    uint32_t  ds_base;
+    union vmcs_arbytes ds_arbytes;
+    uint32_t  es_sel;     /* es selector */
+    uint32_t  es_limit;
+    uint32_t  es_base;
+    union vmcs_arbytes es_arbytes;
+    uint32_t  ss_sel;     /* ss selector */
+    uint32_t  ss_limit;
+    uint32_t  ss_base;
+    union vmcs_arbytes ss_arbytes;
+    uint32_t  fs_sel;     /* fs selector */
+    uint32_t  fs_limit;
+    uint32_t  fs_base;
+    union vmcs_arbytes fs_arbytes;
+    uint32_t  gs_sel;     /* gs selector */
+    uint32_t  gs_limit;
+    uint32_t  gs_base;
+    union vmcs_arbytes gs_arbytes;
+    uint32_t  tr_sel;     /* task selector */
+    uint32_t  tr_limit;
+    uint32_t  tr_base;
+    union vmcs_arbytes tr_arbytes;
+    uint32_t  ldtr_sel;   /* ldtr selector */
+    uint32_t  ldtr_limit;
+    uint32_t  ldtr_base;
+    union vmcs_arbytes ldtr_arbytes;
+
+    unsigned char rm_irqbase[2];
+};
+typedef struct vmx_assist_context vmx_assist_context_t;
+
+int v3_vmxassist_ctx_switch(struct guest_info * info);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _VMX_ASSIST_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
 
--- /dev/null
+
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_ctrl_regs.h>
+
+int v3_vmx_handle_cr0_write(struct guest_info * info, v3_reg_t new_val);
 
     uint32_t access_size : 3; // (0: 1 Byte ;; 1: 2 Bytes ;; 3: 4 Bytes)
     uint32_t dir        : 1; // (0: Out ;; 1: In)
     uint32_t string     : 1; // (0: not string ;; 1: string)
-    uint32_t REP        : 1; // (0: not REP ;; 1: REP)
+    uint32_t rep        : 1; // (0: not REP ;; 1: REP)
     uint32_t op_enc      : 1; // (0: DX ;; 1: immediate)
     uint32_t rsvd       : 9; // Set to 0
     uint32_t port       : 16; // IO Port Number
 
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+#ifndef __VMX_IO_H__
+#define __VMX_IO_H__
+
+#ifdef __V3VEE__
 
 #include <palacios/vm_guest.h>
 
 int v3_handle_vmx_io_out(struct guest_info * info);
 int v3_handle_vmx_io_outs(struct guest_info * info);
 
+
+#endif
+#endif
 
 
 #ifdef __V3VEE__
 
+#include <palacios/vmcs.h>
 
 #define VMX_SUCCESS         0 
 #define VMX_FAIL_INVALID    1
     __asm__ __volatile__ (  
                 VMREAD_OPCODE
                 EAX_ECX_MODRM
-                "seteb %0;" // fail valid
-                "setnaeb %1;" // fail invalid
-                : "=q"(ret_valid), "=q"(ret_invalid), "=c"(val) // Use ECX
-                : "a" (vmcs_field), "0"(ret_valid), "1"(ret_invalid)
+                "seteb %1;" // fail valid
+                "setnaeb %2;" // fail invalid
+                :  "=&c"(val), "=q"(ret_valid), "=q"(ret_invalid) // Use ECX
+                : "a" (vmcs_field), "1"(ret_valid), "2"(ret_invalid)
                 : "memory"
                 );
 
 
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2009, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMX_MSR_H__
+#define __VMX_MSR_H__
+
+#ifdef __V3VEE__
 
 #include <palacios/vm_guest.h>
 
 int v3_init_vmx_msr_map(struct guest_info * info);
+
+#endif
+#endif
 
     struct ide_channel * channel = get_selected_channel(ide, port);
     struct ide_drive * drive = get_selected_drive(channel);
 
-    //    PrintDebug("IDE: Reading Data Port %x (len=%d)\n", port, length);
+    PrintDebug("IDE: Reading Data Port %x (len=%d)\n", port, length);
 
     if ((channel->cmd_reg == 0xec) ||
        (channel->cmd_reg == 0xa1)) {
 
 
     cd = (struct cd_state *)V3_Malloc(sizeof(struct cd_state));
 
-    PrintDebug("Registering Ram CD at %p (size=%d)\n", (void *)ramdisk, size);
+    PrintDebug("Registering Ram CD at %p (size=%d)\n", (void *)cfg->ramdisk, cfg->size);
 
   
     cd->disk_image = cfg->ramdisk;
 
 
     hd = (struct hd_state *)V3_Malloc(sizeof(struct hd_state));
 
-    PrintDebug("Registering Ram HDD at %p (size=%d)\n", (void *)ramdisk, size);
+    PrintDebug("Registering Ram HDD at %p (size=%d)\n", (void *)cfg->ramdisk, cfg->size);
 
     hd->disk_image = cfg->ramdisk;
     hd->capacity = cfg->size;
 
                        vmx_io.o \
                        vmx_lowlevel.o \
                        vmx_msr.o \
-                       vmcs.o
+                       vmcs.o \
+                       vmx_ctrl_regs.o \
+                       vmx_assist.o
 
 
 
 
 
 
 #include <palacios/vmx.h>
-#include <palacios/vmcs.h>
 #include <palacios/vmm.h>
 #include <palacios/vmx_lowlevel.h>
 #include <palacios/vmm_lowlevel.h>
     return (addr_t)V3_PAddr((void *)vmcs_page);
 }
 
-#if 0
-
-#endif
-
-#if 0
-static int init_vmcs_bios(struct guest_info * vm_info) 
-{
-#if 0
-
-    setup_v8086_mode_for_boot(vm_info);
-
-
-    // Setup guest state 
-    // TODO: This is not 32-bit safe!
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_RIP, vm_info->rip);
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_RSP, vm_info->vm_regs.rsp);
-    
-
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_CR0, vm_info->ctrl_regs.cr0);
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_CR4, vm_info->ctrl_regs.cr4);
-
-    vmx_ret |= vmcs_write_guest_segments(vm_info);
-
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_RFLAGS, vm_info->ctrl_regs.rflags);
-#define DEBUGCTL_MSR 0x1d9
-
-    v3_get_msr(DEBUGCTL_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_DBG_CTL, tmp_msr.value);
-
-    vmx_ret |= check_vmcs_write(VMCS_GUEST_DR7, 0x400);
-
-    vmx_ret |= check_vmcs_write(VMCS_LINK_PTR, 0xffffffffffffffff);
-
-    if (vmx_ret != 0) {
-       PrintError("Could not initialize VMCS segments\n");
-        return -1;
-    }
-
-#endif
-    return 0;
-}
-#endif
 
 static int init_vmx_guest(struct guest_info * info, struct v3_vm_config * config_ptr) {
     v3_pre_config_guest(info, config_ptr);
   
 
     /********** Setup and VMX Control Fields from MSR ***********/
+    /* Setup IO map */
+    (void) v3_init_vmx_io_map(info);
+    (void) v3_init_vmx_msr_map(info);
+
     struct v3_msr tmp_msr;
 
     v3_get_msr(VMX_PINBASED_CTLS_MSR,&(tmp_msr.hi),&(tmp_msr.lo));
     vmx_data->pinbased_ctrls =  tmp_msr.lo | NMI_EXIT;
 
     v3_get_msr(VMX_PROCBASED_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
-    vmx_data->pri_procbased_ctrls = tmp_msr.lo;
+
+    PrintDebug("MSR High: 0x%x\n", tmp_msr.hi);
+    vmx_data->pri_procbased_ctrls = tmp_msr.lo | USE_IO_BITMAPS;
+
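+    /* Point the VMCS at the physical addresses of the IO and MSR bitmaps. */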
+    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->io_map.arch_data));
+    vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
+            (addr_t)V3_PAddr(info->io_map.arch_data) + PAGE_SIZE_4KB); 
+
+    vmx_ret |= check_vmcs_write(VMCS_MSR_BITMAP, (addr_t)V3_PAddr(info->msr_map.arch_data));
 
     v3_get_msr(VMX_EXIT_CTLS_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
     vmx_data->exit_ctrls = tmp_msr.lo ;
 
     struct vmx_exception_bitmap excp_bmap;
     excp_bmap.value = 0xffffffff;
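+    /* Do not intercept #GP faults; let them be delivered through the guest IDT. */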
+    excp_bmap.gp = 0;
     vmx_ret |= check_vmcs_write(VMCS_EXCP_BITMAP, excp_bmap.value);
 
 
 
         // vmx_data->pinbased_ctrls |= NMI_EXIT;
 
-        /* Add unconditional I/O and CR exits */
-        vmx_data->pri_procbased_ctrls |= UNCOND_IO_EXIT  
-                                        | CR3_LOAD_EXIT  
-                                        | CR3_STORE_EXIT;
+        /* Add CR exits */
+        vmx_data->pri_procbased_ctrls |= CR3_LOAD_EXIT  
+                                      | CR3_STORE_EXIT;
  
         vmx_data->exit_ctrls |= HOST_ADDR_SPACE_SIZE;
     }
     info->segments.ldtr.present = 1;
     info->segments.ldtr.granularity = 0;
     
-    /* Setup IO map */
-    (void) v3_init_vmx_io_map(info);
-    (void) v3_init_vmx_msr_map(info);
-
+    
     /************* Map in GDT and vmxassist *************/
 
     uint64_t  gdt[] __attribute__ ((aligned(32))) = {
 
     v3_print_vmcs();
 
-    vmx_data->state = VMXASSIST_STARTUP;
+    vmx_data->state = VMXASSIST_DISABLED;
 
     v3_post_config_guest(info, config_ptr);
 
 
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmx_assist.h>
+#include <palacios/vmx_lowlevel.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmx.h>
+
+static int vmx_save_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx);
+static int vmx_restore_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx);
+
+int v3_vmxassist_ctx_switch(struct guest_info * info) {
+    uint32_t vmx_magic = 0; // Magic number to check for vmxassist
+    struct vmx_assist_context * old_ctx = NULL;
+    struct vmx_assist_context * new_ctx = NULL;        
+    uint32_t old_ctx_gpa = 0;
+    uint32_t new_ctx_gpa = 0;
+    vmx_state_t state = ((struct vmx_data *)info->vmm_data)->state;
+
+    /* Check validity of VMXASSIST_MAGIC field */
+    if (read_guest_pa_memory(info, VMXASSIST_MAGIC_OFFSET, sizeof(vmx_magic), (uint8_t *)&vmx_magic) != sizeof(vmx_magic)) {
+       PrintError("Could not read guest VMXASSIST magic field\n");
+       return -1;
+    }
+
+    if (vmx_magic != VMXASSIST_MAGIC) {
+       PrintError("VMXASSIST_MAGIC field is invalid\n");
+        return -1;
+    }
+
+
+    /* Retrieve the pointer to the Old Context struct */
+    if (read_guest_pa_memory(info, VMXASSIST_OLD_CONTEXT, sizeof(old_ctx_gpa), (uint8_t *)&old_ctx_gpa) != sizeof(old_ctx_gpa)) {
+       PrintError("Could not read Old Context pointer field\n");
+       return -1;
+    }
+    
+    guest_pa_to_host_va(info, (addr_t)old_ctx_gpa, (addr_t *)&(old_ctx));
+    
+
+    /* Retrieve the pointer to the New Context struct */
+    if (read_guest_pa_memory(info, VMXASSIST_NEW_CONTEXT, sizeof(new_ctx_gpa), (uint8_t *)&new_ctx_gpa) != sizeof(new_ctx_gpa)) {
+       PrintError("Could not read New Context pointer field\n");
+       return -1;
+    }
+    
+    guest_pa_to_host_va(info, (addr_t)new_ctx_gpa, (addr_t *)&(new_ctx));
+    
+
+
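+    /*
+     * VMXASSIST_DISABLED: save the guest context and enter the one vmxassist provides.
+     * VMXASSIST_ENABLED:  vmxassist has finished; restore the previously saved context.
+     */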
+    if (state == VMXASSIST_DISABLED) {
+
+       /* Save the old Context */
+        if (vmx_save_world_ctx(info, old_ctx) != 0) {
+           PrintError("Could not save VMXASSIST world context\n");
+            return -1;
+       }
+
+        /* restore new context, vmxassist should launch the bios the first time */
+        if (vmx_restore_world_ctx(info, new_ctx) != 0) {
+           PrintError("VMXASSIST could not restore new context\n");
+            return -1;
+       }
+
+    } else if (state == VMXASSIST_ENABLED) {
+        /* restore old context */
+        if (vmx_restore_world_ctx(info, old_ctx) != 0) {
+           PrintError("VMXASSIST could not restore old context\n");
+            return -1;
+       }
+    }
+
+    return 0;
+}
+
+
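+/* Snapshot the guest state held in the VMCS into a vmxassist context structure. */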
+int vmx_save_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx) {
+    int error = 0;
+
+    PrintDebug("Saving world context from RIP: 0x%p\n", (void *)info->rip);
+
+    error |= vmcs_read(VMCS_GUEST_RIP, &(ctx->eip));
+    error |= vmcs_read(VMCS_GUEST_RSP, &(ctx->esp));
+    error |= vmcs_read(VMCS_GUEST_RFLAGS, &(ctx->eflags));
+
+    error |= vmcs_read(VMCS_CR0_READ_SHDW, &(ctx->cr0));
+    ctx->cr3 = info->shdw_pg_state.guest_cr3;
+    error |= vmcs_read(VMCS_CR4_READ_SHDW, &(ctx->cr4));
+
+    error |= vmcs_read(VMCS_GUEST_IDTR_LIMIT, &(ctx->idtr_limit));
+    error |= vmcs_read(VMCS_GUEST_IDTR_BASE, &(ctx->idtr_base));
+
+    error |= vmcs_read(VMCS_GUEST_GDTR_LIMIT, &(ctx->gdtr_limit));
+    error |= vmcs_read(VMCS_GUEST_GDTR_BASE, &(ctx->gdtr_base));
+
+    error |= vmcs_read(VMCS_GUEST_CS_SELECTOR, &(ctx->cs_sel));
+    error |= vmcs_read(VMCS_GUEST_CS_LIMIT, &(ctx->cs_limit));
+    error |= vmcs_read(VMCS_GUEST_CS_BASE, &(ctx->cs_base));
+    error |= vmcs_read(VMCS_GUEST_CS_ACCESS, &(ctx->cs_arbytes.bytes));
+
+    error |= vmcs_read(VMCS_GUEST_DS_SELECTOR, &(ctx->ds_sel));
+    error |= vmcs_read(VMCS_GUEST_DS_LIMIT, &(ctx->ds_limit));
+    error |= vmcs_read(VMCS_GUEST_DS_BASE, &(ctx->ds_base));
+    error |= vmcs_read(VMCS_GUEST_DS_ACCESS, &(ctx->ds_arbytes.bytes));
+
+    error |= vmcs_read(VMCS_GUEST_ES_SELECTOR, &(ctx->es_sel));
+    error |= vmcs_read(VMCS_GUEST_ES_LIMIT, &(ctx->es_limit));
+    error |= vmcs_read(VMCS_GUEST_ES_BASE, &(ctx->es_base));
+    error |= vmcs_read(VMCS_GUEST_ES_ACCESS, &(ctx->es_arbytes.bytes));
+
+    error |= vmcs_read(VMCS_GUEST_SS_SELECTOR, &(ctx->ss_sel));
+    error |= vmcs_read(VMCS_GUEST_SS_LIMIT, &(ctx->ss_limit));
+    error |= vmcs_read(VMCS_GUEST_SS_BASE, &(ctx->ss_base));
+    error |= vmcs_read(VMCS_GUEST_SS_ACCESS, &(ctx->ss_arbytes.bytes));
+
+    error |= vmcs_read(VMCS_GUEST_FS_SELECTOR, &(ctx->fs_sel));
+    error |= vmcs_read(VMCS_GUEST_FS_LIMIT, &(ctx->fs_limit));
+    error |= vmcs_read(VMCS_GUEST_FS_BASE, &(ctx->fs_base));
+    error |= vmcs_read(VMCS_GUEST_FS_ACCESS, &(ctx->fs_arbytes.bytes));
+
+    error |= vmcs_read(VMCS_GUEST_GS_SELECTOR, &(ctx->gs_sel));
+    error |= vmcs_read(VMCS_GUEST_GS_LIMIT, &(ctx->gs_limit));
+    error |= vmcs_read(VMCS_GUEST_GS_BASE, &(ctx->gs_base));
+    error |= vmcs_read(VMCS_GUEST_GS_ACCESS, &(ctx->gs_arbytes.bytes));
+
+    error |= vmcs_read(VMCS_GUEST_TR_SELECTOR, &(ctx->tr_sel));
+    error |= vmcs_read(VMCS_GUEST_TR_LIMIT, &(ctx->tr_limit));
+    error |= vmcs_read(VMCS_GUEST_TR_BASE, &(ctx->tr_base));
+    error |= vmcs_read(VMCS_GUEST_TR_ACCESS, &(ctx->tr_arbytes.bytes));
+
+    error |= vmcs_read(VMCS_GUEST_LDTR_SELECTOR, &(ctx->ldtr_sel));
+    error |= vmcs_read(VMCS_GUEST_LDTR_LIMIT, &(ctx->ldtr_limit));
+    error |= vmcs_read(VMCS_GUEST_LDTR_BASE, &(ctx->ldtr_base));
+    error |= vmcs_read(VMCS_GUEST_LDTR_ACCESS, &(ctx->ldtr_arbytes.bytes));
+
+    return error;
+}
+
+int vmx_restore_world_ctx(struct guest_info * info, struct vmx_assist_context * ctx) {
+    int error = 0;
+
+    PrintDebug("ctx rip: %p\n", (void *)(addr_t)ctx->eip);
+
+    error |= vmcs_write(VMCS_GUEST_RIP, ctx->eip);
+    error |= vmcs_write(VMCS_GUEST_RSP, ctx->esp);
+    error |= vmcs_write(VMCS_GUEST_RFLAGS, ctx->eflags);
+
+    error |= vmcs_write(VMCS_CR0_READ_SHDW, ctx->cr0);
+    info->shdw_pg_state.guest_cr3 = ctx->cr3;
+    error |= vmcs_write(VMCS_CR4_READ_SHDW, ctx->cr4);
+
+    error |= vmcs_write(VMCS_GUEST_IDTR_LIMIT, ctx->idtr_limit);
+    error |= vmcs_write(VMCS_GUEST_IDTR_BASE, ctx->idtr_base);
+
+    error |= vmcs_write(VMCS_GUEST_GDTR_LIMIT, ctx->gdtr_limit);
+    error |= vmcs_write(VMCS_GUEST_GDTR_BASE, ctx->gdtr_base);
+
+    error |= vmcs_write(VMCS_GUEST_CS_SELECTOR, ctx->cs_sel);
+    error |= vmcs_write(VMCS_GUEST_CS_LIMIT, ctx->cs_limit);
+    error |= vmcs_write(VMCS_GUEST_CS_BASE, ctx->cs_base);
+    error |= vmcs_write(VMCS_GUEST_CS_ACCESS, ctx->cs_arbytes.bytes);
+
+    error |= vmcs_write(VMCS_GUEST_DS_SELECTOR, ctx->ds_sel);
+    error |= vmcs_write(VMCS_GUEST_DS_LIMIT, ctx->ds_limit);
+    error |= vmcs_write(VMCS_GUEST_DS_BASE, ctx->ds_base);
+    error |= vmcs_write(VMCS_GUEST_DS_ACCESS, ctx->ds_arbytes.bytes);
+
+    error |= vmcs_write(VMCS_GUEST_ES_SELECTOR, ctx->es_sel);
+    error |= vmcs_write(VMCS_GUEST_ES_LIMIT, ctx->es_limit);
+    error |= vmcs_write(VMCS_GUEST_ES_BASE, ctx->es_base);
+    error |= vmcs_write(VMCS_GUEST_ES_ACCESS, ctx->es_arbytes.bytes);
+
+    error |= vmcs_write(VMCS_GUEST_SS_SELECTOR, ctx->ss_sel);
+    error |= vmcs_write(VMCS_GUEST_SS_LIMIT, ctx->ss_limit);
+    error |= vmcs_write(VMCS_GUEST_SS_BASE, ctx->ss_base);
+    error |= vmcs_write(VMCS_GUEST_SS_ACCESS, ctx->ss_arbytes.bytes);
+
+    error |= vmcs_write(VMCS_GUEST_FS_SELECTOR, ctx->fs_sel);
+    error |= vmcs_write(VMCS_GUEST_FS_LIMIT, ctx->fs_limit);
+    error |= vmcs_write(VMCS_GUEST_FS_BASE, ctx->fs_base);
+    error |= vmcs_write(VMCS_GUEST_FS_ACCESS, ctx->fs_arbytes.bytes);
+
+    error |= vmcs_write(VMCS_GUEST_GS_SELECTOR, ctx->gs_sel);
+    error |= vmcs_write(VMCS_GUEST_GS_LIMIT, ctx->gs_limit);
+    error |= vmcs_write(VMCS_GUEST_GS_BASE, ctx->gs_base);
+    error |= vmcs_write(VMCS_GUEST_GS_ACCESS, ctx->gs_arbytes.bytes);
+
+    error |= vmcs_write(VMCS_GUEST_TR_SELECTOR, ctx->tr_sel);
+    error |= vmcs_write(VMCS_GUEST_TR_LIMIT, ctx->tr_limit);
+    error |= vmcs_write(VMCS_GUEST_TR_BASE, ctx->tr_base);
+    error |= vmcs_write(VMCS_GUEST_TR_ACCESS, ctx->tr_arbytes.bytes);
+
+    error |= vmcs_write(VMCS_GUEST_LDTR_SELECTOR, ctx->ldtr_sel);
+    error |= vmcs_write(VMCS_GUEST_LDTR_LIMIT, ctx->ldtr_limit);
+    error |= vmcs_write(VMCS_GUEST_LDTR_BASE, ctx->ldtr_base);
+    error |= vmcs_write(VMCS_GUEST_LDTR_ACCESS, ctx->ldtr_arbytes.bytes);
+
+    return error;
+}
+
+
 
--- /dev/null
+
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmx_ctrl_regs.h>
+#include <palacios/vmm.h>
+#include <palacios/vmx_lowlevel.h>
+#include <palacios/vmx.h>
+#include <palacios/vmx_assist.h>
+#include <palacios/vm_guest_mem.h>
+
+static int handle_mov_to_cr0(struct guest_info * info, v3_reg_t new_val);
+
+int v3_vmx_handle_cr0_write(struct guest_info * info, v3_reg_t new_val) {
+    return handle_mov_to_cr0(info, new_val);
+}
+
+static int handle_mov_to_cr0(struct guest_info * info, v3_reg_t new_val) {
+    PrintDebug("CR0 RIP: %p\n", (void *)info->rip);
+
+    struct cr0_32 * guest_cr0 = (struct cr0_32 *)&(info->ctrl_regs.cr0);
+    struct cr0_32 * new_cr0 = (struct cr0_32 *)&new_val;
+    struct cr0_32 * shadow_cr0 = (struct cr0_32 *)&(info->shdw_pg_state.guest_cr0);
+
+    // PG and PE are always enabled for VMX
+
+    // Check if this is a paging transition
+    PrintDebug("Old CR0: 0x%x\n", *(uint32_t *)guest_cr0);
+    PrintDebug("Old shadow CR0: 0x%x\n", *(uint32_t *)shadow_cr0);
+    PrintDebug("New CR0: 0x%x\n", *(uint32_t *)new_cr0);
+            
+    if ( new_cr0->pe ) {
+
+        if (v3_vmxassist_ctx_switch(info) != 0) {
+            PrintError("Unable to execute VMXASSIST context switch!\n");
+            return -1;
+        }
+
+        ((struct vmx_data *)info->vmm_data)->state = VMXASSIST_DISABLED;
+
+        PrintDebug("New Shadow: 0x%x\n", *(uint32_t *)shadow_cr0);
+        PrintDebug("mem_mode: %s\n", v3_mem_mode_to_str(v3_get_vm_mem_mode(info))); 
+
+        return 0;
+    }
+
+    return -1;
+}
+
 
 #include <palacios/vmx_io.h>
 #include <palacios/vmx.h>
 #include <palacios/vmm_ctrl_regs.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vmx_ctrl_regs.h>
+#include <palacios/vmx_assist.h>
 
 
 static int inline check_vmcs_write(vmcs_field_t field, addr_t val)
 {
     int ret = 0;
-    ret = vmcs_write(field,val);
+    ret = vmcs_write(field, val);
 
     if (ret != VMX_SUCCESS) {
         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
-        return 1;
     }
 
-    return 0;
+    return ret;
 }
 
 static int inline check_vmcs_read(vmcs_field_t field, void * val)
 {
     int ret = 0;
-    ret = vmcs_read(field,val);
+    ret = vmcs_read(field, val);
 
-    if(ret != VMX_SUCCESS) {
+    if (ret != VMX_SUCCESS) {
         PrintError("VMREAD error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
-        return ret;
     }
 
-    return 0;
+    return ret;
 }
 
 static void inline translate_access_to_v3_seg(struct vmcs_segment_access * access, 
-        struct v3_segment * v3_seg)
-{
+                                             struct v3_segment * v3_seg) {
     v3_seg->type = access->type;
     v3_seg->system = access->desc_type;
     v3_seg->dpl = access->dpl;
     v3_seg->granularity = access->granularity;
 }
 
-static void load_vmcs_guest_state(struct guest_info * info)
+static int load_vmcs_guest_state(struct guest_info * info)
 {
-    check_vmcs_read(VMCS_GUEST_RIP, &(info->rip));
-    check_vmcs_read(VMCS_GUEST_RSP, &(info->vm_regs.rsp));
-    check_vmcs_read(VMCS_GUEST_CR0, &(info->ctrl_regs.cr0));
-    check_vmcs_read(VMCS_GUEST_CR3, &(info->ctrl_regs.cr3));
-    check_vmcs_read(VMCS_GUEST_CR4, &(info->ctrl_regs.cr4));
 
     struct vmcs_segment_access access;
+    int ret = 0;
+
+    // JRL: Add error checking
 
     memset(&access, 0, sizeof(access));
 
     /* IDTR Segment */
     check_vmcs_read(VMCS_GUEST_IDTR_BASE, &(info->segments.idtr.base));
     check_vmcs_read(VMCS_GUEST_IDTR_LIMIT, &(info->segments.idtr.limit));
+
+
+    /* 
+     *  Read the control state
+     */
+    check_vmcs_read(VMCS_GUEST_RIP, &(info->rip));
+    check_vmcs_read(VMCS_GUEST_RSP, &(info->vm_regs.rsp));
+    check_vmcs_read(VMCS_GUEST_CR0, &(info->ctrl_regs.cr0));
+    check_vmcs_read(VMCS_CR0_READ_SHDW, &(info->shdw_pg_state.guest_cr0));
+    check_vmcs_read(VMCS_GUEST_CR3, &(info->ctrl_regs.cr3));
+    check_vmcs_read(VMCS_GUEST_CR4, &(info->ctrl_regs.cr4));
+
+    return ret;
 }
 
 
+#if 0
 static void setup_v8086_mode_for_boot(struct guest_info * info)
 {
 
     flags->iopl = 3;
 
     info->rip = 0xfff0;
-    //info->vm_regs.rsp = 0x0;
    
     /* Zero the segment registers */
     memset(&(info->segments), 0, sizeof(struct v3_segment)*6);
         seg_ptr[i].granularity = 0;
     }
 
-    PrintDebug("END INFO!\n");
-#if 0
-    for(i = 6; i < 10; i++) {
-        seg_ptr[i].base = 0x0;
-        seg_ptr[i].limit = 0xffff;
-    }
-
-    info->segments.ldtr.type = 2;
-    info->segments.ldtr.system = 0;
-    info->segments.ldtr.present = 1;
-    info->segments.ldtr.granularity = 0;
-
-    info->segments.tr.type = 3;
-    info->segments.tr.system = 0;
-    info->segments.tr.present = 1;
-    info->segments.tr.granularity = 0;
-#endif
 }
 
-static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual)
-{
+#endif
+    
+static int inline handle_cr_access(struct guest_info * info, ulong_t exit_qual) {
     struct vmexit_cr_qual * cr_qual = (struct vmexit_cr_qual *)&exit_qual;
 
-    if(cr_qual->access_type < 2) {
-        ulong_t reg = 0;
-        switch(cr_qual->gpr) {
+    PrintDebug("CR access type: %d\n", cr_qual->access_type);
+
+    if (cr_qual->access_type < 2) {
+        v3_reg_t reg = 0;
+       
+       switch(cr_qual->gpr) {
             case 0:
                 reg = info->vm_regs.rax;
                 break;
                 reg = info->vm_regs.r15;
                 break;
         }
-        PrintDebug("RAX: %p\n", (void *)info->vm_regs.rax);
-
-        if(cr_qual->cr_id == 0
-                && (~reg & CR0_PE)
-                && ((struct vmx_data*)info->vmm_data)->state == VMXASSIST_STARTUP) {
-            setup_v8086_mode_for_boot(info);
-            info->shdw_pg_state.guest_cr0 = 0x0;
-            v3_update_vmcs_guest_state(info);
+
+        if (cr_qual->cr_id == 0) {
+            uint32_t instr_len;
+
+            vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
+
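+            /*
+             * The guest is clearing CR0.PE (dropping out of protected mode),
+             * so world-switch into vmxassist to handle real mode on its behalf.
+             */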
+            if ( ~reg & CR0_PE ) {
+
+                if (v3_vmxassist_ctx_switch(info) != 0) {
+                    PrintError("Unable to execute VMXASSIST context switch!\n");
+                    return -1;
+                }
+
+                load_vmcs_guest_state(info);
+
+                ((struct vmx_data *)info->vmm_data)->state = VMXASSIST_ENABLED;
+
+                PrintDebug("Loading vmxassist at RIP: 0x%p\n", (void *)info->rip);
+                return 0;
+            } else if (v3_vmx_handle_cr0_write(info, reg) != 0) {
+               PrintError("Could not handle CR0 Write\n");
+                return -1;
+            }
+
+            load_vmcs_guest_state(info);
+
+            PrintDebug("Leaving VMXASSIST and entering protected mode at RIP: 0x%p\n", (void *)info->rip);
+
             return 0;
         }
     }
+
     PrintError("Unhandled CR access\n");
     return -1;
 }
 
 
-int v3_handle_vmx_exit(struct v3_gprs * gprs, struct guest_info * info)
-{
+/* At this point the GPRs are already copied into the guest_info state */
+int v3_handle_vmx_exit(struct v3_gprs * gprs, struct guest_info * info) {
     uint32_t exit_reason;
     ulong_t exit_qual;
 
     check_vmcs_read(VMCS_EXIT_REASON, &exit_reason);
     check_vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
 
-    PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_reason, exit_qual);
+    // PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_reason, exit_qual);
 
     /* Update guest state */
     load_vmcs_guest_state(info);
   
-    switch(exit_reason)
-    {
-        case VMEXIT_INFO_EXCEPTION_OR_NMI:
-        {
-            uint32_t int_info;
-            pf_error_t error_code;
-            check_vmcs_read(VMCS_EXIT_INT_INFO, &int_info);
-            check_vmcs_read(VMCS_EXIT_INT_ERR, &error_code);
-
-            if((uint8_t)int_info == 0x0e) {
-                PrintDebug("Page Fault at %p\n", (void*)exit_qual);
-                if(info->shdw_pg_mode == SHADOW_PAGING) {
-                    if(v3_handle_shadow_pagefault(info, (addr_t)exit_qual, error_code) == -1) {
-                        return -1;
-                    }
-                } else {
-                    PrintError("Page fault in unimplemented paging mode\n");
-                    return -1;
-                }
-            } else {
-                PrintDebug("Unknown exception: 0x%x\n", (uint8_t)int_info);
-                v3_print_GPRs(info);
-                return -1;
-            }
-            break;
-        }
-
-        case VMEXIT_IO_INSTR: 
-        {
-            struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
-
-            if(io_qual->dir == 0) {
-                if(io_qual->string) {
-                    if(v3_handle_vmx_io_outs(info) == -1) {
-                        return -1;
-                    }
-                } else {
-                    if(v3_handle_vmx_io_out(info) == -1) {
-                        return -1;
-                    }
-                }
-            } else {
-                if(io_qual->string) {
-                    if(v3_handle_vmx_io_ins(info) == -1) {
-                        return -1;
-                    }
-                } else {
-                    if(v3_handle_vmx_io_in(info) == -1) {
-                        return -1;
-                    }
-                }
-            }
-            break;
-        }
-
+    switch (exit_reason) {
+        case VMEXIT_INFO_EXCEPTION_OR_NMI: {
+           uint32_t int_info;
+           pf_error_t error_code;
+
+           check_vmcs_read(VMCS_EXIT_INT_INFO, &int_info);
+           check_vmcs_read(VMCS_EXIT_INT_ERR, &error_code);
+           
+           // JRL: Change "0x0e" to a macro value
+           if ((uint8_t)int_info == 0x0e) {
+               PrintDebug("Page Fault at %p\n", (void *)exit_qual);
+               
+               if (info->shdw_pg_mode == SHADOW_PAGING) {
+                   if (v3_handle_shadow_pagefault(info, (addr_t)exit_qual, error_code) == -1) {
+                       PrintError("Error handling shadow page fault\n");
+                       return -1;
+                   }
+               } else {
+                   PrintError("Page fault in unimplemented paging mode\n");
+                   return -1;
+               }
+           } else {
+               PrintDebug("Unknown exception: 0x%x\n", (uint8_t)int_info);
+               v3_print_GPRs(info);
+               return -1;
+           }
+           break;
+       }
+           
+        case VMEXIT_CPUID: {
+           int instr_len;
+
+           v3_cpuid(info->vm_regs.rax, (addr_t *)&(info->vm_regs.rax), (addr_t *)&(info->vm_regs.rbx), 
+                    (addr_t *)&(info->vm_regs.rcx), (addr_t *)&(info->vm_regs.rdx));
+
+           check_vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
+
+           info->rip += instr_len;
+           break;
+       }
+           
+        case VMEXIT_IO_INSTR: {
+           struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
+           
+           if (io_qual->dir == 0) {
+               if (io_qual->string) {
+                   if (v3_handle_vmx_io_outs(info) == -1) {
+                       PrintError("Error in outs IO handler\n");
+                       return -1;
+                   }
+               } else {
+                   if (v3_handle_vmx_io_out(info) == -1) {
+                       PrintError("Error in out IO handler\n");
+                       return -1;
+                   }
+               }
+           } else {
+               if (io_qual->string) {
+                   if (v3_handle_vmx_io_ins(info) == -1) {
+                       PrintError("Error in ins IO handler\n");
+                       return -1;
+                   }
+               } else {
+                   if (v3_handle_vmx_io_in(info) == -1) {
+                       PrintError("Error in in IO handler\n");
+                       return -1;
+                   }
+               }
+           }
+           break;
+       }
+           
         case VMEXIT_CR_REG_ACCESSES:
-            if(handle_cr_access(info,exit_qual) != 0)
+            if (handle_cr_access(info, exit_qual) != 0) {
+               PrintError("Error handling CR access\n");
                 return -1;
+           }
+
             break;
 
         default:
-            PrintError("Unhandled VMEXIT\n");
+            PrintError("Unhandled VMEXIT: %u (0x%x), %lu (0x%lx)\n", exit_reason, exit_reason, exit_qual, exit_qual);
             return -1;
     }
 
 
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
 
 #include <palacios/vmx_io.h>
 #include <palacios/vmm_io.h>
 #include <palacios/vmx_lowlevel.h>
 #include <palacios/vmm.h>
 #include <palacios/vmx_handler.h>
+#include <palacios/vmm_ctrl_regs.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmm_decoder.h>
+
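+/* Compile out PrintDebug in this file unless IO debugging is configured. */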
+#ifndef CONFIG_DEBUG_IO
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
 
 /* Same as SVM */
 static int update_map(struct guest_info * info, uint16_t port, int hook_read, int hook_write)
     return 0;
 }
 
-int v3_handle_vmx_io_in(struct guest_info * info)
-{
+int v3_handle_vmx_io_in(struct guest_info * info) {
     ulong_t exit_qual;
+    uint32_t instr_length = 0;
 
     vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
 
     struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
 
-    struct v3_io_hook * hook = v3_get_io_hook(info,io_qual->port);
+    struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
     int read_size = 0;
 
-    if(hook == NULL) {
+    if (hook == NULL) {
         PrintError("Hook not present for IN on port %x\n", io_qual->port);
         return -1;
     }
 
-    read_size = 1<<(io_qual->access_size);
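+    /* The exit qualification encodes the access size as (bytes - 1): 0, 1 or 3. */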
+    read_size = io_qual->access_size + 1;
 
     PrintDebug("IN of %d bytes on port %d (0x%x)\n", read_size, io_qual->port, io_qual->port);
 
-    if(hook->read(io_qual->port, &(info->vm_regs.rax), read_size, hook->priv_data) != read_size) {
+    if (hook->read(io_qual->port, &(info->vm_regs.rax), read_size, hook->priv_data) != read_size) {
         PrintError("Read failure for IN on port %x\n", io_qual->port);
         return -1;
     }
 
-    uint32_t instr_length = 0;
 
-    if(vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
+
+    if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
         PrintError("Could not read instruction length\n");
         return -1;
     }
 
 int v3_handle_vmx_io_ins(struct guest_info * info)
 {
-    PrintDebug("INS not implemented\n");
-    return -1;
+    ulong_t exit_qual;
+
+    vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
+
+    struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
+    struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
+    int read_size;
+    addr_t guest_va;
+    addr_t host_addr;
+    int rdi_change;
+    ulong_t rep_num = 1;
+
+    if (hook == NULL) {
+        PrintError("Hook not present for INS on port 0x%x\n", io_qual->port);
+        return -1;
+    }
+
+    PrintDebug("INS on port 0x%x\n", io_qual->port);
+
+    read_size = io_qual->access_size + 1;
+
+    if (io_qual->rep) {
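+        // Grab the address sized bits of rcx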
+        rep_num = info->vm_regs.rcx & get_gpr_mask(info);
+    }
+    
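+    /* The direction flag determines whether rdi moves up or down through memory. */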
+    if ( ((struct rflags *)&(info->ctrl_regs.rflags))->df ) {
+        rdi_change = -read_size;
+    } else {
+        rdi_change = read_size;
+    }
+
+    PrintDebug("INS size=%d for %ld steps\n", read_size, rep_num);
+
+    vmcs_read(VMCS_GUEST_LINEAR_ADDR, &guest_va);
+
+    if (guest_va_to_host_va(info, guest_va, &host_addr) == -1) {
+        PrintError("Could not convert Guest VA to host VA\n");
+        return -1;
+    }
+
+    do {
+        if (hook->read(io_qual->port, (char *)host_addr, read_size, hook->priv_data) != read_size) {
+            PrintError("Read Failure for INS on port 0x%x\n", io_qual->port);
+            return -1;
+        }
+
+        host_addr += rdi_change;
+        info->vm_regs.rdi += rdi_change;
+
+        if (io_qual->rep) {
+            --info->vm_regs.rcx;
+        }
+        --rep_num;
+
+    } while (rep_num > 0);
+
+    int instr_len = 0;
+
+    vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
+
+    info->rip += instr_len;
+
+    return 0;
 }
 
-int v3_handle_vmx_io_out(struct guest_info * info)
-{
+
+
+int v3_handle_vmx_io_out(struct guest_info * info) {
     ulong_t exit_qual;
 
     vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
 
     struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
 
-    if(hook == NULL) {
+    if (hook == NULL) {
         PrintError("Hook not present for out on port %x\n", io_qual->port);
         return -1;
     }
 
-    int write_size = 1<<(io_qual->access_size);
+    int write_size = io_qual->access_size + 1;
     
     PrintDebug("OUT of %d bytes on port %d (0x%x)\n", write_size, io_qual->port, io_qual->port);
 
 
-    if(hook->write(io_qual->port, &(info->vm_regs.rax), write_size, hook->priv_data) != write_size) {
+    if (hook->write(io_qual->port, &(info->vm_regs.rax), write_size, hook->priv_data) != write_size) {
         PrintError("Write failure for out on port %x\n",io_qual->port);
         return -1;
     }
 
     uint32_t instr_length = 0;
 
-    if(vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
+    if (vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_length) != VMX_SUCCESS) {
         PrintError("Could not read instruction length\n");
         return -1;
     } 
     return 0;
 }
 
-int v3_handle_vmx_io_outs(struct guest_info * info)
-{
+
+
+int v3_handle_vmx_io_outs(struct guest_info * info) {
     ulong_t exit_qual;
 
     vmcs_read(VMCS_EXIT_QUAL, &exit_qual);
 
     struct vmexit_io_qual * io_qual = (struct vmexit_io_qual *)&exit_qual;
+    struct v3_io_hook * hook = v3_get_io_hook(info, io_qual->port);
+    int write_size;
+    addr_t guest_va;
+    addr_t host_addr;
+    int rsi_change;
+    ulong_t rep_num = 1;
+
+    if (hook == NULL) {
+        PrintError("Hook not present for OUTS on port 0x%x\n", io_qual->port);
+        return -1;
+    }
+
+    PrintDebug("OUTS on port 0x%x\n", io_qual->port);
+
+    write_size = io_qual->access_size + 1;
+
+    if (io_qual->rep) {
+        // Grab the address sized bits of rcx
+        rep_num = info->vm_regs.rcx & get_gpr_mask(info);
+    }
 
-    PrintDebug("OUTS on port %d, (0x%x)\n", io_qual->port, io_qual->port);
-    return -1;
+    if ( ((struct rflags *)&(info->ctrl_regs.rflags))->df ) {
+        rsi_change = -write_size;
+    } else {
+        rsi_change = write_size;
+    }
+
+    vmcs_read(VMCS_GUEST_LINEAR_ADDR, &guest_va);
+
+    PrintDebug("OUTS size=%d for %ld steps\n", write_size, rep_num);
+
+    if (guest_va_to_host_va(info, guest_va, &host_addr) == -1) {
+        PrintError("Could not convert guest VA to host VA\n");
+        return -1;
+    }
+
+    do {
+       if (hook->write(io_qual->port, (char *)host_addr, write_size, hook->priv_data) != write_size) {
+           PrintError("Write failure for OUTS on port 0x%x\n", io_qual->port);
+           return -1;
+       }
+
+       host_addr += rsi_change;
+       info->vm_regs.rsi += rsi_change;
+
+       if (io_qual->rep) {
+           --info->vm_regs.rcx;
+       }
+       --rep_num;
+
+    } while (rep_num > 0);
+
+    int instr_len = 0;
+
+    vmcs_read(VMCS_EXIT_INSTR_LEN, &instr_len);
+
+    info->rip += instr_len;
+
+    return 0;
 }
+
 
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Andy Gocke <agocke@gmail.com>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Andy Gocke <agocke@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
 
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 
 /* Same as SVM */
-static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int hook_writes)
-{
+static int update_map(struct guest_info * info, uint_t msr, int hook_reads, int hook_writes) {
 
 #if 0
     int index = get_bitmap_index(msr);
     return 0;
 }
 
-int v3_init_vmx_msr_map(struct guest_info * info)
-{
+int v3_init_vmx_msr_map(struct guest_info * info) {
    struct v3_msr_map * msr_map = &(info->msr_map);
 
    msr_map->update_map = update_map;