Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


HVM Enhancements + Bug Fixes
[palacios.git] / palacios / src / palacios / vmm_hvm.c
index 43a8672..ec1c42c 100644 (file)
 #include <palacios/vmm_debug.h>
 
 
+struct gdt_area { // packed image built by hypercall 0x51 and written into the HRT's registered GDT save area (hrt_gdt_gva)
+    struct {
+        uint16_t limit; // copied from the requesting core's segments.gdtr.limit
+        uint64_t base; // set to hrt_gdt_gva + sizeof(struct gdt_area), i.e. the guest address of gdt[] below
+    } __attribute__((packed)) gdtr;
+
+    uint64_t fsbase; // taken from hypercall argument a2
+    uint16_t cs; // cs..ss: segment selectors copied from the requesting (non-HRT) core
+    uint16_t ds;
+    uint16_t es;
+    uint16_t fs;
+    uint16_t gs;
+    uint16_t ss;
+
+    uint64_t gdt[0]; // trailing variable-length payload: gdtr.limit bytes read from the requesting core's GDT
+} __attribute__((packed));
+
+
 /*
 
   MEM     = Total size of memory in the GPA (in MB)
@@ -84,12 +102,52 @@ int v3_deinit_hvm()
 // ignore requests from when we are in the wrong state
 #define ENFORCE_STATE_MACHINE 1
 
-// invoke the HRT using a page fault instead of
-// the SWINTR mechanism
-#define USE_UPCALL_MAGIC_PF  1
+// invoke the HRT using one of the following mechanisms
 #define UPCALL_MAGIC_ADDRESS 0x0000800df00df00dULL
 #define UPCALL_MAGIC_ERROR   0xf00df00d
 
+
+/*
+ * Inject an upcall into core number "num" of the VM that "core" belongs to.
+ *
+ * The delivery mechanism is chosen at build time.  If more than one of the
+ * configuration symbols is defined, the first match in this order is used:
+ *
+ *   V3_CONFIG_HVM_UPCALL_MAGIC_GPF  - raise #GP with error code UPCALL_MAGIC_ERROR
+ *   V3_CONFIG_HVM_UPCALL_MAGIC_PF   - set CR2 to UPCALL_MAGIC_ADDRESS and raise
+ *                                     #PF with error code UPCALL_MAGIC_ERROR
+ *   V3_CONFIG_HVM_UPCALL_MAGIC_SWIN - raise the software interrupt stored in
+ *                                     hvm_state.hrt_int_vector
+ *
+ * core - core making the request; used for logging and to reach vm_info
+ * num  - index of the target core in core->vm_info->cores[]
+ *
+ * Returns 0 when the event was raised, -1 when raising it failed or when no
+ * mechanism is configured.  This function only raises the event; forcing the
+ * target core to exit (v3_interrupt_cpu) is left to the caller.
+ */
+static int magic_upcall(struct guest_info *core, uint64_t num)
+{
+#if defined(V3_CONFIG_HVM_UPCALL_MAGIC_GPF)
+    struct guest_info *target = &core->vm_info->cores[num];
+
+    PrintDebug(core->vm_info, core, "hvm: injecting magic #GP into core %llu\n",num);
+    if (v3_raise_exception_with_error(target,
+                                      GPF_EXCEPTION,
+                                      UPCALL_MAGIC_ERROR)) {
+        PrintError(core->vm_info, core,"hvm: cannot inject HRT #GP to core %llu\n",num);
+        return -1;
+    }
+    return 0;
+#elif defined(V3_CONFIG_HVM_UPCALL_MAGIC_PF)
+    struct guest_info *target = &core->vm_info->cores[num];
+
+    PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %llu\n",num);
+    // The magic address in CR2 together with the magic error code lets the
+    // HRT tell this upcall apart from an ordinary page fault
+    target->ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
+    if (v3_raise_exception_with_error(target,
+                                      PF_EXCEPTION,
+                                      UPCALL_MAGIC_ERROR)) {
+        PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %llu\n",num);
+        return -1;
+    }
+    return 0;
+#elif defined(V3_CONFIG_HVM_UPCALL_MAGIC_SWIN)
+    struct guest_info *target = &core->vm_info->cores[num];
+
+    PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%x into core %llu\n",core->vm_info->hvm_state.hrt_int_vector,num);
+    if (v3_raise_swintr(target,core->vm_info->hvm_state.hrt_int_vector)) {
+        PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %llu\n",num);
+        return -1;
+    }
+    return 0;
+#else
+    PrintError(core->vm_info,core,"hvm: no upcall mechanism is enabled!\n");
+    return -1;
+#endif
+}
+
+
+
+
 /*
   64 bit only hypercall:
 
@@ -104,12 +162,17 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
     uint64_t bitness = core->vm_regs.rbx;
     uint64_t a1 = core->vm_regs.rcx;
     uint64_t a2 = core->vm_regs.rdx;
+    uint64_t a3 = core->vm_regs.rsi;
     struct v3_vm_hvm *h = &core->vm_info->hvm_state;
+    addr_t irq_state;
 
+    // Let's be paranoid here
+    irq_state = v3_lock_irqsave(h->hypercall_lock);
 
     if (bitness!=0x6464646464646464) { 
        PrintError(core->vm_info,core,"hvm: unable to handle non-64 bit hypercall\n");
        core->vm_regs.rax = -1;
+       v3_unlock_irqrestore(h->hypercall_lock,irq_state);
        return 0;
     }
 
@@ -155,11 +218,86 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
            }
            break;
            
+       case 0x8: // replace HRT image
+           // a2 = gva of image
+           // a3 = size of image
+           PrintDebug(core->vm_info,core,"hvm: request replacement HRT image addr=0x%llx size=0x%llx\n",a2,a3);
+
+           if (h->hrt_image) { 
+               // delete old
+               V3_VFree(h->hrt_image);
+               h->hrt_image = 0;
+           }
+
+           h->hrt_image = V3_VMalloc(a3);
+
+           if (!(h->hrt_image)) {
+               PrintError(core->vm_info,core, "hvm: failed to allocate space for replacement image\n");
+               core->vm_regs.rax = -1;
+           } else {
+               if (v3_read_gva_memory(core, a2, a3, (uint8_t*) h->hrt_image)!=a3) { 
+                   PrintError(core->vm_info, core, "hvm: cannot read replacement image\n");
+                   core->vm_regs.rax = -1;
+               } else {
+                   h->hrt_image_size = a3; 
+                   core->vm_regs.rax = 0;
+               }
+           }
+
+           if (core->vm_regs.rax) { 
+               PrintError(core->vm_info,core,"hvm: Failed to replace HRT image\n");
+           } else {
+               PrintDebug(core->vm_info,core,"hvm: HRT image successfully replaced\n");
+           }
+
+           break;
+
+
        case 0xf: // get HRT state
            core->vm_regs.rax = h->trans_state;
+           if (v3_write_gva_memory(core, a2, sizeof(h->ros_event), (uint8_t*) &h->ros_event)!=sizeof(h->ros_event)) { 
+               PrintError(core->vm_info, core, "hvm: cannot write back ROS event state to %p - continuing\n",(void*)a2);
+           }
            //PrintDebug(core->vm_info,core,"hvm: get HRT transaction state 0x%llx\n",core->vm_regs.rax);
            break;
 
+       case 0x10:
+           PrintDebug(core->vm_info, core, "hvm: ROS event request\n");
+           if (h->ros_event.event_type!=ROS_NONE) { 
+               PrintError(core->vm_info, core, "hvm: ROS event is already in progress\n");
+               core->vm_regs.rax = -1;
+           } else {
+               if (v3_read_gva_memory(core, a2, sizeof(h->ros_event), (uint8_t*)&h->ros_event)!=sizeof(h->ros_event)) { 
+                   PrintError(core->vm_info, core, "hvm: cannot read ROS event from %p\n",(void*)a2);
+                   core->vm_regs.rax = -1;
+               } else {
+                   core->vm_regs.rax = 0;
+                   PrintDebug(core->vm_info, core, "hvm: copied new ROS event (type=%s)\n",
+                              h->ros_event.event_type == ROS_PAGE_FAULT ? "page fault" : 
+                              (h->ros_event.event_type == ROS_SYSCALL ? "syscall" : "none"));
+                   
+               }
+           }
+
+           break;
+
+       case 0x1e: // ack result (HRT has read the result of the finished event)
+           if (h->ros_event.event_type != ROS_DONE) {
+               PrintError(core->vm_info, core, "hvm: cannot ack event result when not in ROS_DONE state\n");
+               core->vm_regs.rax = -1;
+           } else {
+               h->ros_event.event_type=ROS_NONE;
+               PrintDebug(core->vm_info, core, "hvm: HRT core acks event result\n");
+               core->vm_regs.rax = 0;
+           }
+           break;
+
+       case 0x1f:
+           PrintDebug(core->vm_info, core, "hvm: completion of ROS event (rc=0x%llx)\n",a2);
+           h->ros_event.event_type=ROS_DONE;
+           h->ros_event.last_ros_event_result = a2;
+           break;
+
        case 0x20: // invoke function (ROS->HRT)
        case 0x21: // invoke parallel function (ROS->HRT)
            if (v3_is_hvm_hrt_core(core)) { 
@@ -189,28 +327,12 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
                    h->trans_count = last-first+1;
 
                    for (cur=first;cur<=last;cur++) { 
-
-#if USE_UPCALL_MAGIC_PF
-                       PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %llu\n",cur);
-                       core->vm_info->cores[cur].ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
-                       if (v3_raise_exception_with_error(&core->vm_info->cores[cur],
-                                                         PF_EXCEPTION, 
-                                                         UPCALL_MAGIC_ERROR)) { 
-                           PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %llu\n",cur);
-                           core->vm_regs.rax = -1;
-                           break;
-                       }
-#else
-                       PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%u into core %llu\n",h->hrt_int_vector,cur);
-                       if (v3_raise_swintr(&core->vm_info->cores[cur],h->hrt_int_vector)) { 
-                           PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %llu\n",cur);
+                       if (magic_upcall(core,cur)) {
                            core->vm_regs.rax = -1;
                            break;
                        }
-#endif
                        // Force core to exit now
                        v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
-                         
                    }
                    if (core->vm_regs.rax==0) { 
                        if (a1==0x20) { 
@@ -253,25 +375,11 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
                    h->trans_count = last-first+1;
 
                    for (cur=first;cur<=last;cur++) { 
-
-#if USE_UPCALL_MAGIC_PF
-                       PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %llu\n",cur);
-                       core->vm_info->cores[cur].ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
-                       if (v3_raise_exception_with_error(&core->vm_info->cores[cur],
-                                                         PF_EXCEPTION, 
-                                                         UPCALL_MAGIC_ERROR)) { 
-                           PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %llu\n",cur);
-                           core->vm_regs.rax = -1;
-                           break;
-                       }
-#else
-                       PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%u into core %llu\n",h->hrt_int_vector,cur);
-                       if (v3_raise_swintr(&core->vm_info->cores[cur],h->hrt_int_vector)) { 
-                           PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %llu\n",cur);
+                       
+                       if (magic_upcall(core,cur)) { 
                            core->vm_regs.rax = -1;
                            break;
                        }
-#endif
                        // Force core to exit now
                        v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
                          
@@ -328,7 +436,7 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
                core->vm_regs.rax=-1;
            } else {
                if (ENFORCE_STATE_MACHINE && h->trans_state!=HRT_IDLE) { 
-                   PrintError(core->vm_info,core,"hvm: request to %smerge address space in non-idle state\n",a1==0x30 ? "" : "un");
+                   PrintError(core->vm_info,core,"hvm: request to %smerge address space in non-idle state (%d)\n",a1==0x30 ? "" : "un", h->trans_state);
                    core->vm_regs.rax=-1;
                } else {
                    uint64_t *page = (uint64_t *) h->comm_page_hva;
@@ -340,27 +448,17 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
                    page[1] = core->ctrl_regs.cr3;  // this is a do-not-care for an unmerge
 
                    core->vm_regs.rax = 0;
-#if USE_UPCALL_MAGIC_PF
-                   PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %u\n",h->first_hrt_core);
-                   core->vm_info->cores[h->first_hrt_core].ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
-                   if (v3_raise_exception_with_error(&core->vm_info->cores[h->first_hrt_core],
-                                                     PF_EXCEPTION,  
-                                                     UPCALL_MAGIC_ERROR)) { 
-                     PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %u\n",h->first_hrt_core);
-                     core->vm_regs.rax = -1;
-                     break;
-                   }
-#else
-                   PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%u into core %u\n",h->hrt_int_vector,h->first_hrt_core);
-                   if (v3_raise_swintr(&core->vm_info->cores[h->first_hrt_core],h->hrt_int_vector)) { 
-                       PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %u\n",h->first_hrt_core);
+
+                   h->trans_state = HRT_MERGE;
+
+                   if (magic_upcall(core,h->first_hrt_core)) {
                        core->vm_regs.rax = -1;
-                   } 
-#endif         
+                       break;
+                   }
+
                    // Force core to exit now
                    v3_interrupt_cpu(core->vm_info,core->vm_info->cores[h->first_hrt_core].pcpu_id,0);
 
-                   h->trans_state = HRT_MERGE;
                }
                
            }
@@ -384,6 +482,225 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
            }
                    
            break;
+           
+       case 0x40: // install or remove signal handler
+           if (v3_is_hvm_hrt_core(core)) { 
+               PrintError(core->vm_info,core, "hvm: HRT cannot install signal handler...\n");
+               core->vm_regs.rax=-1;
+           } else {
+               PrintDebug(core->vm_info,core,"hvm: install signal handler for CR3=%p, handler=%p, stack=%p\n",(void*)core->ctrl_regs.cr3, (void*)a2, (void*)a3);
+               if (h->ros_signal.code) { 
+                   PrintError(core->vm_info,core,"hvm: signal is pending...\n");
+                   core->vm_regs.rax=-1;
+               } else {
+                   if ((a2 || a3) && (h->ros_signal.handler || h->ros_signal.stack)) { 
+                       PrintError(core->vm_info,core,"hvm: attempt to replace existing handler without removing it first\n");
+                       core->vm_regs.rax=-1;
+                   } else {
+                       // actually make the change
+                       h->ros_signal.handler=a2;
+                       h->ros_signal.stack=a3;
+                       h->ros_signal.cr3=core->ctrl_regs.cr3;
+                       core->vm_regs.rax=0;
+
+                       // test by signalling back a hello 
+                       // if (a2 && a3) { 
+                       //    v3_hvm_signal_ros(core->vm_info,0xf00d);
+                       //}
+                   }
+               }
+           }
+           break;
+
+       case 0x41: // raise signal in the ROS from HRT or ROS
+           PrintDebug(core->vm_info,core,"hvm: HRT raises signal code=0x%llx\n", a2);
+           core->vm_regs.rax = v3_hvm_signal_ros(core->vm_info,a2);
+           break;
+
+       case 0x51: // fill GDT area (HRT only)
+           if (v3_is_hvm_hrt_core(core)) {
+               PrintError(core->vm_info, core, "hvm: HRT cannot request a GDT area fill\n");
+               core->vm_regs.rax = -1;
+           } else {
+               struct guest_info * hrt_core = &core->vm_info->cores[h->first_hrt_core];
+               struct gdt_area * area = V3_Malloc(sizeof(struct gdt_area) + core->segments.gdtr.limit);
+               if (!area) {
+                   PrintError(core->vm_info, core, "hvm: could not allocate GDT area\n");
+                   core->vm_regs.rax = -1;
+                   break;
+               }
+
+               PrintDebug(core->vm_info, core, "hvm: ROS requests to fill GDT area with fsbase=%p\n", (void*)a2);
+
+               if (!h->hrt_gdt_gva) {
+                   PrintError(core->vm_info, core, "hvm: HRT has not registered a GDT state save area\n");
+                   core->vm_regs.rax = -1;
+                   V3_Free(area);
+                   break;
+               }
+
+               area->gdtr.base  = h->hrt_gdt_gva + sizeof(struct gdt_area);
+               area->gdtr.limit = core->segments.gdtr.limit;
+               area->fsbase     = a2;
+               area->cs         = core->segments.cs.selector;
+               area->ds         = core->segments.ds.selector;
+               area->es         = core->segments.es.selector;
+               area->fs         = core->segments.fs.selector;
+               area->gs         = core->segments.gs.selector;
+               area->ss         = core->segments.ss.selector;
+               
+               if (v3_read_gva_memory(core, 
+                                      core->segments.gdtr.base,
+                                      core->segments.gdtr.limit,
+                                      (uint8_t*)area->gdt) != core->segments.gdtr.limit) {
+                   PrintError(core->vm_info, core, "hvm: could not copy GDT from ROS\n");
+                   core->vm_regs.rax = -1;
+                   V3_Free(area);
+                   break;
+               }
+                                       
+               uint_t area_size = sizeof(struct gdt_area) + core->segments.gdtr.limit;
+
+               // copy the entire area over
+               PrintDebug(core->vm_info, core, "hvm: copying %u bytes into GDT area\n", area_size);
+
+               if (v3_write_gva_memory(hrt_core, h->hrt_gdt_gva, area_size, (uchar_t*)area) != area_size) {
+                   PrintError(core->vm_info, core, "hvm: could not copy GDT area\n");
+                   core->vm_regs.rax = -1;
+                   V3_Free(area);
+                   break;
+               }
+
+               if (ENFORCE_STATE_MACHINE && h->trans_state!=HRT_IDLE) { 
+                   PrintError(core->vm_info,core, "hvm: cannot sync GDT in state %d\n", h->trans_state);
+                   core->vm_regs.rax = -1;
+                   V3_Free(area);
+                   break;
+               } else {
+                   uint64_t *page = (uint64_t *) h->comm_page_hva;
+                   uint64_t first, last, cur;
+
+                   PrintDebug(core->vm_info,core, "hvm: sync GDT\n");
+                   page[0] = a1;
+                   page[1] = h->hrt_gdt_gva;
+                   page[2] = a3;
+
+                   first=last=h->first_hrt_core;
+                   
+                   core->vm_regs.rax = 0;
+                   
+                   h->trans_count = last-first+1;
+
+                   for (cur=first;cur<=last;cur++) { 
+                       if (magic_upcall(core,cur)) {
+                           core->vm_regs.rax = -1;
+                           break;
+                       }
+                       // Force core to exit now
+                       v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
+                   }
+                   
+                   if (core->vm_regs.rax==0) { 
+                       h->trans_state = HRT_GDTSYNC;
+                   }  else {
+                       PrintError(core->vm_info,core,"hvm: in inconsistent state due to HRT GDT SYNC failure\n");
+                       h->trans_state = HRT_IDLE;
+                       h->trans_count = 0;
+                   }
+
+                   V3_Free(area);
+
+               }
+               
+           }
+           
+           break;
+        
+       case 0x52: // register HRT GDT area
+           if (!v3_is_hvm_hrt_core(core)) {
+               PrintError(core->vm_info, core, "hvm: ROS cannot install a GDT area\n"); 
+               core->vm_regs.rax = -1;
+           } else {
+               PrintDebug(core->vm_info, core, "hvm: HRT registers GDT save area at gva=%p\n", (void*)a2);
+               h->hrt_gdt_gva = a2;
+               core->vm_regs.rax = 0;
+           }
+
+        PrintDebug(core->vm_info, core, "hvm: Printing current HRT GDT...\n");
+#ifdef V3_CONFIG_DEBUG_HVM
+        v3_print_gdt(core, core->segments.gdtr.base);
+#endif
+       
+        break;
+       
+       case 0x53: // restore GDT
+
+           if (v3_is_hvm_hrt_core(core)) {
+               PrintError(core->vm_info, core, "hvm: HRT cannot request GDT restoration\n");
+               core->vm_regs.rax = -1;
+               break;
+           } else {
+               PrintDebug(core->vm_info, core, "hvm: ROS requesting to restore original GDT\n");
+               core->vm_regs.rax = 0;
+           }
+           
+           if (ENFORCE_STATE_MACHINE && h->trans_state!=HRT_IDLE) { 
+               PrintError(core->vm_info,core, "hvm: cannot sync GDT in state %d\n", h->trans_state);
+               core->vm_regs.rax = -1;
+               break;
+           } else {
+               uint64_t *page = (uint64_t *) h->comm_page_hva;
+               uint64_t first, last, cur;
+               
+               PrintDebug(core->vm_info,core, "hvm: restore GDT\n");
+               page[0] = a1;
+               
+               first=last=h->first_hrt_core;
+               
+               core->vm_regs.rax = 0;
+               
+               h->trans_count = last-first+1;
+               
+               for (cur=first;cur<=last;cur++) { 
+                   if (magic_upcall(core,cur)) {
+                       core->vm_regs.rax = -1;
+                       break;
+                   }
+                   // Force core to exit now
+                   v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
+               }
+               
+               if (core->vm_regs.rax==0) { 
+                   h->trans_state = HRT_GDTSYNC;
+               }  else {
+                   PrintError(core->vm_info,core,"hvm: in inconsistent state due to HRT GDT SYNC failure\n");
+                   h->trans_state = HRT_IDLE;
+                   h->trans_count = 0;
+               }
+           }
+           
+           break;
+           
+       case 0x5f: // GDT sync operation done
+           if (v3_is_hvm_ros_core(core)) { 
+               PrintError(core->vm_info,core, "hvm: invalid request for GDT sync done from ROS core\n");
+               core->vm_regs.rax=-1;
+           } else {
+               if (ENFORCE_STATE_MACHINE && h->trans_state != HRT_GDTSYNC) {
+                   PrintError(core->vm_info,core,"hvm: GDT sync done when in incorrect state (%d)\n", h->trans_state);
+                   core->vm_regs.rax=-1;
+               } else {
+                   PrintDebug(core->vm_info,core, "hvm: GDT sync complete - back to idle\n");
+                   PrintDebug(core->vm_info, core, "hvm: Dumping new HRT GDT...\n");
+#ifdef V3_CONFIG_DEBUG_HVM
+                   v3_print_gdt(core, core->segments.gdtr.base);
+#endif
+                   h->trans_state=HRT_IDLE;
+                   core->vm_regs.rax=0;
+               }
+               
+           }
+           break;
 
        default:
            PrintError(core->vm_info,core,"hvm: unknown hypercall %llx\n",a1);
@@ -391,9 +708,11 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
            break;
     }
                
+    v3_unlock_irqrestore(h->hypercall_lock,irq_state);
     return 0;
 }
 
+
 #define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
 
 int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
@@ -468,6 +787,8 @@ int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
        return -1;
     }
 
+    v3_lock_init(&(vm->hvm_state.hypercall_lock));
+
     // XXX sanity check config here
 
     vm->hvm_state.is_hvm=1;
@@ -495,8 +816,17 @@ int v3_deinit_hvm_vm(struct v3_vm_info *vm)
 {
     PrintDebug(vm, VCORE_NONE, "hvm: HVM VM deinit\n");
 
+
+    if (vm->hvm_state.hrt_image) { 
+       V3_VFree(vm->hvm_state.hrt_image);
+       vm->hvm_state.hrt_image=0;
+       vm->hvm_state.hrt_image_size=0;
+    }
+
     v3_remove_hypercall(vm,HVM_HCALL);
 
+    v3_lock_deinit(&(vm->hvm_state.hypercall_lock));
+
     if (vm->hvm_state.comm_page_hpa) { 
        struct v3_mem_region *r = v3_get_mem_region(vm,-1,(addr_t)vm->hvm_state.comm_page_hpa);
        if (!r) { 
@@ -563,7 +893,7 @@ uint32_t v3_get_hvm_hrt_cores(struct v3_vm_info *vm)
 int v3_is_hvm_ros_mem_gpa(struct v3_vm_info *vm, addr_t gpa)
 {
     if (vm->hvm_state.is_hvm) { 
-       return gpa>=0 && gpa<vm->hvm_state.first_hrt_gpa;
+       return gpa<vm->hvm_state.first_hrt_gpa;
     } else {
        return 1;
     }
@@ -1321,21 +1651,26 @@ static int configure_hrt(struct v3_vm_info *vm, mb_data_t *mb)
 
 }
 
-static int setup_mb_kernel_hrt(struct v3_vm_info *vm)
+static int setup_mb_kernel_hrt(struct v3_vm_info *vm, void *data, uint64_t size)
 {
     mb_data_t mb;
 
-    if (v3_parse_multiboot_header(vm->hvm_state.hrt_file,&mb)) { 
+    if (v3_parse_multiboot_header(data, size, &mb)) { 
        PrintError(vm,VCORE_NONE, "hvm: failed to parse multiboot kernel header\n");
        return -1;
     }
 
+    if (!mb.mb64_hrt) { 
+       PrintError(vm,VCORE_NONE,"hvm: invalid HRT - there is no MB64_HRT tag\n");
+       return -1;
+    }
+
     if (configure_hrt(vm,&mb)) {
        PrintError(vm,VCORE_NONE, "hvm: cannot configure HRT\n");
        return -1;
     }
     
-    if (v3_write_multiboot_kernel(vm,&mb,vm->hvm_state.hrt_file,
+    if (v3_write_multiboot_kernel(vm,&mb,data,size,
                                  (void*)vm->hvm_state.first_hrt_gpa,
                                  vm->mem_size-vm->hvm_state.first_hrt_gpa)) {
        PrintError(vm,VCORE_NONE, "hvm: failed to write multiboot kernel into memory\n");
@@ -1357,11 +1692,23 @@ static int setup_mb_kernel_hrt(struct v3_vm_info *vm)
 
 static int setup_hrt(struct v3_vm_info *vm)
 {
-    if (is_elf(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size) && 
-       find_mb_header(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size)) { 
+    void *data;
+    uint64_t size;
+
+    // If the ROS has installed an image, it takes priority
+    if (vm->hvm_state.hrt_image) { 
+       data = vm->hvm_state.hrt_image;
+       size = vm->hvm_state.hrt_image_size;
+    } else {
+       data = vm->hvm_state.hrt_file->data;
+       size = vm->hvm_state.hrt_file->size;
+    }
+       
+    if (is_elf(data,size) &&
+       find_mb_header(data,size)) {
 
        PrintDebug(vm,VCORE_NONE,"hvm: appears to be a multiboot kernel\n");
-       if (setup_mb_kernel_hrt(vm)) { 
+       if (setup_mb_kernel_hrt(vm,data,size)) { 
            PrintError(vm,VCORE_NONE,"hvm: multiboot kernel setup failed\n");
            return -1;
        } 
@@ -1447,7 +1794,7 @@ int v3_setup_hvm_vm_for_boot(struct v3_vm_info *vm)
    GDTR points to stub GDT
    TS   points to stub TSS
    CR3 points to root page table
-   CR0 has PE and PG
+   CR0 has PE, PG, and WP
    EFER has LME AND LMA (and NX for compatibility with Linux)
    RSP is TOS of core's scratch stack (looks like a call)
 
@@ -1547,8 +1894,8 @@ int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core)
               (void*)(core->vm_regs.rdx));
 
     // Setup CRs for long mode and our stub page table
-    // CR0: PG, PE
-    core->ctrl_regs.cr0 = 0x80000001;
+    // CR0: PG, PE, and WP for catching COW faults in kernel-mode (which is not default behavior)
+    core->ctrl_regs.cr0 = 0x80010001;
     core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;
 
     // CR2: don't care (output from #PF)
@@ -1722,3 +2069,111 @@ int v3_handle_hvm_reset(struct guest_info *core)
        return 0;
     }
 }
+
+/*
+ * Deliver a pending ROS signal to "core", if one can be delivered right now.
+ *
+ * Nothing happens (return 0) unless all of the following hold:
+ *   - the VM is an HVM and this core is not an HRT core
+ *   - a non-zero signal code is pending
+ *   - a handler, stack and CR3 have been registered
+ *   - the core is at CPL 3 with the registered CR3 loaded
+ *
+ * When delivery is possible, the pending code is atomically claimed and
+ * cleared, a six-quadword frame {code, rip, cs, rflags, rsp, ss} is written
+ * just below the registered stack top, and rip/rsp are redirected to the
+ * handler and the new frame.
+ *
+ * Returns 0 on success or when there is nothing to do, -1 if the frame could
+ * not be written to guest memory (the claimed signal is lost in that case).
+ */
+int v3_handle_hvm_entry(struct guest_info *core)
+{
+    struct v3_ros_signal *sig;
+    uint64_t pending;
+    uint64_t new_rsp;
+    uint64_t frame[6];
+
+    // Not relevant to non-HVM guests
+    if (!core->vm_info->hvm_state.is_hvm) {
+        return 0;
+    }
+
+    // Not relevant to an HRT core in an HVM
+    if (core->hvm_state.is_hrt) {
+        return 0;
+    }
+
+    // Unlocked peek at the code: this can race with a writer, in which case
+    // the signal is simply picked up at the next opportunity instead
+    if (!core->vm_info->hvm_state.ros_signal.code) {
+        return 0;
+    }
+
+    sig = &core->vm_info->hvm_state.ros_signal;
+
+    // Handler, process (CR3) and stack must all have been installed
+    if (!sig->handler || !sig->cr3 || !sig->stack) {
+        return 0;
+    }
+
+    // Only inject into user mode of the process that registered the handler
+    if (core->cpl != 3 || core->ctrl_regs.cr3 != sig->cr3) {
+        return 0;
+    }
+
+    // Atomically claim the code and reset it so the next signal can be raised
+    pending = __sync_fetch_and_and(&(sig->code), 0);
+
+    if (!pending) {
+        // someone else consumed it - nothing to do after all
+        return 0;
+    }
+
+    PrintDebug(core->vm_info,core,"hvm: ROS interrupt starting with rip=%p rsp=%p\n", (void*) core->rip, (void*) core->vm_regs.rsp);
+
+    // Interrupt-style frame handed to the handler
+    frame[0] = pending;
+    frame[1] = core->rip;
+    frame[2] = core->segments.cs.selector; // return cs
+    frame[3] = core->ctrl_regs.rflags;
+    frame[4] = core->vm_regs.rsp;
+    frame[5] = core->segments.ss.selector; // return ss
+
+    // Start 16-byte aligned below the registered stack top, then make room
+    new_rsp = (sig->stack - 16) & (~0xf);
+    new_rsp -= sizeof(frame);
+
+    if (v3_write_gva_memory(core,(addr_t)new_rsp,sizeof(frame),(uint8_t*)frame)!=sizeof(frame)) {
+        PrintError(core->vm_info,core,"hvm: failed to write interrupt frame\n");
+        // the code was already cleared above, so this injection is lost
+        return -1;
+    }
+
+    // Make the core look like it is jumping to the handler entry point
+    core->rip = sig->handler;
+    core->vm_regs.rsp = new_rsp;
+
+    PrintDebug(core->vm_info,core,"hvm: ROS frame is 0x%llx|0x%llx|0x%llx|0x%llx|0x%llx|0x%llx and and on entry rip=%p and rsp=%p\n", frame[0],frame[1],frame[2],frame[3],frame[4],frame[5],(void*) core->rip, (void*) core->vm_regs.rsp);
+
+    return 0;
+}
+
+/*
+ * Placeholder paired by name with v3_handle_hvm_entry().
+ * It does no work yet: "core" is ignored and the result is always 0.
+ */
+int v3_handle_hvm_exit(struct guest_info *core)
+{
+    return 0; // nothing to do at present
+}
+
+
+/*
+ * Post signal "code" to the ROS of "vm".
+ *
+ * Only one signal can be outstanding: the code is stored with an atomic
+ * compare-and-swap that succeeds only while the current code is zero.
+ *
+ * vm   - VM whose hvm_state.ros_signal is the target
+ * code - non-zero signal code to post
+ *
+ * Returns 0 if the signal was posted, -1 if code is zero, if no handler and
+ * stack are installed, or if another signal is still pending.
+ */
+int v3_hvm_signal_ros(struct v3_vm_info *vm, uint64_t code)
+{
+    struct v3_ros_signal *sig = &vm->hvm_state.ros_signal;
+
+    // zero means "no signal pending", so it cannot be used as a code
+    if (code == 0) {
+        PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with code zero\n");
+        return -1;
+    }
+
+    // both a handler and a stack must have been installed
+    if (!(sig->handler && sig->stack)) {
+        PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with no installed handler\n");
+        return -1;
+    }
+
+    // claim the slot only if it is currently idle (code 0)
+    if (__sync_bool_compare_and_swap(&(sig->code), 0, code)) {
+        PrintDebug(vm,VCORE_NONE,"hvm: raised signal 0x%llx to the ROS\n",code);
+        return 0;
+    }
+
+    PrintError(vm,VCORE_NONE,"hvm: signal was already asserted\n");
+    return -1;
+}
+
+
+