X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fpalacios%2Fvmm_hvm.c;h=ec1c42c7a0a8af41ed0278e0f06137aa4c764aa6;hb=d85300ed95766164d14a7f3b6c1c681b8b9a9c52;hp=ff5b34f2c349d152083543c29644aa54b6c2cf67;hpb=bf732d7c9e2940ed8dddcb30ff70bdc06bbfdc3b;p=palacios.git

diff --git a/palacios/src/palacios/vmm_hvm.c b/palacios/src/palacios/vmm_hvm.c
index ff5b34f..ec1c42c 100644
--- a/palacios/src/palacios/vmm_hvm.c
+++ b/palacios/src/palacios/vmm_hvm.c
@@ -31,6 +31,24 @@
 #include <palacios/vmm_debug.h>
 
 
+struct gdt_area {   // packed image written to the HRT's registered GDT save area (see the fill-GDT-area hypercall)
+    struct {
+        uint16_t limit;   // filled from the requesting ROS core's GDTR limit
+        uint64_t base;    // filled with save-area GVA + sizeof(struct gdt_area), i.e. the address of gdt[] below
+    } __attribute__((packed)) gdtr;
+    // fsbase is the value the ROS supplies as the a2 argument of the fill request
+    uint64_t fsbase;
+    uint16_t cs;   // this and the selectors below are filled from the requesting ROS core's segment registers
+    uint16_t ds;
+    uint16_t es;
+    uint16_t fs;
+    uint16_t gs;
+    uint16_t ss;
+    // zero-length trailing array: the GDT bytes read from the ROS are placed right after the header
+    uint64_t gdt[0];
+} __attribute__((packed));
+
+
 /*
 
   MEM     = Total size of memory in the GPA (in MB)
@@ -84,12 +102,52 @@ int v3_deinit_hvm()
 // ignore requests from when we are in the wrong state
 #define ENFORCE_STATE_MACHINE 1
 
-// invoke the HRT using a page fault instead of
-// the SWINTR mechanism
-#define USE_UPCALL_MAGIC_PF  1
+// invoke the HRT using one of the following mechanisms
 #define UPCALL_MAGIC_ADDRESS 0x0000800df00df00dULL
 #define UPCALL_MAGIC_ERROR   0xf00df00d
 
+
+// Inject the HRT upcall into vcore "num" using the mechanism chosen at
+// build time: magic #GP, magic #PF, or a software interrupt.
+// "core" is the requesting core; it supplies the VM and the logging context.
+// Returns 0 if the injection request succeeded, -1 if it failed or if no
+// upcall mechanism is configured.
+static int magic_upcall(struct guest_info *core, uint64_t num)
+{
+#if defined(V3_CONFIG_HVM_UPCALL_MAGIC_GPF)
+    PrintDebug(core->vm_info, core, "hvm: injecting magic #GP into core %llu\n",num);
+    if (v3_raise_exception_with_error(&core->vm_info->cores[num],
+				      GPF_EXCEPTION,
+				      UPCALL_MAGIC_ERROR)) {
+	PrintError(core->vm_info, core,"hvm: cannot inject HRT #GP to core %llu\n",num);
+	return -1;
+    }
+    return 0;
+#elif defined(V3_CONFIG_HVM_UPCALL_MAGIC_PF)
+    PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %llu\n",num);
+    // set the target's faulting address (CR2) to the magic value before raising the #PF
+    core->vm_info->cores[num].ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
+    if (v3_raise_exception_with_error(&core->vm_info->cores[num],
+				      PF_EXCEPTION,
+				      UPCALL_MAGIC_ERROR)) {
+	PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %llu\n",num);
+	return -1;
+    }
+    return 0;
+#elif defined(V3_CONFIG_HVM_UPCALL_MAGIC_SWIN)
+    PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%x into core %llu\n",core->vm_info->hvm_state.hrt_int_vector,num);
+    if (v3_raise_swintr(&core->vm_info->cores[num],core->vm_info->hvm_state.hrt_int_vector)) {
+	PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %llu\n",num);
+	return -1;
+    }
+    return 0;
+#else
+    PrintError(core->vm_info,core,"hvm: no upcall mechanism is enabled!\n");
+    return -1;
+#endif
+}
+
+
 /*
   64 bit only hypercall:
 
@@ -104,12 +162,17 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
     uint64_t bitness = core->vm_regs.rbx;
     uint64_t a1 = core->vm_regs.rcx;
     uint64_t a2 = core->vm_regs.rdx;
+    uint64_t a3 = core->vm_regs.rsi;
     struct v3_vm_hvm *h = &core->vm_info->hvm_state;
+    addr_t irq_state;
 
+    // Let's be paranoid here
+    irq_state = v3_lock_irqsave(h->hypercall_lock);
 
     if (bitness!=0x6464646464646464) { 
 	PrintError(core->vm_info,core,"hvm: unable to handle non-64 bit hypercall\n");
 	core->vm_regs.rax = -1;
+	v3_unlock_irqrestore(h->hypercall_lock,irq_state);
 	return 0;
     }
 
@@ -155,6 +218,41 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 	    }
 	    break;
 	    
+	case 0x8: // replace HRT image
+	    // a2 = gva of image
+	    // a3 = size of image
+	    PrintDebug(core->vm_info,core,"hvm: request replacement HRT image addr=0x%llx size=0x%llx\n",a2,a3);
+
+	    if (h->hrt_image) { 
+		// delete old
+		V3_VFree(h->hrt_image);
+		h->hrt_image = 0;
+	    }
+
+	    h->hrt_image = V3_VMalloc(a3);
+
+	    if (!(h->hrt_image)) {
+		PrintError(core->vm_info,core, "hvm: failed to allocate space for replacement image\n");
+		core->vm_regs.rax = -1;
+	    } else {
+		if (v3_read_gva_memory(core, a2, a3, (uint8_t*) h->hrt_image)!=a3) { 
+		    PrintError(core->vm_info, core, "hvm: cannot read replacement image\n");
+		    core->vm_regs.rax = -1;
+		} else {
+		    h->hrt_image_size = a3; 
+		    core->vm_regs.rax = 0;
+		}
+	    }
+
+	    if (core->vm_regs.rax) { 
+		PrintError(core->vm_info,core,"hvm: Failed to replace HRT image\n");
+	    } else {
+		PrintDebug(core->vm_info,core,"hvm: HRT image successfully replaced\n");
+	    }
+
+	    break;
+
+
 	case 0xf: // get HRT state
 	    core->vm_regs.rax = h->trans_state;
 	    if (v3_write_gva_memory(core, a2, sizeof(h->ros_event), (uint8_t*) &h->ros_event)!=sizeof(h->ros_event)) { 
@@ -174,14 +272,29 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 		    core->vm_regs.rax = -1;
 		} else {
 		    core->vm_regs.rax = 0;
+		    PrintDebug(core->vm_info, core, "hvm: copied new ROS event (type=%s)\n",
+			       h->ros_event.event_type == ROS_PAGE_FAULT ? "page fault" : 
+			       (h->ros_event.event_type == ROS_SYSCALL ? "syscall" : "none"));
+		    
 		}
 	    }
 
 	    break;
 
+	case 0x1e: // ack result (HRT has read the result of the finished event)
+	    if (h->ros_event.event_type != ROS_DONE) {
+		PrintError(core->vm_info, core, "hvm: cannot ack event result when not in ROS_DONE state\n");
+		core->vm_regs.rax = -1;
+	    } else {
+		h->ros_event.event_type=ROS_NONE;
+		PrintDebug(core->vm_info, core, "hvm: HRT core acks event result\n");
+		core->vm_regs.rax = 0;
+	    }
+	    break;
+
 	case 0x1f:
 	    PrintDebug(core->vm_info, core, "hvm: completion of ROS event (rc=0x%llx)\n",a2);
-	    h->ros_event.event_type=ROS_NONE;
+	    h->ros_event.event_type=ROS_DONE;
 	    h->ros_event.last_ros_event_result = a2;
 	    break;
 
@@ -214,28 +327,12 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 		    h->trans_count = last-first+1;
 
 		    for (cur=first;cur<=last;cur++) { 
-
-#if USE_UPCALL_MAGIC_PF
-			PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %llu\n",cur);
-			core->vm_info->cores[cur].ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
-			if (v3_raise_exception_with_error(&core->vm_info->cores[cur],
-							  PF_EXCEPTION, 
-							  UPCALL_MAGIC_ERROR)) { 
-			    PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %llu\n",cur);
-			    core->vm_regs.rax = -1;
-			    break;
-			}
-#else
-			PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%u into core %llu\n",h->hrt_int_vector,cur);
-			if (v3_raise_swintr(&core->vm_info->cores[cur],h->hrt_int_vector)) { 
-			    PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %llu\n",cur);
+			if (magic_upcall(core,cur)) {
 			    core->vm_regs.rax = -1;
 			    break;
 			}
-#endif
 			// Force core to exit now
 			v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
-			  
 		    }
 		    if (core->vm_regs.rax==0) { 
 			if (a1==0x20) { 
@@ -278,25 +375,11 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 		    h->trans_count = last-first+1;
 
 		    for (cur=first;cur<=last;cur++) { 
-
-#if USE_UPCALL_MAGIC_PF
-			PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %llu\n",cur);
-			core->vm_info->cores[cur].ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
-			if (v3_raise_exception_with_error(&core->vm_info->cores[cur],
-							  PF_EXCEPTION, 
-							  UPCALL_MAGIC_ERROR)) { 
-			    PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %llu\n",cur);
-			    core->vm_regs.rax = -1;
-			    break;
-			}
-#else
-			PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%u into core %llu\n",h->hrt_int_vector,cur);
-			if (v3_raise_swintr(&core->vm_info->cores[cur],h->hrt_int_vector)) { 
-			    PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %llu\n",cur);
+			
+			if (magic_upcall(core,cur)) { 
 			    core->vm_regs.rax = -1;
 			    break;
 			}
-#endif
 			// Force core to exit now
 			v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
 			  
@@ -353,7 +436,7 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 		core->vm_regs.rax=-1;
 	    } else {
 		if (ENFORCE_STATE_MACHINE && h->trans_state!=HRT_IDLE) { 
-		    PrintError(core->vm_info,core,"hvm: request to %smerge address space in non-idle state\n",a1==0x30 ? "" : "un");
+		    PrintError(core->vm_info,core,"hvm: request to %smerge address space in non-idle state (%d)\n",a1==0x30 ? "" : "un", h->trans_state);
 		    core->vm_regs.rax=-1;
 		} else {
 		    uint64_t *page = (uint64_t *) h->comm_page_hva;
@@ -365,27 +448,17 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 		    page[1] = core->ctrl_regs.cr3;  // this is a do-not-care for an unmerge
 
 		    core->vm_regs.rax = 0;
-#if USE_UPCALL_MAGIC_PF
-		    PrintDebug(core->vm_info,core,"hvm: injecting magic #PF into core %u\n",h->first_hrt_core);
-		    core->vm_info->cores[h->first_hrt_core].ctrl_regs.cr2 = UPCALL_MAGIC_ADDRESS;
-		    if (v3_raise_exception_with_error(&core->vm_info->cores[h->first_hrt_core],
-						      PF_EXCEPTION,  
-						      UPCALL_MAGIC_ERROR)) { 
-		      PrintError(core->vm_info,core, "hvm: cannot inject HRT #PF to core %u\n",h->first_hrt_core);
-		      core->vm_regs.rax = -1;
-		      break;
-		    }
-#else
-		    PrintDebug(core->vm_info,core,"hvm: injecting SW intr 0x%u into core %u\n",h->hrt_int_vector,h->first_hrt_core);
-		    if (v3_raise_swintr(&core->vm_info->cores[h->first_hrt_core],h->hrt_int_vector)) { 
-			PrintError(core->vm_info,core, "hvm: cannot inject HRT interrupt to core %u\n",h->first_hrt_core);
+
+		    h->trans_state = HRT_MERGE;
+
+		    if (magic_upcall(core,h->first_hrt_core)) {
 			core->vm_regs.rax = -1;
-		    } 
-#endif		
+			break;
+		    }
+
 		    // Force core to exit now
 		    v3_interrupt_cpu(core->vm_info,core->vm_info->cores[h->first_hrt_core].pcpu_id,0);
 
-		    h->trans_state = HRT_MERGE;
 		}
 		
 	    }
@@ -409,6 +482,225 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 	    }
 		    
 	    break;
+	    
+	case 0x40: // install or remove signal handler
+	    if (v3_is_hvm_hrt_core(core)) { 
+		PrintError(core->vm_info,core, "hvm: HRT cannot install signal handler...\n");
+		core->vm_regs.rax=-1;
+	    } else {
+		PrintDebug(core->vm_info,core,"hvm: install signal handler for CR3=%p, handler=%p, stack=%p\n",(void*)core->ctrl_regs.cr3, (void*)a2, (void*)a3);
+		if (h->ros_signal.code) { 
+		    PrintError(core->vm_info,core,"hvm: signal is pending...\n");
+		    core->vm_regs.rax=-1;
+		} else {
+		    if ((a2 || a3) && (h->ros_signal.handler || h->ros_signal.stack)) { 
+			PrintError(core->vm_info,core,"hvm: attempt to replace existing handler without removing it first\n");
+			core->vm_regs.rax=-1;
+		    } else {
+			// actually make the change
+			h->ros_signal.handler=a2;
+			h->ros_signal.stack=a3;
+			h->ros_signal.cr3=core->ctrl_regs.cr3;
+			core->vm_regs.rax=0;
+
+			// test by signalling back a hello 
+			// if (a2 && a3) { 
+			//    v3_hvm_signal_ros(core->vm_info,0xf00d);
+			//}
+		    }
+		}
+	    }
+	    break;
+
+	case 0x41: // raise signal in the ROS from HRT or ROS
+	    PrintDebug(core->vm_info,core,"hvm: HRT raises signal code=0x%llx\n", a2);
+	    core->vm_regs.rax = v3_hvm_signal_ros(core->vm_info,a2);
+	    break;
+
+	case 0x51: // fill GDT area (HRT only)
+	    if (v3_is_hvm_hrt_core(core)) {
+		PrintError(core->vm_info, core, "hvm: HRT cannot request a GDT area fill\n");
+		core->vm_regs.rax = -1;
+	    } else {
+		struct guest_info * hrt_core = &core->vm_info->cores[h->first_hrt_core];
+		struct gdt_area * area = V3_Malloc(sizeof(struct gdt_area) + core->segments.gdtr.limit);
+		if (!area) {
+		    PrintError(core->vm_info, core, "hvm: could not allocate GDT area\n");
+		    core->vm_regs.rax = -1;
+		    break;
+		}
+
+		PrintDebug(core->vm_info, core, "hvm: ROS requests to fill GDT area with fsbase=%p\n", (void*)a2);
+
+		if (!h->hrt_gdt_gva) {
+		    PrintError(core->vm_info, core, "hvm: HRT has not registered a GDT state save area\n");
+		    core->vm_regs.rax = -1;
+		    V3_Free(area);
+		    break;
+		}
+
+		area->gdtr.base  = h->hrt_gdt_gva + sizeof(struct gdt_area);
+		area->gdtr.limit = core->segments.gdtr.limit;
+		area->fsbase     = a2;
+		area->cs         = core->segments.cs.selector;
+		area->ds         = core->segments.ds.selector;
+		area->es         = core->segments.es.selector;
+		area->fs         = core->segments.fs.selector;
+		area->gs         = core->segments.gs.selector;
+		area->ss         = core->segments.ss.selector;
+		
+		if (v3_read_gva_memory(core, 
+				       core->segments.gdtr.base,
+				       core->segments.gdtr.limit,
+				       (uint8_t*)area->gdt) != core->segments.gdtr.limit) {
+		    PrintError(core->vm_info, core, "hvm: could not copy GDT from ROS\n");
+		    core->vm_regs.rax = -1;
+		    V3_Free(area);
+		    break;
+		}
+					
+		uint_t area_size = sizeof(struct gdt_area) + core->segments.gdtr.limit;
+
+		// copy the entire area over
+		PrintDebug(core->vm_info, core, "hvm: copying %u bytes into GDT area\n", area_size);
+
+		if (v3_write_gva_memory(hrt_core, h->hrt_gdt_gva, area_size, (uchar_t*)area) != area_size) {
+		    PrintError(core->vm_info, core, "hvm: could not copy GDT area\n");
+		    core->vm_regs.rax = -1;
+		    V3_Free(area);
+		    break;
+		}
+
+		if (ENFORCE_STATE_MACHINE && h->trans_state!=HRT_IDLE) { 
+		    PrintError(core->vm_info,core, "hvm: cannot sync GDT in state %d\n", h->trans_state);
+		    core->vm_regs.rax = -1;
+		    V3_Free(area);
+		    break;
+		} else {
+		    uint64_t *page = (uint64_t *) h->comm_page_hva;
+		    uint64_t first, last, cur;
+
+		    PrintDebug(core->vm_info,core, "hvm: sync GDT\n");
+		    page[0] = a1;
+		    page[1] = h->hrt_gdt_gva;
+		    page[2] = a3;
+
+		    first=last=h->first_hrt_core;
+		    
+		    core->vm_regs.rax = 0;
+		    
+		    h->trans_count = last-first+1;
+
+		    for (cur=first;cur<=last;cur++) { 
+			if (magic_upcall(core,cur)) {
+			    core->vm_regs.rax = -1;
+			    break;
+			}
+			// Force core to exit now
+			v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
+		    }
+		    
+		    if (core->vm_regs.rax==0) { 
+			h->trans_state = HRT_GDTSYNC;
+		    }  else {
+			PrintError(core->vm_info,core,"hvm: in inconsistent state due to HRT GDT SYNC failure\n");
+			h->trans_state = HRT_IDLE;
+			h->trans_count = 0;
+		    }
+
+		    V3_Free(area);
+
+		}
+		
+	    }
+	    
+	    break;
+        
+	case 0x52: // register HRT GDT area
+	    if (!v3_is_hvm_hrt_core(core)) {
+		PrintError(core->vm_info, core, "hvm: ROS cannot install a GDT area\n"); 
+		core->vm_regs.rax = -1;
+	    } else {
+		PrintDebug(core->vm_info, core, "hvm: HRT registers GDT save area at gva=%p\n", (void*)a2);
+		h->hrt_gdt_gva = a2;
+		core->vm_regs.rax = 0;
+	    }
+
+        PrintDebug(core->vm_info, core, "hvm: Printing current HRT GDT...\n");
+#ifdef V3_CONFIG_DEBUG_HVM
+        v3_print_gdt(core, core->segments.gdtr.base);
+#endif
+	
+        break;
+	
+	case 0x53: // restore GDT
+
+	    if (v3_is_hvm_hrt_core(core)) {
+		PrintError(core->vm_info, core, "hvm: HRT cannot request GDT restoration\n");
+		core->vm_regs.rax = -1;
+		break;
+	    } else {
+		PrintDebug(core->vm_info, core, "hvm: ROS requesting to restore original GDT\n");
+		core->vm_regs.rax = 0;
+	    }
+	    
+	    if (ENFORCE_STATE_MACHINE && h->trans_state!=HRT_IDLE) { 
+		PrintError(core->vm_info,core, "hvm: cannot sync GDT in state %d\n", h->trans_state);
+		core->vm_regs.rax = -1;
+		break;
+	    } else {
+		uint64_t *page = (uint64_t *) h->comm_page_hva;
+		uint64_t first, last, cur;
+		
+		PrintDebug(core->vm_info,core, "hvm: restore GDT\n");
+		page[0] = a1;
+		
+		first=last=h->first_hrt_core;
+		
+		core->vm_regs.rax = 0;
+		
+		h->trans_count = last-first+1;
+		
+		for (cur=first;cur<=last;cur++) { 
+		    if (magic_upcall(core,cur)) {
+			core->vm_regs.rax = -1;
+			break;
+		    }
+		    // Force core to exit now
+		    v3_interrupt_cpu(core->vm_info,core->vm_info->cores[cur].pcpu_id,0);
+		}
+		
+		if (core->vm_regs.rax==0) { 
+		    h->trans_state = HRT_GDTSYNC;
+		}  else {
+		    PrintError(core->vm_info,core,"hvm: in inconsistent state due to HRT GDT SYNC failure\n");
+		    h->trans_state = HRT_IDLE;
+		    h->trans_count = 0;
+		}
+	    }
+	    
+	    break;
+	    
+	case 0x5f: // GDT sync operation done
+	    if (v3_is_hvm_ros_core(core)) { 
+		PrintError(core->vm_info,core, "hvm: invalid request for GDT sync done from ROS core\n");
+		core->vm_regs.rax=-1;
+	    } else {
+		if (ENFORCE_STATE_MACHINE && h->trans_state != HRT_GDTSYNC) {
+		    PrintError(core->vm_info,core,"hvm: GDT sync done when in incorrect state (%d)\n", h->trans_state);
+		    core->vm_regs.rax=-1;
+		} else {
+		    PrintDebug(core->vm_info,core, "hvm: GDT sync complete - back to idle\n");
+		    PrintDebug(core->vm_info, core, "hvm: Dumping new HRT GDT...\n");
+#ifdef V3_CONFIG_DEBUG_HVM
+		    v3_print_gdt(core, core->segments.gdtr.base);
+#endif
+		    h->trans_state=HRT_IDLE;
+		    core->vm_regs.rax=0;
+		}
+		
+	    }
+	    break;
 
 	default:
 	    PrintError(core->vm_info,core,"hvm: unknown hypercall %llx\n",a1);
@@ -416,9 +708,11 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
 	    break;
     }
 		
+    v3_unlock_irqrestore(h->hypercall_lock,irq_state);
     return 0;
 }
 
+
 #define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
 
 int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
@@ -493,6 +787,8 @@ int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
 	return -1;
     }
 
+    v3_lock_init(&(vm->hvm_state.hypercall_lock));
+
     // XXX sanity check config here
 
     vm->hvm_state.is_hvm=1;
@@ -520,8 +816,17 @@ int v3_deinit_hvm_vm(struct v3_vm_info *vm)
 {
     PrintDebug(vm, VCORE_NONE, "hvm: HVM VM deinit\n");
 
+
+    if (vm->hvm_state.hrt_image) { 
+	V3_VFree(vm->hvm_state.hrt_image);
+	vm->hvm_state.hrt_image=0;
+	vm->hvm_state.hrt_image_size=0;
+    }
+
     v3_remove_hypercall(vm,HVM_HCALL);
 
+    v3_lock_deinit(&(vm->hvm_state.hypercall_lock));
+
     if (vm->hvm_state.comm_page_hpa) { 
 	struct v3_mem_region *r = v3_get_mem_region(vm,-1,(addr_t)vm->hvm_state.comm_page_hpa);
 	if (!r) { 
@@ -1346,21 +1651,26 @@ static int configure_hrt(struct v3_vm_info *vm, mb_data_t *mb)
 
 }
 
-static int setup_mb_kernel_hrt(struct v3_vm_info *vm)
+static int setup_mb_kernel_hrt(struct v3_vm_info *vm, void *data, uint64_t size)
 {
     mb_data_t mb;
 
-    if (v3_parse_multiboot_header(vm->hvm_state.hrt_file,&mb)) { 
+    if (v3_parse_multiboot_header(data, size, &mb)) { 
 	PrintError(vm,VCORE_NONE, "hvm: failed to parse multiboot kernel header\n");
 	return -1;
     }
 
+    if (!mb.mb64_hrt) { 
+	PrintError(vm,VCORE_NONE,"hvm: invalid HRT - there is no MB64_HRT tag\n");
+	return -1;
+    }
+
     if (configure_hrt(vm,&mb)) {
 	PrintError(vm,VCORE_NONE, "hvm: cannot configure HRT\n");
 	return -1;
     }
     
-    if (v3_write_multiboot_kernel(vm,&mb,vm->hvm_state.hrt_file,
+    if (v3_write_multiboot_kernel(vm,&mb,data,size,
 				  (void*)vm->hvm_state.first_hrt_gpa,
 				  vm->mem_size-vm->hvm_state.first_hrt_gpa)) {
 	PrintError(vm,VCORE_NONE, "hvm: failed to write multiboot kernel into memory\n");
@@ -1382,11 +1692,23 @@ static int setup_mb_kernel_hrt(struct v3_vm_info *vm)
 
 static int setup_hrt(struct v3_vm_info *vm)
 {
-    if (is_elf(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size) && 
-	find_mb_header(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size)) { 
+    void *data;
+    uint64_t size;
+
+    // If the ROS has installed an image, it takes priority
+    if (vm->hvm_state.hrt_image) { 
+	data = vm->hvm_state.hrt_image;
+	size = vm->hvm_state.hrt_image_size;
+    } else {
+	data = vm->hvm_state.hrt_file->data;
+	size = vm->hvm_state.hrt_file->size;
+    }
+	
+    if (is_elf(data,size) &&
+	find_mb_header(data,size)) {
 
 	PrintDebug(vm,VCORE_NONE,"hvm: appears to be a multiboot kernel\n");
-	if (setup_mb_kernel_hrt(vm)) { 
+	if (setup_mb_kernel_hrt(vm,data,size)) { 
 	    PrintError(vm,VCORE_NONE,"hvm: multiboot kernel setup failed\n");
 	    return -1;
 	} 
@@ -1472,7 +1794,7 @@ int v3_setup_hvm_vm_for_boot(struct v3_vm_info *vm)
    GDTR points to stub GDT
    TS   points to stub TSS
    CR3 points to root page table
-   CR0 has PE and PG
+   CR0 has PE, PG, and WP
    EFER has LME AND LMA (and NX for compatibility with Linux)
    RSP is TOS of core's scratch stack (looks like a call)
 
@@ -1572,8 +1894,8 @@ int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core)
 	       (void*)(core->vm_regs.rdx));
 
     // Setup CRs for long mode and our stub page table
-    // CR0: PG, PE
-    core->ctrl_regs.cr0 = 0x80000001;
+    // CR0: PG, PE, and WP for catching COW faults in kernel-mode (which is not default behavior)
+    core->ctrl_regs.cr0 = 0x80010001;
     core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;
 
     // CR2: don't care (output from #PF)
@@ -1747,3 +2069,111 @@ int v3_handle_hvm_reset(struct guest_info *core)
 	return 0;
     }
 }
+
+// Deliver a pending ROS signal, if any, by making this core look as if it
+// took an interrupt into the registered handler on the registered stack.
+// NOTE(review): presumably run on the VM-entry path - confirm with callers.
+// Returns 0 when a signal was injected or there was nothing to inject now,
+// and -1 if the frame could not be written (that signal is then lost).
+int v3_handle_hvm_entry(struct guest_info *core)
+{
+    struct v3_ros_signal *sig = &core->vm_info->hvm_state.ros_signal;
+    uint64_t pending;
+    uint64_t new_rsp;
+    uint64_t frame[6];
+
+    // Only non-HRT cores of an HVM are candidates
+    if (!core->vm_info->hvm_state.is_hvm) {
+	return 0;
+    }
+    if (core->hvm_state.is_hrt) {
+	return 0;
+    }
+
+    // Unlocked peek at the pending code.  This can race with a writer, but then
+    // we simply inject at the next opportunity (the real claim below is atomic).
+    if (!sig->code) {
+	return 0;
+    }
+
+    // Inject only if everything is installed and the registering process is running in user mode
+    if (!sig->handler ||                    // no handler installed
+	!sig->cr3 ||                        // no process installed
+	!sig->stack ||                      // no stack installed
+	core->cpl != 3 ||                   // not in user mode
+	core->ctrl_regs.cr3 != sig->cr3) {  // a different process is active
+	return 0;
+    }
+
+    // Atomically fetch the code and reset it to zero so the next signal can be raised
+    pending = __sync_fetch_and_and(&(sig->code), 0);
+
+    if (!pending) {
+	// nothing to do after all
+	return 0;
+    }
+
+    PrintDebug(core->vm_info,core,"hvm: ROS interrupt starting with rip=%p rsp=%p\n", (void*) core->rip, (void*) core->vm_regs.rsp);
+
+    // Interrupt frame, lowest address first: code, then return rip, cs, rflags, rsp, ss
+    frame[0] = pending;
+    frame[1] = core->rip;
+    frame[2] = core->segments.cs.selector;
+    frame[3] = core->ctrl_regs.rflags;
+    frame[4] = core->vm_regs.rsp;
+    frame[5] = core->segments.ss.selector;
+
+    // Begin 16-byte aligned below the top of the handler stack, then make room for the frame
+    new_rsp = (sig->stack - 16) & (~0xf);
+    new_rsp -= sizeof(frame);
+
+    if (v3_write_gva_memory(core,(addr_t)new_rsp,sizeof(frame),(uint8_t*)frame)!=sizeof(frame)) {
+	PrintError(core->vm_info,core,"hvm: failed to write interrupt frame\n");
+	// the code was already cleared above, so this inject is lost
+	return -1;
+    }
+
+    // Make the guest resume in the handler on the new stack
+    core->rip = sig->handler;
+    core->vm_regs.rsp = new_rsp;
+
+    PrintDebug(core->vm_info,core,"hvm: ROS frame is 0x%llx|0x%llx|0x%llx|0x%llx|0x%llx|0x%llx and and on entry rip=%p and rsp=%p\n", frame[0],frame[1],frame[2],frame[3],frame[4],frame[5],(void*) core->rip, (void*) core->vm_regs.rsp);
+
+    return 0;
+}
+
+int v3_handle_hvm_exit(struct guest_info *core)
+{
+    // Placeholder hook: no HVM-specific work is done here yet, so "core" is unused and 0 is always returned
+    return 0;
+}
+
+
+// Record signal "code" as pending for the ROS.  Returns 0 if it was recorded,
+// -1 if code is zero, no handler/stack is installed, or a signal is already pending.
+int v3_hvm_signal_ros(struct v3_vm_info *vm, uint64_t code)
+{
+    struct v3_ros_signal *sig = &vm->hvm_state.ros_signal;
+    // a zero code means "nothing pending", so it cannot be raised as a signal
+    if (code == 0) {
+	PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with code zero\n");
+	return -1;
+    }
+
+    // both a handler and a stack must already be installed
+    if (!(sig->handler && sig->stack)) {
+	PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with no installed handler\n");
+	return -1;
+    }
+
+    // atomically set the code, but only if no code is currently pending (slot is 0)
+    if (__sync_bool_compare_and_swap(&(sig->code), 0, code)) {
+	PrintDebug(vm,VCORE_NONE,"hvm: raised signal 0x%llx to the ROS\n",code);
+	return 0;
+    }
+    PrintError(vm,VCORE_NONE,"hvm: signal was already asserted\n");
+    return -1;
+}
+
+
+