Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


HVM capability enhancement: asynchronous upcalls to ROS userspace
Peter Dinda [Sun, 27 Sep 2015 21:15:11 +0000 (16:15 -0500)]
This adds the ability for the HRT, ROS, or other component of
the VMM to send what looks like a "user interrupt" (e.g. signal)
to a process running in the ROS.

This mechanism looks similar to signals:  the ROS process
installs a handler and stack (our library hides the details), the VMM
then dispatches to the handler by switching stacks, building
what looks like a CPL 3 -> CPL 3 interrupt frame (with error code)
on the selected stack, and then points rip to a userspace wrapper
that handles register save/restore + iret, wrapping around a call
to the user's handler.

Whoever raises the signal supplies the error code, which is then
coerced into an argument for the user's handler.

Only a single process in the ROS can register a handler at a time.
The context includes not only the stack and handler function, but
also the CR3 extant at registration time, and CPL 3 at injection time.

guest/linux/hvm-ros/Makefile
guest/linux/hvm-ros/test.c
guest/linux/hvm-ros/v3_hvm_ros_user.c
guest/linux/hvm-ros/v3_hvm_ros_user.h
guest/linux/hvm-ros/v3_hvm_ros_user_low_level.S [new file with mode: 0644]
palacios/include/palacios/vmm_hvm.h
palacios/src/palacios/svm.c
palacios/src/palacios/vmm_hvm.c
palacios/src/palacios/vmx.c

index 96ce1f3..f818a39 100644 (file)
@@ -1,14 +1,17 @@
 all: libv3_hvm_ros_user.a test
 
-libv3_hvm_ros_user.a: v3_hvm_ros_user.o
-       ar ruv libv3_hvm_ros_user.a v3_hvm_ros_user.o
+libv3_hvm_ros_user.a: v3_hvm_ros_user.o v3_hvm_ros_user_low_level.o
+       ar ruv libv3_hvm_ros_user.a v3_hvm_ros_user.o v3_hvm_ros_user_low_level.o
 
 v3_hvm_ros_user.o: v3_hvm_ros_user.c v3_hvm_ros_user.h 
        gcc -Wall -c v3_hvm_ros_user.c
 
+v3_hvm_ros_user_low_level.o: v3_hvm_ros_user_low_level.S
+       gcc -Wall -c v3_hvm_ros_user_low_level.S
+
 
 test: test.c libv3_hvm_ros_user.a v3_hvm_ros_user.h
        gcc -Wall -static test.c -L. -lv3_hvm_ros_user -o test
 
 clean:
-       rm -f libv3_hvm_ros_user.a v3_hvm_ros_user.o test
+       rm -f libv3_hvm_ros_user.a v3_hvm_ros_user.o v3_hvm_ros_user_low_level.o test
index e6e14bf..2781610 100644 (file)
@@ -188,6 +188,14 @@ int timing_test_async(uint64_t num_merge, uint64_t num_call)
     return 0;
 }
 
+#define HVM_SIG_STACK_SIZE 8192
+static char hvm_sig_stack[HVM_SIG_STACK_SIZE] __attribute__((aligned(4096)));
+
+static void my_hvm_sig_handler(uint64_t code)
+{
+    printf("HVM Signal Handler Invoked: code=0x%lx\n",code);
+}
+
 int main(int argc, char *argv[]) 
 {
     int rc;
@@ -198,7 +206,10 @@ int main(int argc, char *argv[])
     }
     
     v3_hvm_ros_user_init();
-    
+
+    v3_hvm_ros_unregister_signal(); // remove any existing signal handler
+    v3_hvm_ros_register_signal(my_hvm_sig_handler,hvm_sig_stack,HVM_SIG_STACK_SIZE);
+
     if (argv[1][0]=='s') {
        if (argv[2][0]=='s') { 
            rc=simple_test_sync();
@@ -222,6 +233,8 @@ int main(int argc, char *argv[])
        rc=-1;
     }
     
+    v3_hvm_ros_unregister_signal();
+
     v3_hvm_ros_user_deinit();
 
     return rc;
index 345f8cb..c703ec1 100644 (file)
@@ -9,6 +9,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#include <sys/mman.h>
 
 #include "v3_hvm_ros_user.h"
 
@@ -480,3 +481,87 @@ int v3_hvm_ros_invoke_hrt_sync(void *buf, int ros)
     return 0;
 }
 
+extern void *__v3_hvm_ros_signal_handler_stub;
+
+void (*__v3_hvm_ros_signal_handler)(uint64_t) = 0;
+static void *__v3_hvm_ros_signal_handler_stack = 0;
+static uint64_t __v3_hvm_ros_signal_handler_stack_size=0;
+
+int v3_hvm_ros_register_signal(void (*h)(uint64_t), void *stack, uint64_t stack_size )
+{
+    unsigned long long rc, num, a1=0, a2=0, a3=0, a4=0, a5=0, a6=0, a7=0, a8=0;
+
+    if (mlock(stack,stack_size)) { 
+       INFO("Can't pin stack - proceeding\n");
+    }
+
+    // clear it and touch it
+    memset(stack,0,stack_size);
+
+    __v3_hvm_ros_signal_handler_stack = stack;
+    __v3_hvm_ros_signal_handler_stack_size = stack_size;
+    __v3_hvm_ros_signal_handler = h;
+
+    // Attempt to install
+    
+    num = 0xf00d;
+    a1 = 0x40;
+    a2 = (unsigned long long) &__v3_hvm_ros_signal_handler_stub;
+    a3 = (unsigned long long) stack + stack_size - 8; // put us at the top of the stack
+
+    HCALL(rc,num,a1,a2,a3,a4,a5,a6,a7,a8);
+    if (rc) {
+       INFO("Failed to install HVM signal handler\n");
+       return -1;
+    } 
+
+    return 0;
+}
+
+int v3_hvm_ros_unregister_signal()
+{
+    unsigned long long rc, num, a1=0, a2=0, a3=0, a4=0, a5=0, a6=0, a7=0, a8=0;
+
+    // an unregister boils down to setting handler to null
+    num = 0xf00d;
+    a1 = 0x40;
+    a2 = (unsigned long long) 0;
+    a3 = (unsigned long long) 0; 
+
+    HCALL(rc,num,a1,a2,a3,a4,a5,a6,a7,a8);
+
+    if (rc) {
+       INFO("Failed to uninstall HVM signal handler\n");
+    } 
+
+    // and now do user space cleanup
+
+    __v3_hvm_ros_signal_handler = 0;
+
+    if (__v3_hvm_ros_signal_handler_stack) { 
+       munlock(__v3_hvm_ros_signal_handler_stack,__v3_hvm_ros_signal_handler_stack_size);
+       __v3_hvm_ros_signal_handler_stack = 0;
+       __v3_hvm_ros_signal_handler_stack_size = 0;
+    }
+    
+    if (rc) { 
+       return -1;
+    } else {
+       return 0;
+    }
+}
+
+
+int  v3_hvm_hrt_signal_ros(uint64_t code)
+{
+    unsigned long long rc, num, a1=0, a2=0, a3=0, a4=0, a5=0, a6=0, a7=0, a8=0;
+
+    num = 0xf00d;
+    a1 = 0x41;
+    a2 = (unsigned long long) code;
+
+    HCALL(rc,num,a1,a2,a3,a4,a5,a6,a7,a8);
+
+    return rc ? -1 : rc;
+
+}
index 96c8262..de3b2e1 100644 (file)
 int v3_hvm_ros_user_init();
 int v3_hvm_ros_user_deinit();
 
+// Establish function to be invoked by the VMM
+// to signal activity (basically an interrupt handler)
+// The handler can use the GPRs, but must save/restore
+// any other registers it needs itself.  If it goes
+// out of its stack, it's out of luck
+int v3_hvm_ros_register_signal(void (*handler)(uint64_t), void *stack, uint64_t stack_size); 
+int v3_hvm_ros_unregister_signal();
 
 // Replace the existing HRT with a new one
 //  - this does not boot the new HRT
@@ -43,5 +50,8 @@ int  v3_hvm_ros_synchronize();
 int  v3_hvm_ros_invoke_hrt_sync(void *p, int handle_ros_events);
 int  v3_hvm_ros_desynchronize();
 
+// Signal the HRT from the ROS
+// The ROS can call this too, but it shouldn't be necesary
+int  v3_hvm_hrt_signal_ros(uint64_t code);
 
 #endif
diff --git a/guest/linux/hvm-ros/v3_hvm_ros_user_low_level.S b/guest/linux/hvm-ros/v3_hvm_ros_user_low_level.S
new file mode 100644 (file)
index 0000000..3cbcc07
--- /dev/null
@@ -0,0 +1,88 @@
+.section .text
+       
+.extern __v3_hvm_ros_signal_handler  
+
+/* 
+   This is the entry point for signal dispatch
+   from the VMM.
+       
+   VMM invokes this stub when a user signal is
+   raised and: the relevant address space
+   is active, and we are at user-level.
+   It will be invoked  on exactly one core,  
+   although there are  no guarantees on which one.
+   
+*/
+
+
+#define GPR_SAVE()                                               \
+       pushq %rbp              ;                                \
+       pushq %rax              ;                                \
+       pushq %rbx              ;                                \
+       pushq %rcx              ;                                \
+       pushq %rdx              ;                                \
+       pushq %rsi              ;                                \
+       pushq %rdi              ;                                \
+       pushq %r8               ;                                \
+       pushq %r9               ;                                \
+       pushq %r10              ;                                \
+       pushq %r11              ;                                \
+       pushq %r12              ;                                \
+       pushq %r13              ;                                \
+       pushq %r14              ;                                \
+       pushq %r15              ;                                \
+
+#define GPR_LOAD()                       \
+       popq %r15               ;                                \
+       popq %r14               ;                                \
+       popq %r13               ;                                \
+       popq %r12               ;                                \
+       popq %r11               ;                                \
+       popq %r10               ;                                \
+       popq %r9                ;                                \
+       popq %r8                ;                                \
+       popq %rdi               ;                                \
+       popq %rsi               ;                                \
+       popq %rdx               ;                                \
+       popq %rcx               ;                                \
+       popq %rbx               ;                                \
+       popq %rax               ;                                \
+       popq %rbp               ;          
+
+       
+/*
+   The VMM puts us here with what looks like a long mode 
+   interrupt dispatch, but it's from CPL 3 to CPL 3 and
+   it's not done as an interrupt injection per se, so
+   we can be interrupted, and deliverable interrupts
+   automatically prioritize over us.
+
+        48bitsblank | return SS (16 bits)
+       Return RSP
+       Return RFLAGS
+       48bitsblank | return CS (16 bits)
+       Return RIP
+       ERROR CODE (HVM-specific non-zero number here)  <- RSP on entry
+       
+       We then need simply to do this:
+
+       save regs
+       call the handler (if installed)
+       restore regs
+       iret
+       
+*/
+
+.global __v3_hvm_ros_signal_handler_stub
+__v3_hvm_ros_signal_handler_stub:
+       GPR_SAVE()
+       movabs __v3_hvm_ros_signal_handler, %rax
+       testq %rax, %rax
+       jz skip_handler
+       movq 120(%rsp), %rdi /* error code becomes argument */
+       callq *%rax     
+skip_handler:  
+       GPR_LOAD()
+       addq $8, %rsp /* nuke the error code */
+       iretq
+       
index 9d441d6..48b0a3a 100644 (file)
@@ -41,6 +41,21 @@ struct v3_ros_event {
     };
 };
 
+struct v3_ros_signal { 
+    // swapped atomically at entry check (xchg)
+    // so only one core does entry
+    // code = 0 => no signal is pending
+    uint64_t code;
+
+    // ROS process context we inject to
+    // if any of these are zero, no injection happens
+    // it must be the case that the ROS is at CPL 3
+    // and in user-mode for injection to occur
+    uint64_t cr3;
+    uint64_t handler;
+    uint64_t stack;
+};
+
 struct v3_vm_hvm {
     uint8_t   is_hvm;
     uint32_t  first_hrt_core;
@@ -49,6 +64,7 @@ struct v3_vm_hvm {
     void      *hrt_image;          // image provided by ROS, if any
     uint64_t  hrt_image_size;      //   size of this image
     uint64_t  hrt_entry_addr;
+
     enum { HRT_BLOB, HRT_ELF64, HRT_MBOOT2, HRT_MBOOT64 } hrt_type;
 
     // The following parallel the content of mb_info_hrt_t in
@@ -71,6 +87,9 @@ struct v3_vm_hvm {
     // the ROS event to be handed back
     struct v3_ros_event ros_event;
 
+    // user-level interrupt injection state for ROS
+    struct v3_ros_signal ros_signal;
+
 };
 
 struct v3_core_hvm {
@@ -114,8 +133,14 @@ int v3_build_hrt_multiboot_tag(struct guest_info *core, mb_info_hrt_t *hrt);
 int v3_setup_hvm_vm_for_boot(struct v3_vm_info *vm);
 int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core);
 
+// 0 is not a valid code
+int v3_hvm_signal_ros(struct v3_vm_info *vm, uint64_t code);
+
 int v3_handle_hvm_reset(struct guest_info *core);
 
+int v3_handle_hvm_entry(struct guest_info *core);
+int v3_handle_hvm_exit(struct guest_info *core);
+
 /*
   HVM/HRT interaction is as follows:
 
@@ -152,13 +177,19 @@ int v3_handle_hvm_reset(struct guest_info *core);
          - flags copied from the HRT's HRT tag (position independence, 
            page table model, offset, etc)
   4. Downcalls:
-         hypercall 0xf00df00d with arguments depending on operation
-         with examples described below.
+         hypercall 0xf00d with arguments depending on operation
+         with examples described below.  Some requests are only
+         allowed from an HRT core (or ROS core).   rax is set to -1
+         on error.
   5. Upcalls
-         interrupt injected by VMM or a magic #PF
-         communication via a shared memory page, contents below
+         (To HRT) interrupt injected by VMM or a magic #PF
+                  info via a shared memory page, contents below
+         (To ROS) ROS *app* can set itself up to receive a 
+                 *user-level* "interrupt" manufactured by the VMM
+                  our user library automates this, making it look
+                  sort of like a signal handler
 
-  Upcalls
+  Upcalls to HRT
 
    Type of upcall is determined by the first 64 bits in the commm page
 
@@ -184,7 +215,7 @@ int v3_handle_hvm_reset(struct guest_info *core);
    0x31 =>  Unmerge address space
             return the ROS memory mapping to normal (physical/virtual identity)
 
-  Downcalls
+  Downcalls from ROS or HRT
 
    HVM_HCALL is the general hypercall number used to talk to the HVM
      The first argument is the request number (below).   The other arguments
@@ -220,6 +251,16 @@ int v3_handle_hvm_reset(struct guest_info *core);
             release any address space merger and restore identity mapping
    0x3f =>  Merge request complete (HRT->ROS)
 
+   0x40 =>  Install user-mode interrupt/signal handler (ROS)
+            arg1 = handler, arg2 = stack
+
+   0x41 =>  Signal ROS handler (HRT->ROS)
+            arg1 = number (must != 0)
+
+   Upcalls to ROS
+   
+   (Currently all are application/HRT dependent)
+
 */     
      
 
index 7be8f8e..28c8a70 100644 (file)
@@ -799,6 +799,10 @@ int v3_svm_enter(struct guest_info * info) {
     v3_mem_track_entry(info);
 #endif 
 
+#ifdef V3_CONFIG_HVM
+    v3_handle_hvm_entry(info);
+#endif
+
     // Update timer devices after being in the VM before doing 
     // IRQ updates, so that any interrupts they raise get seen 
     // immediately.
@@ -1005,6 +1009,10 @@ int v3_svm_enter(struct guest_info * info) {
        v3_handle_timeouts(info, guest_cycles);
     }
 
+#ifdef V3_CONFIG_HVM
+    v3_handle_hvm_exit(info);
+#endif 
+
 #ifdef V3_CONFIG_MEM_TRACK
     v3_mem_track_exit(info);
 #endif 
index 3818663..43e7c33 100644 (file)
@@ -444,6 +444,40 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi
            }
                    
            break;
+           
+       case 0x40: // install or remove signal handler
+           if (v3_is_hvm_hrt_core(core)) { 
+               PrintError(core->vm_info,core, "hvm: HRT cannot install signal handler...\n");
+               core->vm_regs.rax=-1;
+           } else {
+               PrintDebug(core->vm_info,core,"hvm: install signal handler for CR3=%p, handler=%p, stack=%p\n",(void*)core->ctrl_regs.cr3, (void*)a2, (void*)a3);
+               if (h->ros_signal.code) { 
+                   PrintError(core->vm_info,core,"hvm: signal is pending...\n");
+                   core->vm_regs.rax=-1;
+               } else {
+                   if ((a2 || a3) && (h->ros_signal.handler || h->ros_signal.stack)) { 
+                       PrintError(core->vm_info,core,"hvm: attempt to replace existing handler without removing it first\n");
+                       core->vm_regs.rax=-1;
+                   } else {
+                       // actually make the change
+                       h->ros_signal.handler=a2;
+                       h->ros_signal.stack=a3;
+                       h->ros_signal.cr3=core->ctrl_regs.cr3;
+                       core->vm_regs.rax=0;
+
+                       // test by signalling back a hello 
+                       // if (a2 && a3) { 
+                       //    v3_hvm_signal_ros(core->vm_info,0xf00d);
+                       //}
+                   }
+               }
+           }
+           break;
+
+       case 0x41: // raise signal in the ROS from HRT or ROS
+           PrintDebug(core->vm_info,core,"hvm: HRT raises signal code=0x%llx\n", a2);
+           core->vm_regs.rax = v3_hvm_signal_ros(core->vm_info,a2);
+           break;
 
        default:
            PrintError(core->vm_info,core,"hvm: unknown hypercall %llx\n",a1);
@@ -1805,3 +1839,111 @@ int v3_handle_hvm_reset(struct guest_info *core)
        return 0;
     }
 }
+
+
+int v3_handle_hvm_entry(struct guest_info *core)
+{
+    if (!core->vm_info->hvm_state.is_hvm        // not relevant to non-HVM
+       || core->hvm_state.is_hrt              // not relevant to an HRT in an HVM
+       || !core->vm_info->hvm_state.ros_signal.code) { // not relevant if there is no code to inject
+
+       // Note that above check for code could race with a writer, but
+       // if that happens, we'll simply inject at the next opportunity instead of 
+       // this one (see below for atomic update)
+       return 0;
+    } else {
+       struct v3_ros_signal *s = &core->vm_info->hvm_state.ros_signal;
+
+       // HVM ROS
+       if (! (s->handler && // handler installed
+              s->cr3 &&     // process installed
+              s->stack &&   // stack installed
+              core->cpl == 3 &&  // user mode
+              core->ctrl_regs.cr3 == s->cr3) // right process active
+           ) {
+           // Cannot inject at this time
+           return 0;
+       } else {
+           // We can inject now, let's atomically see if we have something
+           // and commit to doing it if we do
+           uint64_t code;
+
+           // Get code, reset to allow next one
+           code = __sync_fetch_and_and(&(s->code), 0);
+
+           if (!code) { 
+               // nothing to do after all
+               return 0;
+           } else {
+
+               // actually do inject
+
+               uint64_t rsp;
+               uint64_t frame[6];
+               
+               PrintDebug(core->vm_info,core,"hvm: ROS interrupt starting with rip=%p rsp=%p\n", (void*) core->rip, (void*) core->vm_regs.rsp);
+               // build interrupt frame
+               frame[0] = code;
+               frame[1] = core->rip;
+               frame[2] = core->segments.cs.selector; // return cs
+               frame[3] = core->ctrl_regs.rflags;
+               frame[4] = core->vm_regs.rsp;
+               frame[5] = core->segments.ss.selector; // return ss
+               
+               rsp = (s->stack - 8) & (~0x7); // make sure we are aligned
+               rsp -= sizeof(frame);
+               
+
+               if (v3_write_gva_memory(core,(addr_t)rsp,sizeof(frame),(uint8_t*)frame)!=sizeof(frame)) { 
+                   PrintError(core->vm_info,core,"hvm: failed to write interrupt frame\n");
+                   // we just lost this inject
+                   return -1;
+               }
+               
+               // now make us look like we are jumping to the entry
+               core->rip = s->handler;
+               core->vm_regs.rsp = rsp;
+
+               PrintDebug(core->vm_info,core,"hvm: ROS frame is 0x%llx|0x%llx|0x%llx|0x%llx|0x%llx|0x%llx and and on entry rip=%p and rsp=%p\n", frame[0],frame[1],frame[2],frame[3],frame[4],frame[5],(void*) core->rip, (void*) core->vm_regs.rsp);
+               
+               // and we should be good to go
+               return 0;
+           } 
+       }
+    }
+}
+
+int v3_handle_hvm_exit(struct guest_info *core)
+{
+    // currently nothing
+    return 0;
+}
+
+int v3_hvm_signal_ros(struct v3_vm_info *vm, uint64_t code)
+{
+    struct v3_ros_signal *s = &vm->hvm_state.ros_signal;
+
+    if (!code) { 
+       PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with code zero\n");
+       return -1;
+    }
+
+    // handler, etc, must exist
+    if (!s->handler || !s->stack) { 
+       PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with no installed handler\n");
+       return -1;
+    } else {
+       // we set the code only if we are idle (code 0), 
+       // and we do so only 
+       if (!__sync_bool_compare_and_swap(&(s->code), 0, code)) {
+           PrintError(vm,VCORE_NONE,"hvm: signal was already asserted\n");
+           return -1;
+       } else {
+           PrintDebug(vm,VCORE_NONE,"hvm: raised signal 0x%llx to the ROS\n",code);
+           return 0;
+       }
+    }
+}
+
+
+       
index 2984de8..f45ffa3 100644 (file)
@@ -994,6 +994,10 @@ int v3_vmx_enter(struct guest_info * info) {
     v3_mem_track_entry(info);
 #endif 
 
+#ifdef V3_CONFIG_HVM
+    v3_handle_hvm_entry(info);
+#endif
+
     // Update timer devices late after being in the VM so that as much 
     // of the time in the VM is accounted for as possible. Also do it before
     // updating IRQ entry state so that any interrupts the timers raise get 
@@ -1196,6 +1200,10 @@ int v3_vmx_enter(struct guest_info * info) {
        v3_handle_timeouts(info, guest_cycles);
     }
 
+#ifdef V3_CONFIG_HVM
+    v3_handle_hvm_exit(info);
+#endif
+
 #ifdef V3_CONFIG_MEM_TRACK
     v3_mem_track_exit(info);
 #endif