From: Peter Dinda Date: Sun, 27 Sep 2015 21:15:11 +0000 (-0500) Subject: HVM capability enhancement: asynchronous upcalls to ROS userspace X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=4e43946f01f687361197dc9571b7df02ae20de30 HVM capability enhancement: asynchronous upcalls to ROS userspace This adds the ability for the HRT, ROS, or other component of the VMM to send what looks like a "user interrupt" (e.g. signal) to a process running in the ROS. This mechanism looks similar to signals: the ROS process installs a handler and stack (our library hides the details), the VMM then dispatches to the handler by switching stacks, building what looks like a CPL 3 -> CPL 3 interrupt frame (with error code) on the selected stack, and then points rip to a userspace wrapper that handles register save/restore + iret, wrapping around a call to the user's handler. Whoever raises the signal supplies the error code, which is then coerced into an argument for the user's handler. Only a single process in the ROS can register a handler at a time. The context includes not only the stack and handler function, but also the CR3 extant at registration time, and CPL 3 at injection time. --- diff --git a/guest/linux/hvm-ros/Makefile b/guest/linux/hvm-ros/Makefile index 96ce1f3..f818a39 100644 --- a/guest/linux/hvm-ros/Makefile +++ b/guest/linux/hvm-ros/Makefile @@ -1,14 +1,17 @@ all: libv3_hvm_ros_user.a test -libv3_hvm_ros_user.a: v3_hvm_ros_user.o - ar ruv libv3_hvm_ros_user.a v3_hvm_ros_user.o +libv3_hvm_ros_user.a: v3_hvm_ros_user.o v3_hvm_ros_user_low_level.o + ar ruv libv3_hvm_ros_user.a v3_hvm_ros_user.o v3_hvm_ros_user_low_level.o v3_hvm_ros_user.o: v3_hvm_ros_user.c v3_hvm_ros_user.h gcc -Wall -c v3_hvm_ros_user.c +v3_hvm_ros_user_low_level.o: v3_hvm_ros_user_low_level.S + gcc -Wall -c v3_hvm_ros_user_low_level.S + test: test.c libv3_hvm_ros_user.a v3_hvm_ros_user.h gcc -Wall -static test.c -L. -lv3_hvm_ros_user -o test clean: - rm -f libv3_hvm_ros_user.a v3_hvm_ros_user.o test + rm -f libv3_hvm_ros_user.a v3_hvm_ros_user.o v3_hvm_ros_user_low_level.o test diff --git a/guest/linux/hvm-ros/test.c b/guest/linux/hvm-ros/test.c index e6e14bf..2781610 100644 --- a/guest/linux/hvm-ros/test.c +++ b/guest/linux/hvm-ros/test.c @@ -188,6 +188,14 @@ int timing_test_async(uint64_t num_merge, uint64_t num_call) return 0; } +#define HVM_SIG_STACK_SIZE 8192 +static char hvm_sig_stack[HVM_SIG_STACK_SIZE] __attribute__((aligned(4096))); + +static void my_hvm_sig_handler(uint64_t code) +{ + printf("HVM Signal Handler Invoked: code=0x%lx\n",code); +} + int main(int argc, char *argv[]) { int rc; @@ -198,7 +206,10 @@ int main(int argc, char *argv[]) } v3_hvm_ros_user_init(); - + + v3_hvm_ros_unregister_signal(); // remove any existing signal handler + v3_hvm_ros_register_signal(my_hvm_sig_handler,hvm_sig_stack,HVM_SIG_STACK_SIZE); + if (argv[1][0]=='s') { if (argv[2][0]=='s') { rc=simple_test_sync(); @@ -222,6 +233,8 @@ int main(int argc, char *argv[]) rc=-1; } + v3_hvm_ros_unregister_signal(); + v3_hvm_ros_user_deinit(); return rc; diff --git a/guest/linux/hvm-ros/v3_hvm_ros_user.c b/guest/linux/hvm-ros/v3_hvm_ros_user.c index 345f8cb..c703ec1 100644 --- a/guest/linux/hvm-ros/v3_hvm_ros_user.c +++ b/guest/linux/hvm-ros/v3_hvm_ros_user.c @@ -9,6 +9,7 @@ #include #include +#include #include "v3_hvm_ros_user.h" @@ -480,3 +481,87 @@ int v3_hvm_ros_invoke_hrt_sync(void *buf, int ros) return 0; } +extern void *__v3_hvm_ros_signal_handler_stub; + +void (*__v3_hvm_ros_signal_handler)(uint64_t) = 0; +static void *__v3_hvm_ros_signal_handler_stack = 0; +static uint64_t __v3_hvm_ros_signal_handler_stack_size=0; + +int v3_hvm_ros_register_signal(void (*h)(uint64_t), void *stack, uint64_t stack_size ) +{ + unsigned long long rc, num, a1=0, a2=0, a3=0, a4=0, a5=0, a6=0, a7=0, a8=0; + + if (mlock(stack,stack_size)) { + INFO("Can't pin stack - proceeding\n"); + } + + // clear it and touch it + memset(stack,0,stack_size); + + __v3_hvm_ros_signal_handler_stack = stack; + __v3_hvm_ros_signal_handler_stack_size = stack_size; + __v3_hvm_ros_signal_handler = h; + + // Attempt to install + + num = 0xf00d; + a1 = 0x40; + a2 = (unsigned long long) &__v3_hvm_ros_signal_handler_stub; + a3 = (unsigned long long) stack + stack_size - 8; // put us at the top of the stack + + HCALL(rc,num,a1,a2,a3,a4,a5,a6,a7,a8); + if (rc) { + INFO("Failed to install HVM signal handler\n"); + return -1; + } + + return 0; +} + +int v3_hvm_ros_unregister_signal() +{ + unsigned long long rc, num, a1=0, a2=0, a3=0, a4=0, a5=0, a6=0, a7=0, a8=0; + + // an unregister boils down to setting handler to null + num = 0xf00d; + a1 = 0x40; + a2 = (unsigned long long) 0; + a3 = (unsigned long long) 0; + + HCALL(rc,num,a1,a2,a3,a4,a5,a6,a7,a8); + + if (rc) { + INFO("Failed to uninstall HVM signal handler\n"); + } + + // and now do user space cleanup + + __v3_hvm_ros_signal_handler = 0; + + if (__v3_hvm_ros_signal_handler_stack) { + munlock(__v3_hvm_ros_signal_handler_stack,__v3_hvm_ros_signal_handler_stack_size); + __v3_hvm_ros_signal_handler_stack = 0; + __v3_hvm_ros_signal_handler_stack_size = 0; + } + + if (rc) { + return -1; + } else { + return 0; + } +} + + +int v3_hvm_hrt_signal_ros(uint64_t code) +{ + unsigned long long rc, num, a1=0, a2=0, a3=0, a4=0, a5=0, a6=0, a7=0, a8=0; + + num = 0xf00d; + a1 = 0x41; + a2 = (unsigned long long) code; + + HCALL(rc,num,a1,a2,a3,a4,a5,a6,a7,a8); + + return rc ? -1 : rc; + +} diff --git a/guest/linux/hvm-ros/v3_hvm_ros_user.h b/guest/linux/hvm-ros/v3_hvm_ros_user.h index 96c8262..de3b2e1 100644 --- a/guest/linux/hvm-ros/v3_hvm_ros_user.h +++ b/guest/linux/hvm-ros/v3_hvm_ros_user.h @@ -12,6 +12,13 @@ int v3_hvm_ros_user_init(); int v3_hvm_ros_user_deinit(); +// Establish function to be invoked by the VMM +// to signal activity (basically an interrupt handler) +// The handler can use the GPRs, but must save/restore +// any other registers it needs itself. If it goes +// out of its stack, it's out of luck +int v3_hvm_ros_register_signal(void (*handler)(uint64_t), void *stack, uint64_t stack_size); +int v3_hvm_ros_unregister_signal(); // Replace the existing HRT with a new one // - this does not boot the new HRT @@ -43,5 +50,8 @@ int v3_hvm_ros_synchronize(); int v3_hvm_ros_invoke_hrt_sync(void *p, int handle_ros_events); int v3_hvm_ros_desynchronize(); +// Signal the HRT from the ROS +// The ROS can call this too, but it shouldn't be necesary +int v3_hvm_hrt_signal_ros(uint64_t code); #endif diff --git a/guest/linux/hvm-ros/v3_hvm_ros_user_low_level.S b/guest/linux/hvm-ros/v3_hvm_ros_user_low_level.S new file mode 100644 index 0000000..3cbcc07 --- /dev/null +++ b/guest/linux/hvm-ros/v3_hvm_ros_user_low_level.S @@ -0,0 +1,88 @@ +.section .text + +.extern __v3_hvm_ros_signal_handler + +/* + This is the entry point for signal dispatch + from the VMM. + + VMM invokes this stub when a user signal is + raised and: the relevant address space + is active, and we are at user-level. + It will be invoked on exactly one core, + although there are no guarantees on which one. + +*/ + + +#define GPR_SAVE() \ + pushq %rbp ; \ + pushq %rax ; \ + pushq %rbx ; \ + pushq %rcx ; \ + pushq %rdx ; \ + pushq %rsi ; \ + pushq %rdi ; \ + pushq %r8 ; \ + pushq %r9 ; \ + pushq %r10 ; \ + pushq %r11 ; \ + pushq %r12 ; \ + pushq %r13 ; \ + pushq %r14 ; \ + pushq %r15 ; \ + +#define GPR_LOAD() \ + popq %r15 ; \ + popq %r14 ; \ + popq %r13 ; \ + popq %r12 ; \ + popq %r11 ; \ + popq %r10 ; \ + popq %r9 ; \ + popq %r8 ; \ + popq %rdi ; \ + popq %rsi ; \ + popq %rdx ; \ + popq %rcx ; \ + popq %rbx ; \ + popq %rax ; \ + popq %rbp ; + + +/* + The VMM puts us here with what looks like a long mode + interrupt dispatch, but it's from CPL 3 to CPL 3 and + it's not done as an interrupt injection per se, so + we can be interrupted, and deliverable interrupts + automatically prioritize over us. + + 48bitsblank | return SS (16 bits) + Return RSP + Return RFLAGS + 48bitsblank | return CS (16 bits) + Return RIP + ERROR CODE (HVM-specific non-zero number here) <- RSP on entry + + We then need simply to do this: + + save regs + call the handler (if installed) + restore regs + iret + +*/ + +.global __v3_hvm_ros_signal_handler_stub +__v3_hvm_ros_signal_handler_stub: + GPR_SAVE() + movabs __v3_hvm_ros_signal_handler, %rax + testq %rax, %rax + jz skip_handler + movq 120(%rsp), %rdi /* error code becomes argument */ + callq *%rax +skip_handler: + GPR_LOAD() + addq $8, %rsp /* nuke the error code */ + iretq + diff --git a/palacios/include/palacios/vmm_hvm.h b/palacios/include/palacios/vmm_hvm.h index 9d441d6..48b0a3a 100644 --- a/palacios/include/palacios/vmm_hvm.h +++ b/palacios/include/palacios/vmm_hvm.h @@ -41,6 +41,21 @@ struct v3_ros_event { }; }; +struct v3_ros_signal { + // swapped atomically at entry check (xchg) + // so only one core does entry + // code = 0 => no signal is pending + uint64_t code; + + // ROS process context we inject to + // if any of these are zero, no injection happens + // it must be the case that the ROS is at CPL 3 + // and in user-mode for injection to occur + uint64_t cr3; + uint64_t handler; + uint64_t stack; +}; + struct v3_vm_hvm { uint8_t is_hvm; uint32_t first_hrt_core; @@ -49,6 +64,7 @@ struct v3_vm_hvm { void *hrt_image; // image provided by ROS, if any uint64_t hrt_image_size; // size of this image uint64_t hrt_entry_addr; + enum { HRT_BLOB, HRT_ELF64, HRT_MBOOT2, HRT_MBOOT64 } hrt_type; // The following parallel the content of mb_info_hrt_t in @@ -71,6 +87,9 @@ struct v3_vm_hvm { // the ROS event to be handed back struct v3_ros_event ros_event; + // user-level interrupt injection state for ROS + struct v3_ros_signal ros_signal; + }; struct v3_core_hvm { @@ -114,8 +133,14 @@ int v3_build_hrt_multiboot_tag(struct guest_info *core, mb_info_hrt_t *hrt); int v3_setup_hvm_vm_for_boot(struct v3_vm_info *vm); int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core); +// 0 is not a valid code +int v3_hvm_signal_ros(struct v3_vm_info *vm, uint64_t code); + int v3_handle_hvm_reset(struct guest_info *core); +int v3_handle_hvm_entry(struct guest_info *core); +int v3_handle_hvm_exit(struct guest_info *core); + /* HVM/HRT interaction is as follows: @@ -152,13 +177,19 @@ int v3_handle_hvm_reset(struct guest_info *core); - flags copied from the HRT's HRT tag (position independence, page table model, offset, etc) 4. Downcalls: - hypercall 0xf00df00d with arguments depending on operation - with examples described below. + hypercall 0xf00d with arguments depending on operation + with examples described below. Some requests are only + allowed from an HRT core (or ROS core). rax is set to -1 + on error. 5. Upcalls - interrupt injected by VMM or a magic #PF - communication via a shared memory page, contents below + (To HRT) interrupt injected by VMM or a magic #PF + info via a shared memory page, contents below + (To ROS) ROS *app* can set itself up to receive a + *user-level* "interrupt" manufactured by the VMM + our user library automates this, making it look + sort of like a signal handler - Upcalls + Upcalls to HRT Type of upcall is determined by the first 64 bits in the commm page @@ -184,7 +215,7 @@ int v3_handle_hvm_reset(struct guest_info *core); 0x31 => Unmerge address space return the ROS memory mapping to normal (physical/virtual identity) - Downcalls + Downcalls from ROS or HRT HVM_HCALL is the general hypercall number used to talk to the HVM The first argument is the request number (below). The other arguments @@ -220,6 +251,16 @@ int v3_handle_hvm_reset(struct guest_info *core); release any address space merger and restore identity mapping 0x3f => Merge request complete (HRT->ROS) + 0x40 => Install user-mode interrupt/signal handler (ROS) + arg1 = handler, arg2 = stack + + 0x41 => Signal ROS handler (HRT->ROS) + arg1 = number (must != 0) + + Upcalls to ROS + + (Currently all are application/HRT dependent) + */ diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index 7be8f8e..28c8a70 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -799,6 +799,10 @@ int v3_svm_enter(struct guest_info * info) { v3_mem_track_entry(info); #endif +#ifdef V3_CONFIG_HVM + v3_handle_hvm_entry(info); +#endif + // Update timer devices after being in the VM before doing // IRQ updates, so that any interrupts they raise get seen // immediately. @@ -1005,6 +1009,10 @@ int v3_svm_enter(struct guest_info * info) { v3_handle_timeouts(info, guest_cycles); } +#ifdef V3_CONFIG_HVM + v3_handle_hvm_exit(info); +#endif + #ifdef V3_CONFIG_MEM_TRACK v3_mem_track_exit(info); #endif diff --git a/palacios/src/palacios/vmm_hvm.c b/palacios/src/palacios/vmm_hvm.c index 3818663..43e7c33 100644 --- a/palacios/src/palacios/vmm_hvm.c +++ b/palacios/src/palacios/vmm_hvm.c @@ -444,6 +444,40 @@ static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, voi } break; + + case 0x40: // install or remove signal handler + if (v3_is_hvm_hrt_core(core)) { + PrintError(core->vm_info,core, "hvm: HRT cannot install signal handler...\n"); + core->vm_regs.rax=-1; + } else { + PrintDebug(core->vm_info,core,"hvm: install signal handler for CR3=%p, handler=%p, stack=%p\n",(void*)core->ctrl_regs.cr3, (void*)a2, (void*)a3); + if (h->ros_signal.code) { + PrintError(core->vm_info,core,"hvm: signal is pending...\n"); + core->vm_regs.rax=-1; + } else { + if ((a2 || a3) && (h->ros_signal.handler || h->ros_signal.stack)) { + PrintError(core->vm_info,core,"hvm: attempt to replace existing handler without removing it first\n"); + core->vm_regs.rax=-1; + } else { + // actually make the change + h->ros_signal.handler=a2; + h->ros_signal.stack=a3; + h->ros_signal.cr3=core->ctrl_regs.cr3; + core->vm_regs.rax=0; + + // test by signalling back a hello + // if (a2 && a3) { + // v3_hvm_signal_ros(core->vm_info,0xf00d); + //} + } + } + } + break; + + case 0x41: // raise signal in the ROS from HRT or ROS + PrintDebug(core->vm_info,core,"hvm: HRT raises signal code=0x%llx\n", a2); + core->vm_regs.rax = v3_hvm_signal_ros(core->vm_info,a2); + break; default: PrintError(core->vm_info,core,"hvm: unknown hypercall %llx\n",a1); @@ -1805,3 +1839,111 @@ int v3_handle_hvm_reset(struct guest_info *core) return 0; } } + + +int v3_handle_hvm_entry(struct guest_info *core) +{ + if (!core->vm_info->hvm_state.is_hvm // not relevant to non-HVM + || core->hvm_state.is_hrt // not relevant to an HRT in an HVM + || !core->vm_info->hvm_state.ros_signal.code) { // not relevant if there is no code to inject + + // Note that above check for code could race with a writer, but + // if that happens, we'll simply inject at the next opportunity instead of + // this one (see below for atomic update) + return 0; + } else { + struct v3_ros_signal *s = &core->vm_info->hvm_state.ros_signal; + + // HVM ROS + if (! (s->handler && // handler installed + s->cr3 && // process installed + s->stack && // stack installed + core->cpl == 3 && // user mode + core->ctrl_regs.cr3 == s->cr3) // right process active + ) { + // Cannot inject at this time + return 0; + } else { + // We can inject now, let's atomically see if we have something + // and commit to doing it if we do + uint64_t code; + + // Get code, reset to allow next one + code = __sync_fetch_and_and(&(s->code), 0); + + if (!code) { + // nothing to do after all + return 0; + } else { + + // actually do inject + + uint64_t rsp; + uint64_t frame[6]; + + PrintDebug(core->vm_info,core,"hvm: ROS interrupt starting with rip=%p rsp=%p\n", (void*) core->rip, (void*) core->vm_regs.rsp); + // build interrupt frame + frame[0] = code; + frame[1] = core->rip; + frame[2] = core->segments.cs.selector; // return cs + frame[3] = core->ctrl_regs.rflags; + frame[4] = core->vm_regs.rsp; + frame[5] = core->segments.ss.selector; // return ss + + rsp = (s->stack - 8) & (~0x7); // make sure we are aligned + rsp -= sizeof(frame); + + + if (v3_write_gva_memory(core,(addr_t)rsp,sizeof(frame),(uint8_t*)frame)!=sizeof(frame)) { + PrintError(core->vm_info,core,"hvm: failed to write interrupt frame\n"); + // we just lost this inject + return -1; + } + + // now make us look like we are jumping to the entry + core->rip = s->handler; + core->vm_regs.rsp = rsp; + + PrintDebug(core->vm_info,core,"hvm: ROS frame is 0x%llx|0x%llx|0x%llx|0x%llx|0x%llx|0x%llx and and on entry rip=%p and rsp=%p\n", frame[0],frame[1],frame[2],frame[3],frame[4],frame[5],(void*) core->rip, (void*) core->vm_regs.rsp); + + // and we should be good to go + return 0; + } + } + } +} + +int v3_handle_hvm_exit(struct guest_info *core) +{ + // currently nothing + return 0; +} + +int v3_hvm_signal_ros(struct v3_vm_info *vm, uint64_t code) +{ + struct v3_ros_signal *s = &vm->hvm_state.ros_signal; + + if (!code) { + PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with code zero\n"); + return -1; + } + + // handler, etc, must exist + if (!s->handler || !s->stack) { + PrintError(vm,VCORE_NONE,"hvm: cannot signal ros with no installed handler\n"); + return -1; + } else { + // we set the code only if we are idle (code 0), + // and we do so only + if (!__sync_bool_compare_and_swap(&(s->code), 0, code)) { + PrintError(vm,VCORE_NONE,"hvm: signal was already asserted\n"); + return -1; + } else { + PrintDebug(vm,VCORE_NONE,"hvm: raised signal 0x%llx to the ROS\n",code); + return 0; + } + } +} + + + diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index 2984de8..f45ffa3 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -994,6 +994,10 @@ int v3_vmx_enter(struct guest_info * info) { v3_mem_track_entry(info); #endif +#ifdef V3_CONFIG_HVM + v3_handle_hvm_entry(info); +#endif + // Update timer devices late after being in the VM so that as much // of the time in the VM is accounted for as possible. Also do it before // updating IRQ entry state so that any interrupts the timers raise get @@ -1196,6 +1200,10 @@ int v3_vmx_enter(struct guest_info * info) { v3_handle_timeouts(info, guest_cycles); } +#ifdef V3_CONFIG_HVM + v3_handle_hvm_exit(info); +#endif + #ifdef V3_CONFIG_MEM_TRACK v3_mem_track_exit(info); #endif