From: Kyle Hale
Date: Fri, 13 Apr 2012 21:33:34 +0000 (-0500)
Subject: Gears Fast System Call Exiting Utility Service
X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=08c7d14f027c317dc643eacc1a3687745de9b4f9;p=palacios.releases.git

Gears Fast System Call Exiting Utility Service
---

Note: this copy of the patch has been re-flowed and amended during review
(bounds check in the stub, error handling and teardown ordering in the module).
The blob ids on the "index" lines below still refer to the original commit.

diff --git a/gears/services/fsceu/Makefile b/gears/services/fsceu/Makefile
new file mode 100644
index 0000000..7cdca85
--- /dev/null
+++ b/gears/services/fsceu/Makefile
@@ -0,0 +1,16 @@
+# Guest kernel build tree; override with "make KDIR=/path/to/kernel".
+KDIR ?= /home/kch479/kyle_guest/kyle_gl
+obj-m += fsceu.o
+fsceu-objs := syscall_decode.o syscall.o
+
+.PHONY: clean
+
+# NOTE(review): obj-m names the module fsceu, so syscall_decode.ko is presumably
+# never created and this rule re-invokes kbuild on every run.
+syscall_decode.ko: syscall_decode.c syscall_decode.h syscall.S
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+clean:
+	rm -f *.o *.ko
+	$(MAKE) -C $(KDIR) M=$(PWD) clean
+
diff --git a/gears/services/fsceu/README b/gears/services/fsceu/README
new file mode 100644
index 0000000..618a4fd
--- /dev/null
+++ b/gears/services/fsceu/README
@@ -0,0 +1,4 @@
+This is a module that should be injected into the guest to
+allow selective system call exiting. FSCEU stands for Fast System-Call Exiting
+Utility. Once enabled, only the system call vectors enabled in an in-memory byte array
+will cause VM exits.
diff --git a/gears/services/fsceu/syscall.S b/gears/services/fsceu/syscall.S
new file mode 100644
index 0000000..6405cb3
--- /dev/null
+++ b/gears/services/fsceu/syscall.S
@@ -0,0 +1,60 @@
+/* Kyle C. Hale 2011 */
+
+#include "syscall_decode.h"
+
+.text
+
+/* Because SYSCALL doesn't put a kernel stack in place for us, we have to jump
+ * through some hoops. Linux uses the nifty swapgs instruction to pull
+ * a pointer to its data structures and replace it with the user gs (hence the
+ * name). The problem is that the kernel stack is at a fixed offset from the
+ * kernel gs, but in this module we don't have access to that offset (unless we
+ * can maybe find it through a symbol lookup, but I wanted to keep things
+ * compact). So, this module allocates its own buffer (state_save_area, set up
+ * by init_module) to use as a register save area and personal kernel stack.
+ * This should be enough because interrupts are off and since the code is small,
+ * I only expect a few page faults.
+ *
+ * NOTE(review): %rsp is pointed at state_save_area + SYSCALL_ENTRY_OFFSET, so
+ * anything pushed while the stub runs would land on the saved-register slots
+ * just below it. Confirm that nothing can push here, or move the stack top.
+ */
+
+/* You might be wondering, "he said interrupts are off, but I don't see a cli!"
+ * Well, it's because Linux sets the SFMask MSR such that when SYSCALL
+ * is invoked (how we got here), the IF flag is cleared. The linux SYSCALL
+ * entry point later enables them. We won't bother. It's just asking for trouble.
+ */
+
+/*
+ * syscall_stub: runs in place of the original SYSCALL entry point.
+ *
+ * Saves the registers into state_save_area, looks up %rax (the system call
+ * number) in syscall_map and issues SYSCALL_DISPATCH_HCALL when that byte is
+ * non-zero. Numbers >= NUM_SYSCALLS are outside the map and are never
+ * dispatched. Finally restores every register and "returns" into the original
+ * entry point, whose address init_module stored at SYSCALL_ENTRY_OFFSET.
+ */
+ENTRY(syscall_stub)
+    pushq %rdi; /* this is bad, shouldn't be using user-stack, any ideas? */
+    movq state_save_area, %rdi;            /* %rdi = base of the save area */
+    popq (%rdi);                           /* slot 0 = caller's %rdi */
+    pushq SYSCALL_ENTRY_OFFSET(%rdi);      /* original entry point, popped by retq */
+    SAVE_ALL
+    leaq SYSCALL_ENTRY_OFFSET(%rdi), %rsp; /* create our own little kernel stack*/
+
+    /* syscall_map holds NUM_SYSCALLS bytes; don't read past its end */
+    cmpq $NUM_SYSCALLS, %rax;
+    jae sysentry;
+
+    movq syscall_map, %rsi;
+    leaq (%rsi,%rax,1), %rsi;
+    cmpb $0x0, (%rsi);
+    je sysentry;
+    mov $SYSCALL_DISPATCH_HCALL, %eax;
+    vmmcall;
+
+sysentry:
+    RESTORE_ALL                            /* also puts the caller's %rsp back */
+    movq (%rdi), %rdi;
+    retq;
diff --git a/gears/services/fsceu/syscall_decode.c b/gears/services/fsceu/syscall_decode.c
new file mode 100644
index 0000000..48b1d20
--- /dev/null
+++ b/gears/services/fsceu/syscall_decode.c
@@ -0,0 +1,114 @@
+/*
+ * Kyle C. Hale 2012
+ * Module to be injected into guest kernel to enable
+ * selective system call exiting
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include "syscall_decode.h"
+
+#define AUTHOR "Kyle C. Hale "
+#define INFO "This kernel module is a paravirtualized module that will "\
+             "reroute system calls to a handler stub. This stub will decide "\
+             "based on a VMM-mapped vector whether or not the particular system call "\
+             "should trap to the VMM."
+
+/* RAX value the VMM reports when SYSCALL_SETUP_HCALL fails (-1) */
+#define SETUP_HCALL_FAILED ((uint64_t)-1)
+
+extern void syscall_stub(void);
+
+uint64_t * state_save_area;
+uint8_t * syscall_map;
+
+/*
+ * Allocate the register save area and the per-syscall exit map, then hand
+ * both, together with the address of syscall_stub, to the VMM.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or -EIO if the VMM
+ * reports failure for the setup hypercall. Nothing stays allocated on failure.
+ */
+int init_module (void) {
+    uint64_t ret;
+    int err = -ENOMEM;
+
+    state_save_area = kzalloc(sizeof(uint64_t)*(PAGE_SIZE), GFP_KERNEL);
+    if (!state_save_area) {
+        printk("Problem allocating state save area\n");
+        goto out;
+    }
+
+    syscall_map = kzalloc(NUM_SYSCALLS, GFP_KERNEL);
+    if (!syscall_map) {
+        printk("Problem allocating syscall map\n");
+        goto out_free_save_area;
+    }
+
+    // vmm will return -1 on error, address of syscall_entry on success
+    asm volatile ("vmmcall"
+                  : "=a" (ret)
+                  : "0" ((uint64_t)SYSCALL_SETUP_HCALL), "b" (syscall_stub),
+                    "c" (syscall_map), "d" (state_save_area)
+                  : "memory");
+
+    /*
+     * ret is unsigned and a valid entry address may have its top bit set,
+     * so only the exact -1 sentinel is treated as failure.
+     */
+    if (ret == SETUP_HCALL_FAILED) {
+        printk("syscall_decode: problem initing selective syscall exiting\n");
+        err = -EIO;
+        goto out_free_map;
+    }
+
+    /* syscall_stub jumps through this slot to reach the original entry point */
+    state_save_area[NUM_SAVE_REGS] = ret;
+
+    printk("syscall_decode: inited\n");
+    return 0;
+
+out_free_map:
+    kfree(syscall_map);
+    syscall_map = NULL;
+out_free_save_area:
+    kfree(state_save_area);
+    state_save_area = NULL;
+out:
+    return err;
+}
+
+
+/*
+ * Ask the VMM to restore the original system call entry point, then release
+ * the buffers. The hypercall goes first so syscall_stub can no longer run
+ * against freed memory.
+ */
+void cleanup_module (void) {
+    int ret;
+
+    /* tell Palacios to restore the original system call entry point */
+    asm volatile ("vmmcall"
+                  : "=a" (ret)
+                  : "0"(SYSCALL_CLEANUP_HCALL)
+                  : "memory");
+    if (ret < 0) {
+        printk("syscall_decode: problem deiniting selective syscall exiting\n");
+        /* the stub may still be installed: leaking the buffers is safer
+         * than freeing memory it could still touch */
+        return;
+    }
+
+    kfree(state_save_area);
+    kfree(syscall_map);
+
+    printk("syscall_decode: deinited\n");
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(AUTHOR);
+MODULE_VERSION("0.2");
+MODULE_DESCRIPTION(INFO);
+
diff --git a/gears/services/fsceu/syscall_decode.h b/gears/services/fsceu/syscall_decode.h
new file mode 100644
index 0000000..4b43739
--- /dev/null
+++ b/gears/services/fsceu/syscall_decode.h
@@ -0,0 +1,77 @@
+#ifndef __SYSCALL_DECODE__
+#define __SYSCALL_DECODE__
+
+// hypercall numbers
+#define SYSCALL_DISPATCH_HCALL 0x5CA11
+#define SYSCALL_SETUP_HCALL 0x5CA12
+#define SYSCALL_CLEANUP_HCALL 0x5CA13
+
+/* number of one-byte entries in syscall_map */
+#define NUM_SYSCALLS 256
+
+/*
+ * state_save_area layout: NUM_SAVE_REGS 8-byte register slots (slot 0 is %rdi,
+ * the rest are filled by SAVE_ALL), followed at SYSCALL_ENTRY_OFFSET by the
+ * address of the original system call entry point.
+ */
+#define NUM_SAVE_REGS 16
+#define SYSCALL_ENTRY_OFFSET (NUM_SAVE_REGS*8)
+
+#ifdef __ASSEMBLY__
+
+/* save every general-purpose register except %rdi, which holds the save-area base */
+#define SAVE_ALL \
+    movq %rsi, 8(%rdi); \
+    movq %rbp, 16(%rdi); \
+    movq %rsp, 24(%rdi); \
+    movq %rbx, 32(%rdi); \
+    movq %rdx, 40(%rdi); \
+    movq %rcx, 48(%rdi); \
+    movq %rax, 56(%rdi); \
+    movq %r8, 64(%rdi); \
+    movq %r9, 72(%rdi); \
+    movq %r10, 80(%rdi); \
+    movq %r11, 88(%rdi); \
+    movq %r12, 96(%rdi); \
+    movq %r13, 104(%rdi); \
+    movq %r14, 112(%rdi); \
+    movq %r15, 120(%rdi);
+
+/* inverse of SAVE_ALL; this reloads %rsp too */
+#define RESTORE_ALL \
+    movq 8(%rdi), %rsi; \
+    movq 16(%rdi), %rbp; \
+    movq 24(%rdi), %rsp; \
+    movq 32(%rdi), %rbx; \
+    movq 40(%rdi), %rdx; \
+    movq 48(%rdi), %rcx; \
+    movq 56(%rdi), %rax; \
+    movq 64(%rdi), %r8; \
+    movq 72(%rdi), %r9; \
+    movq 80(%rdi), %r10; \
+    movq 88(%rdi), %r11; \
+    movq 96(%rdi), %r12; \
+    movq 104(%rdi),%r13; \
+    movq 112(%rdi),%r14; \
+    movq 120(%rdi),%r15;
+
+
+/* align on an 8-byte boundary, padding with nops */
+#define ALIGN .align 8, 0x90
+
+#ifndef ENTRY
+
+#define ENTRY(name) \
+    .global name; \
+    ALIGN; \
+    name:
+
+#endif
+
+
+#else
+
+#include <linux/types.h>
+
+#endif
+#endif