Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Gears Fast System Call Exiting Utility Service
Kyle Hale [Fri, 13 Apr 2012 21:33:34 +0000 (16:33 -0500)]
gears/services/fsceu/Makefile [new file with mode: 0644]
gears/services/fsceu/README [new file with mode: 0644]
gears/services/fsceu/syscall.S [new file with mode: 0644]
gears/services/fsceu/syscall_decode.c [new file with mode: 0644]
gears/services/fsceu/syscall_decode.h [new file with mode: 0644]

diff --git a/gears/services/fsceu/Makefile b/gears/services/fsceu/Makefile
new file mode 100644 (file)
index 0000000..7cdca85
--- /dev/null
@@ -0,0 +1,11 @@
+# Kernel build tree to compile against. Override on the command line or from
+# the environment, e.g. `make KDIR=/path/to/kernel/build`.
+KDIR ?= /home/kch479/kyle_guest/kyle_gl
+obj-m += fsceu.o
+fsceu-objs := syscall_decode.o syscall.o
+
+.PHONY: clean
+
+# Default target. The module kbuild produces is named after obj-m (fsceu.ko);
+# the target name is kept as-is so existing invocations continue to work.
+syscall_decode.ko: syscall_decode.c syscall_decode.h syscall.S
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+# -f: do not fail (and skip the kbuild clean) when there is nothing to remove.
+clean: 
+	rm -f *.o *.ko
+	$(MAKE) -C $(KDIR) M=$(PWD) clean
+
diff --git a/gears/services/fsceu/README b/gears/services/fsceu/README
new file mode 100644 (file)
index 0000000..618a4fd
--- /dev/null
@@ -0,0 +1,4 @@
+This is a module that should be injected into the guest to 
+allow selective system call exiting. FSCEU stands for Fast System-Call Exiting 
+Utility. Once enabled, only the system call vectors enabled in an in-memory byte array
+will cause VM exits.
diff --git a/gears/services/fsceu/syscall.S b/gears/services/fsceu/syscall.S
new file mode 100644 (file)
index 0000000..6405cb3
--- /dev/null
@@ -0,0 +1,42 @@
+/* Kyle C. Hale 2011 */
+
+#include "syscall_decode.h"
+
+.text 
+
+/* Because SYSCALL doesn't put a kernel stack in place for us, we have to jump
+ * through some hoops. Linux uses the nifty swapgs instruction to pull
+ * a pointer to its data structures and replace it with the user gs (hence the
+ * name). The problem is that the kernel stack is at a fixed offset from the
+ * kernel gs, but in this module we don't have access to that offset (unless we
+ * can maybe find it through a symbol lookup, but I wanted to keep things
+ * compact). So, this module allocates its own buffer (sizeof(uint64_t) *
+ * PAGE_SIZE bytes, see init_module in syscall_decode.c) to use as a personal
+ * kernel stack.
+ * This should be enough because interrupts are off and since the code is small,
+ * I only expect a few page faults.
+ */
+
+/* You might be wondering, "he said interrupts are off, but I don't see a cli!"
+ * Well, it's because Linux sets the SFMask MSR such that when SYSCALL
+ * is invoked (how we got here), the IF flag is cleared. The linux SYSCALL
+ * entry point later enables them. We won't bother. It's just asking for trouble.
+ */
+
+/*
+ * syscall_stub
+ *
+ * On entry %rax holds the system call number and %rsp is still the caller's
+ * stack. The stub:
+ *   1. stores the original %rdi in slot 0 of state_save_area (bounced through
+ *      the caller's stack, since no register is free yet);
+ *   2. pushes the address held at SYSCALL_ENTRY_OFFSET in the save area (put
+ *      there by init_module) onto the caller's stack, so the final retq
+ *      transfers control to it;
+ *   3. saves the remaining registers with SAVE_ALL (including the %rsp that
+ *      now points at that pushed address);
+ *   4. issues SYSCALL_DISPATCH_HCALL if the byte for this system call in
+ *      syscall_map is non-zero;
+ *   5. restores every register and returns into the saved entry address.
+ *
+ * %rax is reloaded by RESTORE_ALL, so the hypercall number and any hypercall
+ * return value are not visible past this stub.
+ */
+ENTRY(syscall_stub)
+    pushq %rdi;  /* this is bad, shouldn't be using user-stack, any ideas? */
+    movq state_save_area, %rdi;
+    popq (%rdi);                        /* slot 0 <- original %rdi */
+    pushq SYSCALL_ENTRY_OFFSET(%rdi);   /* return target for the final retq */
+    SAVE_ALL 
+    leaq SYSCALL_ENTRY_OFFSET(%rdi), %rsp; /* create our own little kernel stack*/
+
+    /* syscall_map is only NUM_SYSCALLS bytes long: system call numbers outside
+     * that range (unsigned compare) can never be selected for exiting, and
+     * must not be used to index the map. */
+    cmpq $NUM_SYSCALLS, %rax;
+    jae sysentry;
+
+    movq syscall_map, %rsi;   
+    leaq (%rsi,%rax,1), %rsi;
+    cmpb $0x0, (%rsi);
+    je sysentry;
+    mov $SYSCALL_DISPATCH_HCALL, %eax;
+    vmmcall;
+
+sysentry:
+    RESTORE_ALL
+    movq (%rdi), %rdi;                  /* %rdi is restored last: it was the base pointer */
+    retq;
diff --git a/gears/services/fsceu/syscall_decode.c b/gears/services/fsceu/syscall_decode.c
new file mode 100644 (file)
index 0000000..48b1d20
--- /dev/null
@@ -0,0 +1,79 @@
+/* 
+ *   Kyle C. Hale 2012
+ * Module to be injected into guest kernel to enable
+ *  selective system call exiting
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "syscall_decode.h"
+
+#define AUTHOR "Kyle C. Hale <kh@u.northwestern.edu>"
+/* Module description. Adjacent string literals are concatenated with nothing
+ * in between, so every fragment but the last ends with a space. */
+#define INFO "This kernel module is a paravirtualized module that will "\
+              "reroute system calls to a handler stub. This stub will decide "\
+              "based on a VMM-mapped vector whether or not the particular system call "\
+              "should trap to the VMM."
+
+
+/* SYSCALL entry stub, implemented in syscall.S. */
+extern void syscall_stub(void);
+
+/* Register save area used by syscall_stub; slot NUM_SAVE_REGS holds the value
+ * returned by the setup hypercall. Referenced by name from syscall.S. */
+uint64_t * state_save_area;
+/* One byte per system call number (NUM_SYSCALLS entries); syscall_stub issues
+ * a hypercall when the byte is non-zero. Referenced by name from syscall.S. */
+uint8_t  * syscall_map;
+
+/*
+ * Module entry point.
+ *
+ * Allocates and zeroes the register save area and the per-system-call map,
+ * then issues SYSCALL_SETUP_HCALL with the addresses of syscall_stub,
+ * syscall_map and state_save_area in rbx, rcx and rdx. On success the value
+ * handed back by the VMM is stored in slot NUM_SAVE_REGS of the save area,
+ * which is where syscall_stub reads its return target from.
+ *
+ * Returns 0 on success and -1 on any failure. On failure both buffers are
+ * released and the global pointers are reset to NULL.
+ */
+int init_module (void) {
+    uint64_t ret;
+    const size_t save_area_size = sizeof(uint64_t) * PAGE_SIZE;
+
+    state_save_area = kmalloc(save_area_size, GFP_KERNEL);
+    if (!state_save_area){
+        printk("Problem allocating state save area\n");
+        return -1;
+    }
+    memset(state_save_area, 0, save_area_size);
+
+    syscall_map = kmalloc(NUM_SYSCALLS, GFP_KERNEL);
+    if (!syscall_map) {
+        printk("Problem allocating syscall map\n");
+        goto out_free_save_area;
+    }
+    memset(syscall_map, 0, NUM_SYSCALLS);
+
+    // vmm will return -1 on error, address of syscall_entry on success
+    // ("memory" clobber: the buffers handed to the VMM must be fully
+    //  initialised before the hypercall is issued)
+    asm volatile ("vmmcall"
+                : "=a" (ret)
+                : "0" (SYSCALL_SETUP_HCALL), "b" (syscall_stub), "c" (syscall_map), 
+                  "d" (state_save_area)
+                : "memory");
+
+    /* ret is unsigned, so "ret < 0" can never be true. A signed comparison
+     * would be wrong as well, because a valid kernel address may have its top
+     * bit set. Compare against the documented error value instead. */
+    if (ret == (uint64_t)-1) {
+        printk("syscall_decode: problem initing selective syscall exiting\n");
+        /* NOTE(review): assumes a failed setup leaves the stub uninstalled,
+         * so the buffers can be released safely - confirm on the VMM side. */
+        goto out_free_map;
+    }
+
+    state_save_area[NUM_SAVE_REGS] = ret; 
+
+    printk("syscall_decode: inited\n");
+    return 0;
+
+out_free_map:
+    kfree(syscall_map);
+    syscall_map = NULL;
+out_free_save_area:
+    kfree(state_save_area);
+    state_save_area = NULL;
+    return -1;
+}
+
+
+/*
+ * Module exit point.
+ *
+ * Asks the VMM to restore the original system call entry point and only then
+ * releases the save area and the system call map. syscall_stub dereferences
+ * both buffers on every system call, so they have to stay valid for as long
+ * as the stub may still be installed.
+ */
+void cleanup_module (void) {
+  int ret;
+
+  /* tell Palacios to restore the original system call entry point */
+  asm volatile ("vmmcall"
+                : "=a" (ret)
+                : "0"(SYSCALL_CLEANUP_HCALL)
+                : "memory");
+  if (ret < 0) {
+    printk("syscall_decode: problem deiniting selective syscall exiting\n");
+  }
+
+  /* free only after the hypercall, never before it */
+  kfree(state_save_area);
+  kfree(syscall_map);
+
+  printk("syscall_page: deinited\n");
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(AUTHOR);
+MODULE_VERSION("0.2");
+MODULE_DESCRIPTION(INFO);
+
diff --git a/gears/services/fsceu/syscall_decode.h b/gears/services/fsceu/syscall_decode.h
new file mode 100644 (file)
index 0000000..4b43739
--- /dev/null
@@ -0,0 +1,69 @@
+#ifndef __SYSCALL_DECODE__
+#define __SYSCALL_DECODE__
+
+// hypercall numbers
+// DISPATCH: issued by syscall_stub when the map byte for a system call is non-zero
+// SETUP:    issued by init_module, with the stub, map and save-area addresses
+// CLEANUP:  issued by cleanup_module to restore the original entry point
+#define SYSCALL_DISPATCH_HCALL 0x5CA11
+#define SYSCALL_SETUP_HCALL    0x5CA12
+#define SYSCALL_CLEANUP_HCALL  0x5CA13
+
+// size in bytes of syscall_map: one byte per system call number
+#define NUM_SYSCALLS 256
+
+// number of 8-byte register slots at the start of the save area
+// (slot 0 for rdi plus the 15 registers stored by SAVE_ALL)
+#define NUM_SAVE_REGS 16
+// byte offset of the slot right after the saved registers; init_module stores
+// the value returned by the setup hypercall there (state_save_area[NUM_SAVE_REGS])
+#define SYSCALL_ENTRY_OFFSET   (NUM_SAVE_REGS*8)
+
+#ifdef __ASSEMBLY__
+
+/*
+ * SAVE_ALL: store rsi, rbp, rsp, rbx, rdx, rcx, rax and r8-r15 into the save
+ * area addressed by %rdi, one 8-byte slot each, at offsets 8 through 120.
+ * Slot 0 is not written here; syscall_stub (syscall.S) stores the original
+ * %rdi there before using this macro.
+ * The last line ends in a backslash, so the blank line after it is still part
+ * of the macro; that line must stay empty.
+ */
+#define SAVE_ALL            \
+    movq %rsi, 8(%rdi);     \
+    movq %rbp, 16(%rdi);    \
+    movq %rsp, 24(%rdi);    \
+    movq %rbx, 32(%rdi);    \
+    movq %rdx, 40(%rdi);    \
+    movq %rcx, 48(%rdi);    \
+    movq %rax, 56(%rdi);    \
+    movq %r8,  64(%rdi);    \
+    movq %r9,  72(%rdi);    \
+    movq %r10, 80(%rdi);    \
+    movq %r11, 88(%rdi);    \
+    movq %r12, 96(%rdi);    \
+    movq %r13, 104(%rdi);   \
+    movq %r14, 112(%rdi);   \
+    movq %r15, 120(%rdi);   \
+
+/*
+ * RESTORE_ALL: reload the registers stored by SAVE_ALL from the save area
+ * addressed by %rdi, using the same offsets. %rsp switches back to the saved
+ * stack pointer as part of this sequence (offset 24). %rdi itself is not
+ * reloaded here; syscall_stub restores it from slot 0 afterwards.
+ * The last line ends in a backslash, so the blank line after it is still part
+ * of the macro; that line must stay empty.
+ */
+#define RESTORE_ALL         \
+    movq 8(%rdi),  %rsi;    \
+    movq 16(%rdi), %rbp;    \
+    movq 24(%rdi), %rsp;    \
+    movq 32(%rdi), %rbx;    \
+    movq 40(%rdi), %rdx;    \
+    movq 48(%rdi), %rcx;    \
+    movq 56(%rdi), %rax;    \
+    movq 64(%rdi), %r8;     \
+    movq 72(%rdi), %r9;     \
+    movq 80(%rdi), %r10;    \
+    movq 88(%rdi), %r11;    \
+    movq 96(%rdi), %r12;    \
+    movq 104(%rdi),%r13;    \
+    movq 112(%rdi),%r14;    \
+    movq 120(%rdi),%r15;    \
+
+
+/* align on word boundary with nops */
+/* (.align 8, with 0x90 as the fill byte for any padding) */
+#define ALIGN  .align 8, 0x90
+
+/* Fallback definition, used only when ENTRY has not been defined already. */
+#ifndef ENTRY
+
+/* ENTRY(name): export `name` as a global symbol, align it with ALIGN and
+ * emit the label itself. The trailing backslash continues the macro onto the
+ * following blank line, which must stay empty. */
+#define ENTRY(name) \
+    .global name;   \
+    ALIGN;          \
+    name:           \
+
+#endif
+
+
+#else
+
+#include <linux/types.h>
+
+#endif 
+#endif