endmenu
-menu "Debug configuration"
+source "palacios/src/gears/Kconfig"
+menu "Debug configuration"
## Is unwind information useful
interfaces-y := palacios/src/interfaces/
extensions-y := palacios/src/extensions/
vnet-y := palacios/src/vnet/
+gears-y := palacios/src/gears/
modules-y := modules/
palacios-dirs := $(patsubst %/,%,$(filter %/, \
- $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(vnet-y) $(libs-y)) $(modules-y))
+ $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(vnet-y) $(gears-y) $(libs-y)) $(modules-y))
palacios-cleandirs := $(sort $(palacios-dirs) $(patsubst %/,%,$(filter %/, \
$(core-n) $(core-) $(devices-n) $(devices-) \
- $(interfaces-n) $(interfaces-) $(extensions-n) $(extensions-) $(vnet-n) $(vnet-) $(modules-n) $(modules-))))
+ $(interfaces-n) $(interfaces-) $(extensions-n) $(extensions-) $(vnet-n) $(vnet-) $(gears-n) $(gears-) $(modules-n) $(modules-))))
extensions-y := $(patsubst %/, %/built-in.o, $(extensions-y))
libs-y := $(patsubst %/, %/built-in.o, $(libs-y))
vnet-y := $(patsubst %/, %/built-in.o, $(vnet-y))
+gears-y := $(patsubst %/, %/built-in.o, $(gears-y))
modules-y := $(patsubst %/, %/built-in.o, $(modules-y))
#lnxmod-y := $(patsubst %/, %/built-in.o, $(lnxmod-y))
-palacios := $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(vnet-y) $(libs-y) $(modules-y)
+palacios := $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(vnet-y) $(gears-y) $(libs-y) $(modules-y)
# Rule to link palacios - also used during CONFIG_CONFIGKALLSYMS
--- /dev/null
+#!/usr/bin/perl -w
+
+use Getopt::Long;
+
+&GetOptions(32=>\$m32, 64=>\$m64, "output=s"=>\$mod);
+
+$#ARGV >= 0 or die "usage: compile-for-static-user-level-injection.pl [-32|-64] [--output=module_name] source.c+ [lib.a]*\n";
+
+if (!$m32 && !$m64) {
+ print "Assuming 32 bit. Use -64 to override\n";
+ $m32=1;
+}
+
+if (!$mod) {
+ print "No module name given, assuming a.tooth\n";
+ $mod = "a.tooth";
+}
+
+if ($m32) {
+ $gopt = "-m32";
+ $lopt = "-melf_i386 --oformat elf32-i386";
+}
+
+if ($m64) {
+ $gopt = "-m64";
+ $lopt = "-melf_x86_64 --oformat elf64-x86-64";
+}
+
+
+$linkerscript = <<END;
+SECTIONS
+{
+/* Must be on a page boundary */
+/* Should link like ld -z max-page-size=4096 -T ld.script ... */
+/* If object file is -fPIC, then it shouldn't matter where we load it */
+ . = 0x1000;
+/* Text, data, and bss squished together */
+ .text : { *(.text) }
+ .data : { *(.data) }
+ .bss : { *(.bss) }
+/* Result will be one load group marked RWX */
+}
+END
+
+
+
+@stems=grep(/.*\.c$/,@ARGV);
+@libs=grep(/.*\.a$/,@ARGV);
+
+map { $_ =~ s/\.c$//g} @stems;
+
+print "Compiling...\n";
+foreach $s (@stems) {
+ system("gcc $gopt -fPIE -Wa,-R -c $s.c -nostartfiles -nodefaultlibs -nostdlib -static -o $s.o") == 0
+ or die "Compilation of $s.c failed\n";
+ system("gcc $gopt -fPIE -Wa,-R -S $s.c -nostartfiles -nodefaultlibs -nostdlib -static -o $s.s") == 0
+ or die "Compilation of $s.c failed\n";
+}
+print "Compilation done.\n";
+
+open(W,">.linker_script");
+print W $linkerscript;
+close(W);
+
+print "Linking...\n";
+
+$rc=system("ld $lopt -z max-page-size=4096 -T .linker_script ".join(" ",map { "$_.o" } @stems)." ".join(" ",@libs)." -o $mod\n");
+
+unlink ".linker_script";
+
+$rc==0 or die "Linking of $mod failed\n";
+
+print "Linking of $mod completed. Done.\n";
+
+open(E,"readelf -h $mod |");
+while (<E>) {
+ if (/^\s*Entry point address:\s+(\S+)$/) {
+ print "Entry point relative to beginning of file: $1\n";
+ last;
+ }
+}
+close(E);
+
--- /dev/null
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+/* 32-bit syscall numbers */
+#define __NR_exit 1
+#define __NR_fork 2
+#define __NR_write 4
+#define __NR_open 5
+#define __NR_close 6
+#define __NR_waitpid 7
+#define __NR_execve 11
+
+/* 32-bit system call conventions
+ *
+ * eax = syscall nr
+ * ebx = arg 1
+ * ecx = arg 2
+ * edx = arg 3
+ * esi = arg 4
+ * edi = arg 5
+ * ebp = arg 6
+ */
+int _start() {
+
+ int FD, bytes_written, status, exec_ret;
+ int flags = O_RDWR|O_CREAT;
+ int mode = S_IRUSR|S_IWUSR|S_IXUSR;
+ pid_t pid, ret;
+ char * env[1];
+
+ env[0] = 0;
+
+#include "generated.h"
+
+#ifdef DO_WRITE
+ /* open("FILENAME, O_RDWR | O_CREAT, */
+ asm volatile ("pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (FD)
+ : "0" (__NR_open), "r" (FILE_NAME), "c" (flags), "d" (mode));
+
+ if (!FD)
+ goto die;
+
+
+ /* write(FD, INJECT_FILE, FILE_LENGTH) */
+ asm volatile ("pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (bytes_written)
+ : "0" (__NR_write), "r" (FD), "c" (inject_file), "d" (FILE_LENGTH));
+
+ if (!bytes_written)
+ goto die;
+
+
+ /* close(FD) */
+ asm volatile ("pushl %%ebx; movl %1,%%ebx; int $0x80; popl %%ebx"
+ : : "a" (__NR_close), "r" (FD));
+#endif
+
+
+#ifdef DO_FORKEXEC
+ /* pid = fork() */
+ asm volatile ("int $0x80" : "=a" (pid) : "0" (__NR_fork));
+
+
+ if (pid < 0) {
+ goto die;
+ } else if (pid > 0) {
+
+ do {
+ /* ret = waitpid(pid, &status, 0) */
+ asm volatile ("pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (ret)
+ : "0" (__NR_waitpid), "r" (pid), "c" (&status), "d" (0));
+
+ } while (ret == -1);
+
+ } else {
+
+ /* execve("command", "arg0" , ..., "argN" , env) */
+ asm volatile ("pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (exec_ret)
+ : "0" (__NR_execve), "r" (CMD), "c" (args), "d" (env));
+
+ if (exec_ret < 0)
+ /* exit(127) */
+ asm volatile ("pushl %%ebx; movl %1,%%ebx; int $0x80; popl %%ebx"
+ : : "a" (__NR_exit), "r" (127));
+ }
+#endif
+
+ die:
+ /* hypercall(f001) <=> exit(0) */
+ asm volatile ("movl $0xf001, %eax");
+ asm volatile ("vmmcall");
+ /* exit(1) */
+ asm volatile ("pushl %%ebx; movl %1,%%ebx; int $0x80; popl %%ebx"
+ : : "a" (__NR_exit), "r" (1));
+}
--- /dev/null
+#! /usr/bin/perl -w
+
+use Getopt::Long;
+
+sub usage() {
+ die "\n\nusage: prepare_inject.pl [-w output_file_name inject_object] [-e command {arg_list} ]\n\n".
+ "You must either indicate to write out an injected file with -w or to execute a command,".
+ " with -e, or both.\n\n".
+ "\t'output_file_name' is what the name of the inject_object will be when it is written out to the guest.\n\n".
+ "\t'inject_object' is the file that will be written out to the guest. This could be a text file, program, or ".
+ "really anything.\n\n".
+ "\t'command' is the fully qualified path name for a file within the guest to execute, either by itself, ".
+ "or after a specified inject_object is written out.\n\n";
+}
+
+&GetOptions("w:s{2}" => \@write_opts, "e:s{,}" => \@exec_opts, "output:s" => \$out_name) or usage();
+
+usage() unless (@exec_opts || @write_opts);
+
+$hfile = <<END;
+#ifndef _GENERATED_H_
+#define _GENERATED_H_
+
+END
+
+
+if (@exec_opts) {
+ $cmd = $exec_opts[0];
+ $hfile .= "#define DO_FORKEXEC\n";
+ $hfile .= "#define CMD \"$cmd\"\n";
+
+ $numargs = scalar(@exec_opts);
+ $hfile .= "char * const args[".$numargs."] = {\"".join('","', @exec_opts)."\"};\n";
+}
+
+
+if (@write_opts) {
+ $out_file = $write_opts[0];
+ $inject_file = $write_opts[1];
+
+ $hfile .= "#define DO_WRITE\n";
+ $hfile .= "#define FILE_NAME \"$out_file\"\n";
+
+
+ $size = `ls -l $inject_file | cut -f5 -d ' '`;
+ $hfile .= "#define FILE_LENGTH $size\n";
+
+ # generate a string from the file, char * inject_file = string
+ open FILE, $inject_file or die $!;
+ binmode FILE;
+ my ($buf, $data, $n);
+ while (($n = read FILE, $data, 1) != 0) {
+ $buf .= "\\x" . unpack("H8", $data);
+ }
+
+ close(FILE);
+ $hfile .= "char * inject_file = \"$buf\";\n\n\n";
+}
+
+$hfile .= "#endif\n";
+
+# write out the h file
+open (W, ">generated.h") or die $!;
+print W $hfile;
+close(W);
+
+print "running special inject code compilation and linking...\n";
+# compile with generated h file and inject_code_template.c with peter's script
+$compile_cmd = "perl compile-for-static-user-level-injection.pl -32 ";
+$compile_cmd .= "--output=$out_name " if defined($out_name);
+$compile_cmd .= "inject_code_template.c";
+system($compile_cmd);
+
+unlink "generated.h";
+
+
+print "All done.\n";
+
--- /dev/null
+KDIR=/home/kch479/kyle_guest/kyle_gl
+obj-m += fsceu.o
+fsceu-objs := syscall_decode.o syscall.o
+
+syscall_decode.ko: syscall_decode.c syscall_decode.h syscall.S
+ make -C $(KDIR) M=$(PWD) modules
+
+clean:
+ rm *.o *.ko
+ make -C $(KDIR) M=$(PWD) clean
+
--- /dev/null
+This is a module that should be injected into the guest to
+allow selective system call exiting. FSCEU stands for Fast System-Call Exiting
+Utility. Once enabled, only the system call vectors enabled in an in-memory byte array
+will cause VM exits.
+
+Note that the Makefile needs to be modified to point to the kernel source
+tree of the *GUEST* kernel.
+
--- /dev/null
+/* Kyle C. Hale 2011 */
+
+#include "syscall_decode.h"
+
+.text
+
+/* Because SYSCALL doesn't put a kernel stack in place for us, we have to jump
+ * through some hoops. Linux uses the nifty swapgs instruction to pull
+ * a pointer to its data structures and replace it with the user gs (hence the
+ * name). The problem is that the kernel stack is at a fixed offset from the
+ * kernel gs, but in this module we don't have access to that offset (unless we
+ * can maybe find it through a symbol lookup, but I wanted to keep things
+ * compact). So, this module allocates 2 pages to use as a personal kernel stack.
+ * This should be enough because interrupts are off and since the code is small,
+ * I only expect a few page faults.
+ */
+
+/* You might be wondering, "he said interrupts are off, but I don't see a cli!"
+ * Well, it's because Linux sets the SFMask MSR such that when SYSCALL
+ * is invoked (how we got here), the IF flag is cleared. The linux SYSCALL
+ * entry point later enables them. We won't bother. It's just asking for trouble.
+ */
+
+ENTRY(syscall_stub)
+ pushq %rdi; /* this is bad, shouldn't be using user-stack, any ideas? */
+ movq state_save_area, %rdi;
+ popq (%rdi);
+ pushq SYSCALL_ENTRY_OFFSET(%rdi);
+ SAVE_ALL
+ leaq SYSCALL_ENTRY_OFFSET(%rdi), %rsp; /* create our own little kernel stack*/
+
+ movq syscall_map, %rsi;
+ leaq (%rsi,%rax,1), %rsi;
+ cmpb $0x0, (%rsi);
+ je sysentry;
+ mov $SYSCALL_DISPATCH_HCALL, %eax;
+ vmmcall;
+
+sysentry:
+ RESTORE_ALL
+ movq (%rdi), %rdi;
+ retq;
--- /dev/null
+/*
+ * Kyle C. Hale 2012
+ * Module to be injected into guest kernel to enable
+ * selective system call exiting
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "syscall_decode.h"
+
+#define AUTHOR "Kyle C. Hale <kh@u.northwestern.edu>"
+#define INFO "This kernel module is a paravirtualized module that will"\
+ "reroute system calls to a handler stub. This stub will decide"\
+ "based on a VMM-mapped vector whether or not the particular system call"\
+ "should trap to the VMM."
+
+
+extern void syscall_stub(void);
+
+uint64_t * state_save_area;
+uint8_t * syscall_map;
+
+int init_module (void) {
+ uint64_t ret;
+
+ state_save_area = kmalloc(sizeof(uint64_t)*(PAGE_SIZE), GFP_KERNEL);
+ if (!state_save_area){
+ printk("Problem allocating sate save area\n");
+ return -1;
+ }
+ memset(state_save_area, 0, sizeof(uint64_t)*(PAGE_SIZE));
+
+ syscall_map = kmalloc(NUM_SYSCALLS, GFP_KERNEL);
+ if (!syscall_map) {
+ printk("Problem allocating syscall map\n");
+ return -1;
+ }
+ memset(syscall_map, 0, NUM_SYSCALLS);
+
+ // vmm will return -1 on error, address of syscall_entry on success
+ asm volatile ("vmmcall"
+ : "=a" (ret)
+ : "0" (SYSCALL_SETUP_HCALL), "b" (syscall_stub), "c" (syscall_map),
+ "d" (state_save_area));
+
+ if (ret < 0) {
+ printk("syscall_decode: problem initing selective syscall exiting\n");
+ return -1;
+ } else {
+ state_save_area[NUM_SAVE_REGS] = ret;
+ }
+
+ printk("syscall_decode: inited\n");
+ return 0;
+}
+
+
+void cleanup_module (void) {
+ int ret;
+ kfree(state_save_area);
+ kfree(syscall_map);
+ /* tell Palacios to restore the original system call entry point */
+ asm volatile ("vmmcall"
+ : "=a" (ret)
+ : "0"(SYSCALL_CLEANUP_HCALL));
+ if (ret < 0) {
+ printk("syscall_decode: problem deiniting selective syscall exiting\n");
+ }
+
+ printk("syscall_page: deinited\n");
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(AUTHOR);
+MODULE_VERSION("0.2");
+MODULE_DESCRIPTION(INFO);
+
--- /dev/null
+#ifndef __SYSCALL_DECODE__
+#define __SYSCALL_DECODE__
+
+// hypercall numbers
+#define SYSCALL_DISPATCH_HCALL 0x5CA11
+#define SYSCALL_SETUP_HCALL 0x5CA12
+#define SYSCALL_CLEANUP_HCALL 0x5CA13
+
+#define NUM_SYSCALLS 256
+
+#define NUM_SAVE_REGS 16
+#define SYSCALL_ENTRY_OFFSET (NUM_SAVE_REGS*8)
+
+#ifdef __ASSEMBLY__
+
+#define SAVE_ALL \
+ movq %rsi, 8(%rdi); \
+ movq %rbp, 16(%rdi); \
+ movq %rsp, 24(%rdi); \
+ movq %rbx, 32(%rdi); \
+ movq %rdx, 40(%rdi); \
+ movq %rcx, 48(%rdi); \
+ movq %rax, 56(%rdi); \
+ movq %r8, 64(%rdi); \
+ movq %r9, 72(%rdi); \
+ movq %r10, 80(%rdi); \
+ movq %r11, 88(%rdi); \
+ movq %r12, 96(%rdi); \
+ movq %r13, 104(%rdi); \
+ movq %r14, 112(%rdi); \
+ movq %r15, 120(%rdi); \
+
+#define RESTORE_ALL \
+ movq 8(%rdi), %rsi; \
+ movq 16(%rdi), %rbp; \
+ movq 24(%rdi), %rsp; \
+ movq 32(%rdi), %rbx; \
+ movq 40(%rdi), %rdx; \
+ movq 48(%rdi), %rcx; \
+ movq 56(%rdi), %rax; \
+ movq 64(%rdi), %r8; \
+ movq 72(%rdi), %r9; \
+ movq 80(%rdi), %r10; \
+ movq 88(%rdi), %r11; \
+ movq 96(%rdi), %r12; \
+ movq 104(%rdi),%r13; \
+ movq 112(%rdi),%r14; \
+ movq 120(%rdi),%r15; \
+
+
+/* align on word boundary with nops */
+#define ALIGN .align 8, 0x90
+
+#ifndef ENTRY
+
+#define ENTRY(name) \
+ .global name; \
+ ALIGN; \
+ name: \
+
+#endif
+
+
+#else
+
+#include <linux/types.h>
+
+#endif
+#endif
--- /dev/null
+EXTRA = -m32
+
+
+all: libmpi_hcall.a mpi_preload.so mpi.ko test_static
+
+libmpi_hcall.a: mpi_hc.o
+ ar ruv libmpi_hcall.a mpi_hc.o
+
+mpi_hc.o: mpi_hc.c mpi_hc.h hcall.h
+ gcc $(EXTRA) -static -fPIC -S mpi_hc.c -o mpi_hc.s
+ gcc $(EXTRA) -static -fPIC -c mpi_hc.c -o mpi_hc.o
+
+mpi_preload.so: mpi_preload.c libmpi_hcall.a
+ gcc $(EXTRA) -Wall -O2 -fPIC -shared -nostdlib -I/usr/include mpi_preload.c -L. -lmpi_hcall -ldl -lc -o mpi_preload.so
+
+test_static: test_static.c libmpi_hcall.a
+ gcc $(EXTRA) -static test_static.c -L. -lmpi_hcall -o test_static
+
+
+EXTRA_CFLAGS += -I$(PWD)/../../../palacios/include
+
+obj-m += mpi.o
+
+mpi.ko: mpi.c
+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+
+clean:
+ rm *.o *.so *.a test_static
+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
--- /dev/null
+This is the MPI accelerator service. For co-located
+VMs running an MPI application, this redirects mpi_send
+and mpi_receive through the VMM.
+
+mpi_preload.c - the code injected into the guest
+mpi.c - the acclerator implemention as host hypercalls
--- /dev/null
+#ifndef __HCALL__
+#define __HCALL__
+
+/*
+ Calling convention:
+
+64 bit:
+ rax = hcall number
+ rbx = 0x6464646464646464...
+ rcx = 1st arg
+ rdx = 2nd arg
+ rsi = 3rd arg
+ rdi = 4th arg
+ r8 = 5th arg
+ r9 = 6th arg
+ r10 = 7th arg
+ r11 = 8th arg
+
+32 bit:
+ eax = hcall number
+ ebx = 0x32323232
+ arguments on stack in C order (first argument is TOS)
+ arguments are also 32 bit
+*/
+#define HCALL64(rc,id,a,b,c,d,e,f,g,h) \
+ asm volatile ("movq %1, %%rax; " \
+ "pushq %%rbx; " \
+ "movq $0x6464646464646464, %%rbx; " \
+ "movq %2, %%rcx; " \
+ "movq %3, %%rdx; " \
+ "movq %4, %%rsi; " \
+ "movq %5, %%rdi; " \
+ "movq %6, %%r8 ; " \
+ "movq %7, %%r9 ; " \
+ "movq %8, %%r10; " \
+ "movq %9, %%r11; " \
+ "vmmcall ; " \
+ "movq %%rax, %0; " \
+ "popq %%rbx; " \
+ : "=r"(rc) \
+ : "m"(id), \
+ "m"(a), "m"(b), "m"(c), "m"(d), \
+ "m"(e), "m"(f), "m"(g), "m"(h) \
+ : "%rax","%rcx","%rdx","%rsi","%rdi", \
+ "%r8","%r9","%r10","%r11" \
+ )
+
+#define HCALL32(rc,id,a,b,c,d,e,f,g,h) \
+ asm volatile ("movl %1, %%eax; " \
+ "pushl %%ebx; " \
+ "movl $0x32323232, %%ebx; " \
+ "pushl %9;" \
+ "pushl %8;" \
+ "pushl %7;" \
+ "pushl %6;" \
+ "pushl %5;" \
+ "pushl %4;" \
+ "pushl %3;" \
+ "pushl %2;" \
+ "vmmcall ; " \
+ "movl %%eax, %0; " \
+ "addl $32, %%esp; " \
+ "popl %%ebx; " \
+ : "=r"(rc) \
+ : "m"(id), \
+ "m"(a), "m"(b), "m"(c), "m"(d), \
+ "m"(e), "m"(f), "m"(g), "m"(h) \
+ : "%eax" \
+ )
+
+#ifdef __x86_64__
+#define HCALL(rc,id,a,b,c,d,e,f,g,h) HCALL64(rc,id,a,b,c,d,e,f,g,h)
+#else
+#define HCALL(rc,id,a,b,c,d,e,f,g,h) HCALL32(rc,id,a,b,c,d,e,f,g,h)
+#endif
+
+#endif
--- /dev/null
+/*
+ MPI module
+
+ (c) 2012 Peter Dinda
+
+ */
+
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#include <palacios/vmm.h>
+#include <palacios/vm_guest.h>
+#include <interfaces/vmm_host_hypercall.h>
+
+#include "mpi_hc.h"
+
+#define DEEP_DEBUG 0
+#define SHALLOW_DEBUG 0
+
+#if DEEP_DEBUG
+#define DEEP_DEBUG_PRINT(fmt, args...) printk((fmt), ##args)
+#else
+#define DEEP_DEBUG_PRINT(fmt, args...)
+#endif
+
+#if SHALLOW_DEBUG
+#define SHALLOW_DEBUG_PRINT(fmt, args...) printk((fmt), ##args)
+#else
+#define SHALLOW_DEBUG_PRINT(fmt, args...)
+#endif
+
+
+#define ERROR(fmt, args...) printk((fmt), ##args)
+#define INFO(fmt, args...) printk((fmt), ##args)
+
+#define RENDEZVOUS_TABLE_MAX 32
+#define EXEC_NAME_MAX 128
+
+struct rendezvous_table_row {
+ enum {
+ FREE=0,
+ INITED,
+ RANKED,
+ } state;
+
+ char exec[EXEC_NAME_MAX];
+ uint64_t rank;
+ struct guest_info *core;
+ struct guest_accessors *acc;
+ uint64_t cr3;
+ wait_queue_head_t send_wait_queue;
+ int send_pending;
+ uint64_t send_vaddr;
+ uint64_t send_size;
+ uint64_t send_dest;
+ uint64_t send_tag;
+ uint64_t send_rc;
+ wait_queue_head_t recv_wait_queue;
+ int recv_pending;
+ uint64_t recv_vaddr;
+ uint64_t recv_size;
+ uint64_t recv_src;
+ uint64_t recv_tag;
+ uint64_t recv_stat_vaddr;
+ uint64_t recv_rc;
+};
+
+static struct rendezvous_table_row *rtab;
+
+
+static int mpi_init_hcall(struct guest_info *core,
+ struct guest_accessors *acc,
+ int *argc,
+ char ***argv)
+{
+ int i;
+ struct rendezvous_table_row *r;
+ uint32_t va;
+
+ SHALLOW_DEBUG_PRINT("mpi: mpi_init_hcall(%p,%p)\n",(void*)argc,(void*)argv);
+
+ if (!rtab) {
+ ERROR("mpi: no rtab!\n");
+ return -1;
+ }
+
+ for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
+ if (rtab[i].state==FREE) {
+ break;
+ }
+ }
+
+ if (i==RENDEZVOUS_TABLE_MAX) {
+ ERROR("mpi: no room in rtab\n");
+ return -1;
+ }
+
+ r=&(rtab[i]);
+ r->rank=0;
+ r->core=core;
+ r->acc=acc;
+ r->cr3=acc->get_cr3(core);
+ r->send_pending=0;
+ r->recv_pending=0;
+
+ // The following hideously assumes that FIX FIX FIX
+ // the guest app is 32 bit! FIX FIX FIX
+ // THIS IS COMMON ASSUMPTION THROUGHOUT FIX FIX FIX
+ if (acc->read_gva(core,(uint64_t)argv,4,&va)<0) {
+ ERROR("mpi: init cannot copy argv (first deref)\n");
+ return -1;
+ } else {
+ //now we have *argv
+ // we want **argv
+ if (acc->read_gva(core,(uint64_t)va,4,&va)<0) {
+ ERROR("mpi: init cannot copy argv (second deref)\n");
+ return -1;
+ } else {
+ // now we have **argv, and we want the array it points to
+ if (acc->read_gva(core,(uint64_t)va,EXEC_NAME_MAX,r->exec)<0) {
+ ERROR("mpi: init cannot copy exec name (third deref)\n");
+ return -1;
+ }
+ // for good measure
+ r->exec[EXEC_NAME_MAX-1]=0;
+ }
+ }
+
+ init_waitqueue_head(&(r->send_wait_queue));
+ init_waitqueue_head(&(r->recv_wait_queue));
+
+ r->state=INITED;
+
+ DEEP_DEBUG_PRINT("mpi: inited entry %d to '%s' core=%p cr3=%p\n",
+ i,r->exec,r->core,(void*)(r->cr3));
+
+ return 0;
+}
+
+static int mpi_deinit_hcall(struct guest_info *core,
+ struct guest_accessors *acc)
+{
+ int i;
+ uint64_t cr3;
+
+ SHALLOW_DEBUG_PRINT("mpi: mpi_deinit_hcall()\n");
+
+ cr3=acc->get_cr3(core);
+
+ for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
+ if (rtab[i].state!=FREE &&
+ rtab[i].core==core &&
+ rtab[i].cr3==cr3) {
+ break;
+ }
+ }
+
+ if (i==RENDEZVOUS_TABLE_MAX) {
+ ERROR("mpi: could not find matching row in rtab to delete\n");
+ return -1;
+ }
+
+ if (rtab[i].send_pending) {
+ ERROR("mpi: warning: deleting matching row with send pending\n");
+ }
+
+ if (rtab[i].recv_pending) {
+ ERROR("mpi: warning: deleting matching row with recv pending\n");
+ }
+
+ DEEP_DEBUG_PRINT("mpi: removing row for core %p, cr3 %p, exec '%s'\n",
+ core, (void*)cr3, rtab[i].exec);
+
+
+ memset(&(rtab[i]),0,sizeof(struct rendezvous_table_row));
+
+ return 0;
+}
+
+static int mpi_comm_rank_hcall(struct guest_info *core,
+ struct guest_accessors *acc,
+ void *comm_va,
+ int *rank_va)
+{
+ int i;
+ uint64_t cr3;
+
+ SHALLOW_DEBUG_PRINT("mpi_comm_rank_hcall(%p,%p)\n",(void*)comm_va,(void*)rank_va);
+
+ cr3=acc->get_cr3(core);
+
+ for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
+ if (rtab[i].state==INITED &&
+ rtab[i].core==core &&
+ rtab[i].cr3==cr3) {
+ break;
+ }
+ }
+
+ if (i==RENDEZVOUS_TABLE_MAX) {
+ ERROR("mpi: no matching row found\n");
+ return -1;
+ }
+
+ //
+ // The following completely ignores the communicator
+ // Throughout we assume everyone is in MPI_COMM_WORLD
+ // FIX FIX FIX FIX
+ //
+
+ if (acc->read_gva(core,(uint64_t)rank_va,4,&(rtab[i].rank))<0) {
+ ERROR("mpi: rank cannot copy rank\n");
+ return -1;
+ }
+
+ rtab[i].state=RANKED;
+
+ SHALLOW_DEBUG_PRINT("mpi: ranking rcore %p, cr3 %p, exec '%s' as %llu\n",
+ core, (void*)cr3, rtab[i].exec, rtab[i].rank);
+
+ return 0;
+}
+
+#define PAGE_ADDR(x) ((x)&~((uint64_t)0xfff))
+#define PAGE_NEXT_ADDR(x) (PAGE_ADDR(x)+0x1000)
+
+
+
+static uint64_t fast_inter_vm_copy(struct guest_info *dest_core,
+ struct guest_accessors *dest_acc,
+ uint64_t dest_va,
+ struct guest_info *src_core,
+ struct guest_accessors *src_acc,
+ uint64_t src_va,
+ uint64_t count)
+{
+
+ uint64_t left, chunk;
+ uint64_t src_page_left, dest_page_left;
+ uint64_t src_host_va, dest_host_va;
+
+ left = count;
+
+ while (left) {
+ src_page_left = PAGE_NEXT_ADDR(src_va) - src_va;
+ dest_page_left = PAGE_NEXT_ADDR(dest_va) - dest_va;
+
+ chunk = src_page_left < dest_page_left ? src_page_left : dest_page_left;
+ chunk = chunk < left ? chunk : left;
+
+ DEEP_DEBUG_PRINT("mpi: copy chunk=%d, src_va=%p, dest_va=%p\n",
+ chunk, src_va, dest_va);
+
+ if (src_acc->gva_to_hva(src_core,src_va,&src_host_va)<0) {
+ ERROR("mpi: cannot translate src address %p in VM core %p\n",src_va,src_core);
+ return count-left;
+ }
+ if (dest_acc->gva_to_hva(dest_core,dest_va,&dest_host_va)<0) {
+ ERROR("mpi: cannot translate dest address %p in VM core %p\n",dest_va,dest_core);
+ return count-left;
+ }
+
+ DEEP_DEBUG_PRINT("mpi: copy chunk=%d, src_host_va=%p, dest_host_va=%p\n",
+ chunk, src_host_va, dest_host_va);
+
+ memcpy((void*)dest_host_va,(void*)src_host_va,chunk);
+
+ src_va += chunk;
+ dest_va += chunk;
+ left -= chunk;
+ }
+
+ return count;
+
+}
+
+
+
+static int mpi_send_hcall(struct guest_info *core,
+ struct guest_accessors *acc,
+ void *buf,
+ int n,
+ int dtype,
+ int dest,
+ int tag,
+ int comm)
+{
+ uint64_t cr3;
+ int i;
+ struct rendezvous_table_row *sender, *receiver;
+
+ SHALLOW_DEBUG_PRINT("mpi: mpi_send_hcall(%p,%p,%p,%p,%p,%p)\n",(void*)buf,(void*)n,(void*)dtype,(void*)dest,(void*)tag,(void*)comm);
+
+ cr3=acc->get_cr3(core);
+
+ // First find me
+ for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
+ if (rtab[i].state==RANKED &&
+ rtab[i].core==core &&
+ rtab[i].cr3==cr3) {
+ break;
+ }
+ }
+
+ if (i==RENDEZVOUS_TABLE_MAX) {
+ ERROR("mpi: existential panic in send\n");
+ return -1;
+ }
+
+ sender=&(rtab[i]);
+
+ // Next try to find a matching receive
+
+ for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
+ if (&(rtab[i])!=sender &&
+ rtab[i].state==RANKED &&
+ strncmp(rtab[i].exec,sender->exec,EXEC_NAME_MAX)==0) {
+ break;
+ }
+ }
+
+ if (i==RENDEZVOUS_TABLE_MAX) {
+ DEEP_DEBUG_PRINT("mpi: receiver does not exist yet - pending ourselves\n");
+ goto pending;
+ } else {
+ receiver=&(rtab[i]);
+ if (!(receiver->recv_pending)) {
+ DEEP_DEBUG_PRINT("mpi: receiver has no pending receive - pending ourselves\n");
+ goto pending;
+ }
+ // totally ignores communicator!!! FIX FIX FIX
+ // simplistic fully qualified matching FIX FIX FIX
+ if (receiver->recv_tag==tag &&
+ receiver->recv_src==sender->rank) {
+ // fast path
+ // totally ignores types and assumes byte xfer FIX FIX FIX
+ uint64_t size = n < receiver->recv_size ? n : receiver->recv_size;
+
+ SHALLOW_DEBUG_PRINT("mpi: mpi_send: copying %llu bytes\n", size);
+
+ if (fast_inter_vm_copy(receiver->core,
+ receiver->acc,
+ receiver->recv_vaddr,
+ core,
+ acc,
+ buf,
+ size) != size) {
+ ERROR("mpi: fast_inter_vm_copy failed in mpi_send: destvm=%p, destacc=%p, dest_va=%p, srcvm=%p, srcacc=%p, src_va=%p, size=%llu\n",receiver->core,receiver->acc,receiver->recv_vaddr,core,acc,buf,size);
+ return -1;
+ }
+
+
+ SHALLOW_DEBUG_PRINT("mpi: mpi_send: finished copying\n");
+
+
+ // Now we release the receiver
+ receiver->recv_rc = 0;
+ receiver->recv_pending = 0;
+
+ wake_up_interruptible(&(receiver->recv_wait_queue));
+
+ // And we are also done
+
+ return 0;
+
+ } else {
+ DEEP_DEBUG_PRINT("mpi: receiver's pending receive does not match - pending ourselves\n");
+ goto pending;
+ }
+ }
+
+
+
+ pending:
+
+ // we store our state
+ sender->send_vaddr=buf;
+ sender->send_size=n;
+ sender->send_dest=dest;
+ sender->send_tag=tag;
+ sender->send_rc=-1;
+
+ // And now we wait for the receive to do the job
+ sender->send_pending=1;
+ while (wait_event_interruptible(sender->send_wait_queue,
+ !(sender->send_pending)) !=0) {
+ // wait wait wait
+ }
+
+ // released
+
+ return sender->send_rc;
+}
+
+static int mpi_recv_hcall(struct guest_info *core,
+ struct guest_accessors *acc,
+ void *buf,
+ int n,
+ int dtype,
+ int src,
+ int tag,
+ int comm,
+ void *stat)
+{
+ uint64_t cr3;
+ int i;
+ struct rendezvous_table_row *sender, *receiver;
+
+ SHALLOW_DEBUG_PRINT("mpi_recv_hcall(%p,%p,%p,%p,%p,%p,%p)\n",(void*)buf,(void*)n,(void*)dtype,(void*)src,(void*)tag,(void*)comm,(void*)stat);
+
+ cr3=acc->get_cr3(core);
+
+ // First find me
+ for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
+ if (rtab[i].state==RANKED &&
+ rtab[i].core==core &&
+ rtab[i].cr3==cr3) {
+ break;
+ }
+ }
+
+ if (i==RENDEZVOUS_TABLE_MAX) {
+ ERROR("mpi: existential panic in receive\n");
+ return -1;
+ }
+
+ receiver=&(rtab[i]);
+
+ // Next try to find a matching send
+
+ for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
+ if (&(rtab[i])!=receiver &&
+ rtab[i].state==RANKED &&
+ strncmp(rtab[i].exec,receiver->exec,EXEC_NAME_MAX)==0) {
+ break;
+ }
+ }
+
+ if (i==RENDEZVOUS_TABLE_MAX) {
+ DEEP_DEBUG_PRINT("mpi: sender does not exist yet - pending ourselves\n");
+ goto pending;
+ } else {
+ sender=&(rtab[i]);
+ if (!(sender->send_pending)) {
+ DEEP_DEBUG_PRINT("mpi: sender has no pending receive - pending ourselves\n");
+ goto pending;
+ }
+ // totally ignores communicator!!! FIX FIX FIX
+ // simplistic fully qualified matching FIX FIX FIX
+ if (sender->send_tag==tag &&
+ sender->send_dest==receiver->rank) {
+
+ uint64_t size = n < sender->send_size ? n : sender->send_size;
+
+ SHALLOW_DEBUG_PRINT("mpi: mpi_recv: copying %llu bytes\n", size);
+
+ if (fast_inter_vm_copy(core,
+ acc,
+ buf,
+ sender->core,
+ sender->acc,
+ sender->send_vaddr,
+ size) != size) {
+ ERROR("mpi: fast_inter_vm_copy failed in mpi_recv: destvm=%p, destacc=%p, dest_va=%p, srcvm=%p, srcacc=%p, src_va=%p, size=%llu\n",core,acc,buf,sender->core,sender->acc,sender->send_vaddr,size);
+ return -1;
+ }
+
+ SHALLOW_DEBUG_PRINT("mpi: mpi_recv: finished copying\n");
+
+ // Now we release the sender
+ sender->send_rc = 0;
+ sender->send_pending = 0;
+
+ wake_up_interruptible(&(sender->send_wait_queue));
+
+ // And we are also done
+
+ return 0;
+
+ } else {
+ DEEP_DEBUG_PRINT("mpi: sender's pending send does not match - pending ourselves\n");
+ goto pending;
+ }
+ }
+
+
+
+ pending:
+
+ // we store our state
+ receiver->recv_vaddr=buf;
+ receiver->recv_size=n;
+ receiver->recv_src=src;
+ receiver->recv_tag=tag;
+ receiver->recv_rc=-1;
+
+ // And now we wait for the send to do the job
+ receiver->recv_pending=1;
+ while (wait_event_interruptible(receiver->recv_wait_queue,
+ !(receiver->recv_pending)) !=0) {
+ // wait wait wait
+ }
+
+ // released
+
+ return receiver->recv_rc;
+}
+
+
+static void get_args_64(palacios_core_t core,
+ struct guest_accessors *acc,
+ uint64_t *a1,
+ uint64_t *a2,
+ uint64_t *a3,
+ uint64_t *a4,
+ uint64_t *a5,
+ uint64_t *a6,
+ uint64_t *a7,
+ uint64_t *a8)
+{
+ *a1 = acc->get_rcx(core);
+ *a2 = acc->get_rdx(core);
+ *a3 = acc->get_rsi(core);
+ *a4 = acc->get_rdi(core);
+ *a5 = acc->get_r8(core);
+ *a6 = acc->get_r9(core);
+ *a7 = acc->get_r10(core);
+ *a8 = acc->get_r11(core);
+}
+
+static void get_args_32(palacios_core_t core,
+ struct guest_accessors *acc,
+ uint64_t *a1,
+ uint64_t *a2,
+ uint64_t *a3,
+ uint64_t *a4,
+ uint64_t *a5,
+ uint64_t *a6,
+ uint64_t *a7,
+ uint64_t *a8)
+{
+ uint64_t rsp;
+ uint32_t temp;
+
+
+ rsp = acc->get_rsp(core);
+
+ acc->read_gva(core,rsp,4,&temp); *a1=temp;
+ acc->read_gva(core,rsp+4,4,&temp); *a2=temp;
+ acc->read_gva(core,rsp+8,4,&temp); *a3=temp;
+ acc->read_gva(core,rsp+12,4,&temp); *a4=temp;
+ acc->read_gva(core,rsp+16,4,&temp); *a5=temp;
+ acc->read_gva(core,rsp+20,4,&temp); *a6=temp;
+ acc->read_gva(core,rsp+24,4,&temp); *a7=temp;
+ acc->read_gva(core,rsp+28,4,&temp); *a8=temp;
+
+}
+
+static void get_args(palacios_core_t core,
+ struct guest_accessors *acc,
+ uint64_t *a1,
+ uint64_t *a2,
+ uint64_t *a3,
+ uint64_t *a4,
+ uint64_t *a5,
+ uint64_t *a6,
+ uint64_t *a7,
+ uint64_t *a8)
+{
+ uint64_t rbx;
+ uint32_t ebx;
+
+ rbx=acc->get_rbx(core);
+ ebx=rbx&0xffffffff;
+
+ switch (ebx) {
+ case 0x64646464:
+ DEEP_DEBUG_PRINT("64 bit hcall\n");
+ return get_args_64(core,acc,a1,a2,a3,a4,a5,a6,a7,a8);
+ break;
+ case 0x32323232:
+ DEEP_DEBUG_PRINT("32 bit hcall\n");
+ return get_args_32(core,acc,a1,a2,a3,a4,a5,a6,a7,a8);
+ break;
+ default:
+ ERROR("UNKNOWN hcall calling convention\n");
+ break;
+ }
+}
+
+static void put_return(palacios_core_t core,
+ struct guest_accessors *acc,
+ uint64_t rc)
+{
+ acc->set_rax(core,rc);
+}
+
+
+int mpi_hypercall(palacios_core_t *core,
+ unsigned int hid,
+ struct guest_accessors *acc,
+ void *p)
+{
+ uint64_t a1,a2,a3,a4,a5,a6,a7,a8;
+ uint64_t rc;
+
+ DEEP_DEBUG_PRINT("palacios: mpi_hypercall(%p,0x%x,%p,%p)\n",
+ core,hid,acc,p);
+
+ get_args(core,acc,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8);
+
+ DEEP_DEBUG_PRINT("palacios: arguments: %p, %p, %p, %p, %p, %p, %p, %p\n",
+ a1,a2,a3,a4,a5,a6,a7,a8);
+
+ switch (hid) {
+ case MPI_INIT:
+ rc = mpi_init_hcall(core,acc,(int*)a1,(char ***)a2);
+ break;
+ case MPI_DEINIT:
+ rc = mpi_deinit_hcall(core,acc);
+ break;
+ case MPI_RANK:
+ rc = mpi_comm_rank_hcall(core,acc,(void*)a1,(int*)a2);
+ break;
+ case MPI_SEND:
+ rc = mpi_send_hcall(core,acc,(void*)a1,(int)a2,(int)a3,(int)a4,(int)a5,(int)a6);
+ break;
+ case MPI_RECV:
+ rc = mpi_recv_hcall(core,acc,(void*)a1,(int)a2,(int)a3,(int)a4,(int)a5,(int)a6,(void*)a7);
+ break;
+ default:
+ ERROR("palacios: mpi: unknown hcall number\n");
+ rc = -1;
+ }
+
+ put_return(core,acc,rc);
+
+ return 0;
+
+}
+
+
+
+EXPORT_SYMBOL(mpi_hypercall);
+
+
+int init_module(void)
+{
+
+ rtab = kmalloc(sizeof(struct rendezvous_table_row)*RENDEZVOUS_TABLE_MAX,GFP_KERNEL);
+ if (!rtab) {
+ ERROR("mpi: could not allocate memory\n");
+ return -1;
+ } else {
+ memset(rtab,0,sizeof(struct rendezvous_table_row)*RENDEZVOUS_TABLE_MAX);
+ INFO("mpi: inited\n");
+ return 0;
+ }
+
+}
+
+
+void cleanup_module(void)
+{
+ if (rtab) {
+ kfree(rtab);
+ rtab=0;
+ }
+
+ INFO("mpi: deinited\n");
+
+}
+
+
--- /dev/null
+#include "hcall.h"
+#include "mpi_hc.h"
+
+int mpi_init_hcall(int *argc, char ***argv)
+{
+ long long rc;
+ long long zero=0;
+ long long cmd=MPI_INIT;
+
+ HCALL(rc,cmd,argc,argv,zero,zero,zero,zero,zero,zero);
+
+ return rc;
+}
+
+int mpi_deinit_hcall()
+{
+ long long rc;
+ long long zero=0;
+ long long cmd=MPI_DEINIT;
+
+ HCALL(rc,cmd,zero,zero,zero,zero,zero,zero,zero,zero);
+
+ return rc;
+}
+
+int mpi_comm_rank_hcall(void *comm, int *rank)
+{
+ long long rc;
+ long long zero=0;
+ long long cmd=MPI_RANK;
+
+ HCALL(rc,cmd,comm,rank,zero,zero,zero,zero,zero,zero);
+
+ return rc;
+}
+
+int mpi_send_hcall(void *buf, int n, void* dtype, int dest, int tag, void *comm)
+{
+ long long rc;
+ long long zero=0;
+ long long cmd=MPI_SEND;
+
+ HCALL(rc,cmd,buf,n,dtype,dest,tag,comm,zero,zero);
+
+ return rc;
+}
+
+int mpi_recv_hcall(void *buf, int n, void *dtype, int src, int tag,
+ void * comm, void *stat)
+{
+ long long rc;
+ long long zero=0;
+ long long cmd=MPI_RECV;
+
+ HCALL(rc,cmd,buf,n,dtype,src,tag,comm,stat,zero);
+
+ return rc;
+}
+
--- /dev/null
+#ifndef __MPI_INJECT__
+#define __MPI_INJECT__
+
+#define MPI_INIT 1500
+#define MPI_DEINIT 1501
+#define MPI_RANK 1502
+#define MPI_SEND 1503
+#define MPI_RECV 1504
+
+#ifndef __KERNEL__
+int mpi_init_hcall(int *argc, char ***argv);
+int mpi_deinit_hcall();
+int mpi_comm_rank_hcall(void *comm, int *rank);
+int mpi_send_hcall(void *buf, int n, void *dtype, int dest,
+ int tag, void *comm);
+int mpi_recv_hcall(void *buf, int n, void *dtype, int src,
+ int tag, void *comm, void *stat);
+
+#endif
+
+#endif
--- /dev/null
+#include <mpi/mpi.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include "mpi_hc.h"
+
+static int (*mpi_init)(int *argc, char ***argv) = NULL;
+static int (*mpi_deinit)() = NULL;
+static int (*mpi_comm_rank)(MPI_Comm, int *) = NULL;
+static int (*mpi_send)(void *, int, MPI_Datatype, int, int, MPI_Comm) = NULL;
+static int (*mpi_recv)(void *, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *) = NULL;
+
+static int hcall_enabled=0;
+
+int connect_handler(void)
+{
+ void * handle;
+ char * err;
+
+ handle = dlopen("/usr/local/lib/libmpich.so", RTLD_LAZY);
+ if (!handle){
+ fputs(dlerror(), stderr);
+ return -1;
+ }
+ mpi_init = dlsym(handle, "MPI_Init");
+ if ((err = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+ mpi_deinit = dlsym(handle, "MPI_Finalize");
+ if ((err = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+ mpi_comm_rank = dlsym(handle, "MPI_Comm_rank");
+ if ((err = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+ mpi_recv = dlsym(handle, "MPI_Recv");
+ if ((err = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+ mpi_send = dlsym(handle, "MPI_Send");
+ if ((err = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+int MPI_Init(int *argc, char ***argv)
+{
+ int rc;
+ volatile char temp;
+
+ if (mpi_init == NULL){
+ connect_handler();
+ }
+
+ // Make sure that ***argv is in memory
+ temp = ***argv;
+
+ rc = mpi_init(argc,argv);
+
+ if (rc<0) {
+ return rc;
+ }
+
+ fprintf(stderr,"Invoking mpi_init_hcall(%p,%p)\n",argc,argv);
+
+ if (mpi_init_hcall(argc,argv)<0) {
+ // not connected
+ hcall_enabled=0;
+ fprintf(stderr,"No connection to V3VEE MPI accelerator\n");
+ } else {
+ // connected
+ hcall_enabled=1;
+ fprintf(stderr,"Connected to V3VEE MPI accelerator\n");
+ }
+
+ return rc;
+}
+
+int MPI_Finalize()
+{
+ if (mpi_deinit == NULL){
+ connect_handler();
+ }
+
+ if (hcall_enabled) {
+ if (mpi_deinit_hcall()<0) {
+ fprintf(stderr,"Could not disconnect from V3VEE MPI accelerator\n");
+ }
+ hcall_enabled=0;
+ }
+
+ return mpi_deinit();
+
+}
+
+
+int MPI_Comm_rank(MPI_Comm comm, int *rank)
+{
+ int rc;
+ volatile int temp;
+
+ if (mpi_comm_rank == NULL){
+ connect_handler();
+ }
+
+
+ rc=mpi_comm_rank(comm,rank);
+
+ if (rc<0) {
+ return rc;
+ }
+
+ // Make sure *rank is in memory
+ temp=*rank;
+
+ if (hcall_enabled) {
+ if (mpi_comm_rank_hcall(comm,rank)<0) {
+ fprintf(stderr,"Could not invoke mpi_comm_rank on V3VEE MPI accelerator\n");
+ }
+ }
+
+ return rc;
+
+}
+
+int MPI_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)
+{
+ if (mpi_send == NULL){
+ connect_handler();
+ }
+
+ if (hcall_enabled) {
+ int i;
+ volatile char temp;
+ int rc;
+
+ // Force into memory
+ for (i=0;i<count;i+=4096) {
+ temp=((char*)buf)[i];
+ }
+
+ if ((rc=mpi_send_hcall(buf,count,datatype,dest,tag,comm))<0) {
+ fprintf(stderr, "Could not send using V3VEE MPI accelerator - Trying Slow Path\n");
+ return mpi_send(buf, count, datatype, dest, tag, comm);
+ } else {
+ return rc;
+ }
+ } else {
+ return mpi_send(buf, count, datatype, dest, tag, comm);
+ }
+}
+
+int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
+ MPI_Comm comm, MPI_Status *status)
+{
+ if (mpi_recv == NULL){
+ connect_handler();
+ }
+
+ if (hcall_enabled) {
+ int rc;
+ int i;
+ volatile char temp=93;
+
+ // Force into memory
+ for (i=0;i<count;i+=4096) {
+ ((char*)buf)[i]=temp;
+ }
+ if ((rc=mpi_recv_hcall(buf,count,datatype,source,tag,comm,status))<0) {
+ fprintf(stderr, "Could not receive using V3VEE MPI accelerator - Trying Slow Path\n");
+ return mpi_recv(buf, count, datatype, source, tag, comm, status);
+ } else {
+ return rc;
+ }
+ } else {
+ return mpi_recv(buf, count, datatype, source, tag, comm, status);
+ }
+}
+
--- /dev/null
+#include <stdio.h>
+#include "mpi_hc.h"
+
+int main()
+{
+ int rc;
+
+ printf("Now trying hypercalls to MPI backend\n");
+
+ printf("mpi_init_hcall(0xdeadbeef,0xbad) = ");
+
+ rc=mpi_init_hcall(0xdeadbeef,0xbad);
+
+ printf("0x%x\n",rc);
+
+ printf("mpi_comm_rank_hcall(0xabc,0xdef) = ");
+
+ rc=mpi_comm_rank_hcall(0xabc,0xdef);
+
+ printf("0x%x\n",rc);
+
+ printf("mpi_send_hcall(0x100,0x101,0x102,0x103,0x104,0x105) = ");
+
+ rc=mpi_send_hcall(0x100,0x101,0x102,0x103,0x104,0x105);
+
+ printf("0x%x\n",rc);
+
+ printf("mpi_recv_hcall(0x99,0x98,0x97,0x96,0x95,0x94,0x93) = ");
+
+ rc=mpi_recv_hcall(0x99,0x98,0x97,0x96,0x95,0x94,0x93);
+
+ printf("0x%x\n",rc);
+
+ printf("mpi_deinit_hcall() = ");
+
+ rc=mpi_deinit_hcall();
+
+ printf("0x%x\n",rc);
+
+ printf("Done.\n");
+
+}
palacios-vnet-ctrl.o \
palacios-vnet-brg.o
+v3vee-$(V3_CONFIG_HOST_HYPERCALL) += iface-host-hypercall.o
+v3vee-$(V3_CONFIG_EXT_CODE_INJECT) += iface-code-inject.o
+v3vee-$(V3_CONFIG_EXT_ENV_INJECT) += iface-env-inject.o
v3vee-objs := $(v3vee-y) ../libv3vee.a
obj-m := v3vee.o
--- /dev/null
+/*
+ * Linux interface for guest-context code injection
+ *
+ * (c) Kyle C. Hale 2011
+ *
+ */
+
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <linux/module.h>
+
+#include <gears/code_inject.h>
+
+#include "palacios.h"
+#include "vm.h"
+#include "linux-exts.h"
+#include "iface-code-inject.h"
+
+
+/* eventually this should probably be a hash table,
+ * hashed on unique inject data
+ */
+static struct top_half_data *top_map[MAX_INJ] = {[0 ... MAX_INJ - 1] = 0};
+
+static int register_top(struct top_half_data *top) {
+ int i;
+
+ for (i = 0; i < MAX_INJ; i++) {
+ if (!top_map[i]) {
+ top_map[i] = top;
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+
+static void free_inject_data (void) {
+ int i;
+
+ for(i = 0; i < MAX_INJ; i++) {
+ if (top_map[i]) {
+ kfree(top_map[i]->elf_data);
+ kfree(top_map[i]);
+ }
+ }
+}
+
+
+
+static int vm_tophalf_inject (struct v3_guest * guest, unsigned int cmd, unsigned long arg, void * priv_data) {
+ struct top_half_data top_arg;
+ struct top_half_data * top;
+
+ top = kmalloc(sizeof(struct top_half_data), GFP_KERNEL);
+ if (IS_ERR(top)) {
+ printk("Palacios Error: could not allocate space for top half data\n");
+ return -EFAULT;
+ }
+ memset(top, 0, sizeof(struct top_half_data));
+
+ printk("Palacios: Loading ELF data...\n");
+ if (copy_from_user(&top_arg, (void __user *)arg, sizeof(struct top_half_data))) {
+ printk("palacios: error copying ELF from userspace\n");
+ return -EFAULT;
+ }
+
+ top->elf_size = top_arg.elf_size;
+ top->func_offset = top_arg.func_offset;
+ top->is_dyn = top_arg.is_dyn;
+
+ /* we have a binary name */
+ if (top_arg.is_exec_hooked) {
+ strcpy(top->bin_file, top_arg.bin_file);
+ top->is_exec_hooked = 1;
+ printk("top->bin_file is %s\n", top->bin_file);
+ }
+
+ printk("Palacios: Allocating %lu B of kernel memory for ELF binary data...\n", top->elf_size);
+ top->elf_data = kmalloc(top->elf_size, GFP_KERNEL);
+ if (IS_ERR(top->elf_data)) {
+ printk("Palacios Error: could not allocate space for binary image\n");
+ return -EFAULT;
+ }
+ memset(top->elf_data, 0, top->elf_size);
+
+ printk("Palacios: Copying ELF image into kernel module...\n");
+ if (copy_from_user(top->elf_data, (void __user *)top_arg.elf_data, top->elf_size)) {
+ printk("Palacios: Error loading elf data\n");
+ return -EFAULT;
+ }
+
+ if (register_top(top) < 0)
+ return -1;
+
+ printk("Palacios: setting up inject code...\n");
+ if (v3_insert_code_inject(guest->v3_ctx, top->elf_data, top->elf_size,
+ top->bin_file, top->is_dyn, top->is_exec_hooked, top->func_offset) < 0) {
+ printk("Palacios Error: error setting up inject code\n");
+ return -1;
+ }
+
+ printk("Palacios: injection registration complete\n");
+ return 0;
+}
+
+
+static int init_code_inject (void) {
+ return 0;
+}
+
+
+static int deinit_code_inject (void) {
+ return 0;
+}
+
+
+static int guest_init_code_inject (struct v3_guest * guest, void ** vm_data) {
+ add_guest_ctrl(guest, V3_VM_TOPHALF_INJECT, vm_tophalf_inject, NULL);
+ return 0;
+}
+
+
+static int guest_deinit_code_inject (struct v3_guest * guest, void * vm_data) {
+ free_inject_data();
+ return 0;
+}
+
+
+static struct linux_ext code_inject_ext = {
+ .name = "CODE_INJECT",
+ .init = init_code_inject,
+ .deinit = deinit_code_inject,
+ .guest_init = guest_init_code_inject,
+ .guest_deinit = guest_deinit_code_inject
+};
+
+register_extension(&code_inject_ext);
--- /dev/null
+#ifndef __IFACE_CODE_INJECT_H__
+#define __IFACE_CODE_INJECT_H__
+
+#define V3_VM_TOPHALF_INJECT 12123
+#define V3_VM_HYPERCALL_ADD 12124
+#define V3_VM_HYPERCALL_REMOVE 12125
+
+#define MAX_INJ 128
+
+struct top_half_data {
+ unsigned long elf_size;
+ void *elf_data;
+ int got_offset;
+ int plt_offset;
+ int func_offset;
+ char bin_file[256];
+ int hcall_nr;
+ int inject_id;
+ int is_dyn;
+ int is_exec_hooked;
+};
+
+#define HCALL_NAME_MAX 256
+
+struct hcall_data {
+ int fd;
+ int hcall_nr;
+ char fn[HCALL_NAME_MAX];
+};
+
+#endif
--- /dev/null
+/*
+ * Linux interface for guest-context environment variable injection
+ *
+ * (c) Kyle C. Hale 2012
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <gears/env_inject.h>
+
+#include "palacios.h"
+#include "vm.h"
+#include "linux-exts.h"
+#include "iface-env-inject.h"
+
+
+static struct env_data * env_map[MAX_ENV_INJECT] = {[0 ... MAX_ENV_INJECT - 1] = 0};
+
+
+static int register_env(struct env_data * env) {
+ int i;
+
+ for (i = 0; i < MAX_ENV_INJECT; i++) {
+ if (!env_map[i]) {
+ env_map[i] = env;
+ return i;
+ }
+ }
+ return -1;
+}
+
+
+static void free_inject_data (void) {
+ int i, j;
+
+ for(i = 0; i < MAX_ENV_INJECT; i++) {
+ if (env_map[i]) {
+ for (j = 0; j < env_map[i]->num_strings; j++)
+ kfree(env_map[i]->strings[j]);
+
+ kfree(env_map[i]->strings);
+ kfree(env_map[i]);
+ }
+ }
+}
+
+
+
+static int vm_env_inject (struct v3_guest * guest, unsigned int cmd, unsigned long arg, void * priv_data) {
+ struct env_data env_arg;
+ struct env_data * env;
+ int i;
+
+ printk("Palacios: Loading environment data...\n");
+ if (copy_from_user(&env_arg, (void __user *)arg, sizeof(struct env_data))) {
+ printk("palacios: error copying environment data from userspace\n");
+ return -EFAULT;
+ }
+
+ env = kmalloc(sizeof(struct env_data), GFP_KERNEL);
+ if (IS_ERR(env)) {
+ printk("Palacios Error: could not allocate space for environment data\n");
+ return -EFAULT;
+ }
+
+ memset(env, 0, sizeof(struct env_data));
+
+ env->num_strings = env_arg.num_strings;
+
+ strcpy(env->bin_name, env_arg.bin_name);
+ printk("Binary hooked on: %s\n", env->bin_name);
+
+ //printk("Palacios: Allocating space for %u env var string ptrs...\n", env->num_strings);
+ env->strings = kmalloc(env->num_strings*sizeof(char*), GFP_KERNEL);
+ if (IS_ERR(env->strings)) {
+ printk("Palacios Error: could not allocate space for env var strings\n");
+ return -EFAULT;
+ }
+ memset(env->strings, 0, env->num_strings*sizeof(char*));
+
+ //printk("Palacios: copying env var string pointers\n");
+ if (copy_from_user(env->strings, (void __user *)env_arg.strings, env->num_strings*sizeof(char*))) {
+ printk("Palacios: Error copying string pointers\n");
+ return -EFAULT;
+ }
+
+ for (i = 0; i < env->num_strings; i++) {
+ char * tmp = kmalloc(MAX_STRING_LEN, GFP_KERNEL);
+ if (IS_ERR(tmp)) {
+ printk("Palacios Error: could not allocate space for env var string #%d\n", i);
+ return -EFAULT;
+ }
+
+ if (copy_from_user(tmp, (void __user *)env->strings[i], MAX_STRING_LEN)) {
+ printk("Palacios: Error copying string #%d\n", i);
+ return -EFAULT;
+ }
+ env->strings[i] = tmp;
+ }
+
+ printk("Palacios: registering environment data...\n");
+ if (register_env(env) < 0)
+ return -1;
+
+ printk("Palacios: passing data off to palacios...\n");
+ if (v3_insert_env_inject(guest->v3_ctx, env->strings, env->num_strings, env->bin_name) < 0) {
+ printk("Palacios: Error passing off environment data\n");
+ return -1;
+ }
+
+ printk("Palacios: environment injection registration complete\n");
+ return 0;
+}
+
+
+static int init_env_inject (void) {
+ return 0;
+}
+
+
+static int deinit_env_inject (void) {
+ return 0;
+}
+
+
+static int guest_init_env_inject (struct v3_guest * guest, void ** vm_data) {
+ add_guest_ctrl(guest, V3_VM_ENV_INJECT, vm_env_inject, NULL);
+ return 0;
+}
+
+
+static int guest_deinit_env_inject (struct v3_guest * guest, void * vm_data) {
+ free_inject_data();
+ return 0;
+}
+
+
+static struct linux_ext env_inject_ext = {
+ .name = "ENV_INJECT",
+ .init = init_env_inject,
+ .deinit = deinit_env_inject,
+ .guest_init = guest_init_env_inject,
+ .guest_deinit = guest_deinit_env_inject
+};
+
+register_extension(&env_inject_ext);
--- /dev/null
+#ifndef __IFACE_ENV_INJECT_H__
+#define __IFACE_ENV_INJECT_H__
+
+
+#define MAX_NUM_STRINGS 10
+#define MAX_STRING_LEN 128
+#define MAX_ENV_INJECT 10
+
+#define V3_VM_ENV_INJECT 13125
+
+struct env_data {
+ int num_strings;
+ char ** strings;
+ char bin_name[MAX_STRING_LEN];
+};
+
+
+#endif
--- /dev/null
+/*
+ * Linux interface for guest-context code injection
+ *
+ * (c) Kyle C. Hale 2011
+ *
+ */
+
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <linux/module.h>
+
+#include <interfaces/vmm_host_hypercall.h>
+
+#include "palacios.h"
+#include "vm.h"
+#include "linux-exts.h"
+#include "iface-host-hypercall.h"
+
+static int host_hypercall_nop(palacios_core_t core,
+ unsigned int hcall_id,
+ struct guest_accessors *acc,
+ void *priv_data) {
+ printk("palacios: host_hypercall_nop dummy handler invoked\n");
+ printk(" rip=%p\n rsp=%p\n rbp=%p\n rflags=%p\n",
+ (void*)(acc->get_rip(core)),
+ (void*)(acc->get_rsp(core)),
+ (void*)(acc->get_rbp(core)),
+ (void*)(acc->get_rflags(core)));
+
+ printk(" rax=%p\n rbx=%p\n rcx=%p\n rdx=%p\n rsi=%p\n rdi=%p\n",
+ (void*)(acc->get_rax(core)),
+ (void*)(acc->get_rbx(core)),
+ (void*)(acc->get_rcx(core)),
+ (void*)(acc->get_rdx(core)),
+ (void*)(acc->get_rsi(core)),
+ (void*)(acc->get_rdi(core)));
+ printk(" r8=%p\n r9=%p\n r10=%p\n r11=%p\n r12=%p\n r13=%p\n r14=%p\n r15=%p\n",
+ (void*)(acc->get_r8(core)),
+ (void*)(acc->get_r9(core)),
+ (void*)(acc->get_r10(core)),
+ (void*)(acc->get_r11(core)),
+ (void*)(acc->get_r12(core)),
+ (void*)(acc->get_r13(core)),
+ (void*)(acc->get_r14(core)),
+ (void*)(acc->get_r15(core)));
+ printk(" cr0=%p\n cr2=%p\n cr3=%p\n cr4=%p\n cr8=%p\n efer=%p\n",
+ (void*)(acc->get_cr0(core)),
+ (void*)(acc->get_cr2(core)),
+ (void*)(acc->get_cr3(core)),
+ (void*)(acc->get_cr4(core)),
+ (void*)(acc->get_cr8(core)),
+ (void*)(acc->get_efer(core)));
+ return 0;
+}
+
+
+static int vm_hypercall_add (struct v3_guest *guest,
+ unsigned int cmd,
+ unsigned long arg,
+ void *priv_data) {
+
+ struct hcall_data hdata;
+ void *func;
+
+ if (copy_from_user(&hdata,(void __user *) arg, sizeof(struct hcall_data))) {
+ printk("palacios: copy from user in getting input for hypercall add\n");
+ return -EFAULT;
+ }
+
+ if (0==strcmp(hdata.fn,"")) {
+ printk("palacios: no hypercall function supplied, using default\n");
+ func = (void*) host_hypercall_nop;
+ } else {
+ func = __symbol_get(hdata.fn);
+ }
+
+ if (func == NULL) {
+ printk("palacios: cannot find function '%s' for hypercall addition - perhaps your module hasn't been loaded yet?\n",hdata.fn);
+ return -EFAULT;
+ }
+
+ if (v3_register_host_hypercall(guest->v3_ctx,
+ hdata.hcall_nr,
+ func,
+ NULL)) {
+ printk("palacios: cannot register hypercall 0x%x for function %s (%p)\n",
+ hdata.hcall_nr, hdata.fn, func);
+ return -EFAULT;
+ }
+
+ printk("palacios: hypercall %d (0x%x) registered for function %s (%p)\n",
+ hdata.hcall_nr,hdata.hcall_nr,hdata.fn,func);
+ return 0;
+}
+
+static int vm_hypercall_remove (struct v3_guest *guest,
+ unsigned int cmd,
+ unsigned long arg,
+ void *priv_data) {
+
+ struct hcall_data hdata;
+
+ if (copy_from_user(&hdata,(void __user *) arg, sizeof(struct hcall_data))) {
+ printk("palacios: copy from user in getting input for hypercall remove\n");
+ return -EFAULT;
+ }
+ if (v3_unregister_host_hypercall(guest->v3_ctx,
+ hdata.hcall_nr)) {
+ printk("palacios: cannot unregister hypercall 0x%x\n", hdata.hcall_nr);
+ return -EFAULT;
+ }
+
+ printk("palacios: hypercall %d (0x%x) unregistered\n",
+ hdata.hcall_nr,hdata.hcall_nr);
+
+ return 0;
+}
+
+static int init_host_hypercall (void) {
+ return 0;
+}
+
+
+static int deinit_host_hypercall (void) {
+ return 0;
+}
+
+static int guest_init_host_hypercall (struct v3_guest * guest, void ** vm_data) {
+ add_guest_ctrl(guest, V3_VM_HYPERCALL_ADD, vm_hypercall_add, NULL);
+ add_guest_ctrl(guest, V3_VM_HYPERCALL_REMOVE, vm_hypercall_remove, NULL);
+ return 0;
+}
+
+
+static int guest_deinit_host_hypercall (struct v3_guest * guest, void * vm_data) {
+ return 0;
+}
+
+
+static struct linux_ext host_hypercall_ext = {
+ .name = "HOST_HYPERCALL",
+ .init = init_host_hypercall,
+ .deinit = deinit_host_hypercall,
+ .guest_init = guest_init_host_hypercall,
+ .guest_deinit = guest_deinit_host_hypercall
+};
+
+register_extension(&host_hypercall_ext);
--- /dev/null
+#ifndef __IFACE_HOST_HYPERCALL_H__
+#define __IFACE_HOST_HYPERCALL_H__
+
+#define V3_VM_HYPERCALL_ADD 12124
+#define V3_VM_HYPERCALL_REMOVE 12125
+
+
+#define HCALL_NAME_MAX 256
+
+struct hcall_data {
+ int hcall_nr;
+ char fn[HCALL_NAME_MAX];
+};
+
+#endif
258 -- (IFACE) VGA Console Framebuf Query
10245 -- (IFACE) Connect Host Device
+
+12123 -- (EXT) Inject Top Half Code into Guest
+
+12124 -- (EXT) Register host hypercall (remember to insmod first)
+12124 -- (EXT) Unregister host hypercall
+
+13125 -- (EXT) Inject Environment Variables into Guest Process
+
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/fs.h>
MODULE_LICENSE("GPL");
+// Module parameter
+int cpu_list[NR_CPUS] = {};
+int cpu_list_len = 0;
+module_param_array(cpu_list, int, &cpu_list_len, 0644);
+MODULE_PARM_DESC(cpu_list, "Comma-delimited list of CPUs that Palacios will run on");
+
int mod_allocs = 0;
int mod_frees = 0;
palacios_init_mm();
-
// Initialize Palacios
-
palacios_vmm_init();
extern unsigned int cpu_khz;
+extern int cpu_list[NR_CPUS];
+extern int cpu_list_len;
+
/**
* Prints a message to the console.
int palacios_vmm_init( void )
{
+ int num_cpus = num_online_cpus();
+ char * cpu_mask = NULL;
+
+ if (cpu_list_len > 0) {
+ int major = 0;
+ int minor = 0;
+ int i = 0;
+
+ cpu_mask = kmalloc((num_cpus / 8) + 1, GFP_KERNEL);
+ memset(cpu_mask, 0, (num_cpus / 8) + 1);
+
+ for (i = 0; i < cpu_list_len; i++) {
+ if (cpu_list[i] >= num_cpus) {
+ printk("CPU (%d) exceeds number of available CPUs. Ignoring...\n", cpu_list[i]);
+ continue;
+ }
+
+ major = cpu_list[i] / 8;
+ minor = cpu_list[i] % 8;
+ *(cpu_mask + major) |= (0x1 << minor);
+ }
+ }
+
memset(irq_to_guest_map, 0, sizeof(struct v3_vm_info *) * 256);
-
+
printk("palacios_init starting - calling init_v3\n");
- Init_V3(&palacios_os_hooks, num_online_cpus());
+ Init_V3(&palacios_os_hooks, cpu_mask, num_cpus);
return 0;
v3_cons_sc \
v3_stream \
v3_monitor \
+ v3_hypercall
#
# Examples
EXAMPLE_EXECS = v3_user_host_dev_example \
v3_os_debug \
v3_user_keyed_stream_example \
- v3_user_keyed_stream_file \
+ v3_user_keyed_stream_file
#
# Currently experimental things
#
EXPERIMENTAL_EXECS = v3_simulate \
- v3_inject_ecc_scrubber_mce
+ v3_inject_ecc_scrubber_mce \
+ v3_top_inject \
+ v3_env_inject
+
+
+
#
# Things that have been built elsewhere - just for reference here
CFLAGS += -static
endif
+CFLAGS += -I../linux_module
+
CC = gcc
AR = ar
depend:
$(CC) -MM $(CFILES) -I../linux_module > .dependencies
-include .dependencies
--- /dev/null
+/*
+ * V3 Environment Variable Injection Utility
+ * This code allows a user to inject environment variables into a process
+ * marked by a specific binary name in a running guest.
+ *
+ * (c) Kyle C. Hale, 2012
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+
+#include "iface-env-inject.h"
+
+
+static void usage (char* bin) {
+ fprintf(stderr, "Usage: %s /dev/v3-vm<N> env-file inject-point-exe\n", bin);
+}
+
+int main (int argc, char **argv) {
+ char *vm_dev, *env_file, *bin_name;
+ int vm_fd, err, bytes_read, num_strings;
+ struct stat t_stat;
+ struct env_data env;
+ char * strings[MAX_NUM_STRINGS];
+ char tmp_str[MAX_STRING_LEN];
+ int i = 0;
+ FILE * t_fd;
+
+ if (argc < 4) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ vm_dev = argv[1];
+ env_file = argv[2];
+ bin_name = argv[3];
+
+ t_fd = fopen(env_file, "r");
+ if (!t_fd) {
+ fprintf(stderr, "Error opening environment variable file: %s\n", env_file);
+ return -1;
+ }
+
+ /* copy in the vars line by line */
+ while (fgets(tmp_str, MAX_STRING_LEN, t_fd) != NULL) {
+ int len = strlen(tmp_str) - 1;
+ if (tmp_str[len] == '\n')
+ tmp_str[len] = 0;
+ strings[i] = (char*)malloc(MAX_STRING_LEN);
+ strcpy(strings[i], tmp_str);
+ i++;
+ }
+
+ env.num_strings = i;
+ printf("Found %d environment variables to inject\n", i);
+
+ env.strings = (char**) strings;
+
+ if (!bin_name) {
+ fprintf(stderr, "Error: no binary hook provided\n");
+ return -1;
+ }
+
+ strncpy(env.bin_name, bin_name, MAX_STRING_LEN);
+
+ vm_fd = open(vm_dev, O_RDONLY);
+ if (vm_fd == -1) {
+ fprintf(stderr, "Error opening VM device: %s\n", vm_dev);
+ return -1;
+ }
+
+ printf("Transferring control to Palacios\n");
+ err = ioctl(vm_fd, V3_VM_ENV_INJECT, &env);
+ if (err < 0) {
+ fprintf(stderr, "Error providing env var data to palacios\n");
+ return -1;
+ }
+
+ close(t_fd);
+ close(vm_fd);
+ return 0;
+}
--- /dev/null
+/*
+ * V3 Hypercall Add Utility
+ * Allows hypercalls to be added to Palacios at run-time
+ *
+ * (c) Kyle C. Hale, 2011
+ */
+
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <malloc.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+
+#include "../linux_module/iface-host-hypercall.h"
+
+static void usage (char * bin) {
+ fprintf(stderr, "%s /dev/v3-vm<N> add|remove <nr> [function]\n", bin);
+ fprintf(stderr, "<nr> = hypercall number\n"
+ "[function] = kernel symbol to bind to\n"
+ " (defaults to a nop if not given)\n");
+}
+
+int main (int argc, char ** argv) {
+ char * vm_dev = NULL;
+ int vm_fd, err;
+ struct hcall_data hd;
+ enum {ADD,REMOVE} task;
+
+
+ if (argc < 4 || argc>5) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ vm_dev = argv[1];
+
+ hd.hcall_nr = strtol(argv[3], NULL, 0);
+
+
+ if (!strcasecmp(argv[2],"add")) {
+ task=ADD;
+ if (argc==4) {
+ hd.fn[0]=0; // blank
+ } else {
+ strcpy(hd.fn,argv[4]);
+ }
+ } else if (!strcasecmp(argv[2],"remove")) {
+ task=REMOVE;
+ } else {
+ usage(argv[0]);
+ return -1;
+ }
+
+ printf("%s hypercall %d (0x%x) -> '%s' on %s\n",
+ task==ADD ? "Adding" : "Removing",
+ hd.hcall_nr, hd.hcall_nr,
+ task==REMOVE ? "(unimportant)"
+ : strcmp(hd.fn,"") ? hd.fn : "(default nop)", vm_dev);
+
+ vm_fd = open(vm_dev, O_RDONLY);
+ if (vm_fd == -1) {
+ perror("Cannot open VM device");
+ return -1;
+ }
+
+ if (ioctl(vm_fd,
+ task==ADD ? V3_VM_HYPERCALL_ADD : V3_VM_HYPERCALL_REMOVE,
+ &hd) < 0) {
+ perror("Cannot complete task due ioctl failure");
+ close(vm_fd);
+ return -1;
+ }
+
+ close(vm_fd);
+
+ printf("Done.\n");
+
+ return 0;
+}
+
+
perror("Could not open block size file: " SYS_PATH "block_size_bytes");
return -1;
}
-
+
if (read(tmp_fd, tmp_buf, BUF_SIZE) <= 0) {
perror("Could not read block size file: " SYS_PATH "block_size_bytes");
return -1;
}
-
+
close(tmp_fd);
block_size_bytes = strtoll(tmp_buf, NULL, 16);
num_blocks = mem_size_bytes / block_size_bytes;
- if (block_size_bytes % mem_size_bytes) num_blocks++;
+ if (mem_size_bytes % block_size_bytes) num_blocks++;
printf("Looking for %d blocks of memory\n", num_blocks);
struct dirent ** namelist = NULL;
int size = 0;
int i = 0;
+ int j = 0;
+ int last_block = 0;
- bitmap_entries = scandir(SYS_PATH, &namelist, dir_filter, dir_cmp);
+ last_block = scandir(SYS_PATH, &namelist, dir_filter, dir_cmp);
+ bitmap_entries = atoi(namelist[last_block - 1]->d_name + 6) + 1;
size = bitmap_entries / 8;
if (bitmap_entries % 8) size++;
bitmap = malloc(size);
memset(bitmap, 0, size);
- for (i = 0; i < bitmap_entries; i++) {
+ for (i = 0; j < bitmap_entries - 1; i++) {
struct dirent * tmp_dir = namelist[i];
- int major = i / 8;
- int minor = i % 8;
int block_fd = 0;
char status_str[BUF_SIZE];
char fname[BUF_SIZE];
snprintf(fname, BUF_SIZE, "%s%s/removable", SYS_PATH, tmp_dir->d_name);
+ j = atoi(tmp_dir->d_name + 6);
+ int major = j / 8;
+ int minor = j % 8;
+
printf("Checking %s...", fname);
block_fd = open(fname, O_RDONLY);
-
+
if (block_fd == -1) {
printf("Hotpluggable memory not supported...\n");
return -1;
}
close(block_fd);
-
+
if (atoi(status_str) == 1) {
printf("Removable\n");
bitmap[major] |= (0x1 << minor);
// bitmap: bitmap of blocks (1 == allocatable)
// bitmap_entries: number of blocks in the system/number of bits in bitmap
// reg_start: The block index where our allocation will start
-
+
int i = 0;
int run_len = 0;
-
+
for (i = 0; i < bitmap_entries; i++) {
int i_major = i / 8;
int i_minor = i % 8;
-
-
+
if (!(bitmap[i_major] & (0x1 << i_minor))) {
reg_start = i + 1; // skip the region start to next entry
run_len = 0;
continue;
}
-
+
run_len++;
if (run_len >= num_blocks) {
}
}
-
+
if (run_len < num_blocks) {
fprintf(stderr, "Could not find enough consecutive memory blocks... (found %d)\n", run_len);
return -1;
memset(fname, 0, 256);
snprintf(fname, 256, "%smemory%d/state", SYS_PATH, i + reg_start);
-
+
block_file = fopen(fname, "r+");
if (block_file == NULL) {
for (i = 0; i < num_blocks; i++) {
- int block_fd = NULL;
+ int block_fd = 0;
char fname[BUF_SIZE];
char status_buf[BUF_SIZE];
snprintf(fname, BUF_SIZE, "%smemory%d/state", SYS_PATH, i + reg_start);
-
+
block_fd = open(fname, O_RDONLY);
if (block_fd == -1) {
perror("Could not open block file");
return -1;
}
-
+
if (read(block_fd, status_buf, BUF_SIZE) <= 0) {
perror("Could not read block status");
return -1;
fclose(block_file);
}
-
break;
}
{
int v3_fd = 0;
struct v3_mem_region mem;
- unsigned long long num_bytes = num_blocks * block_size_bytes;
- unsigned long long base_addr = reg_start * block_size_bytes;
+ unsigned long long num_bytes = (unsigned long long)(num_blocks) * (unsigned long long)(block_size_bytes);
+ unsigned long long base_addr = (unsigned long long)(reg_start) * (unsigned long long)(block_size_bytes);
- printf("Giving Palacios %dMB of memory at (%p) \n",
+ printf("Giving Palacios %lluMB of memory at (%p) \n",
num_bytes / (1024 * 1024), base_addr);
mem.base_addr = base_addr;
--- /dev/null
+/*
+ * V3 Top Half Injection Utility
+ * This code allows a user to inject a "top half" of code into a running guest.
+ * The "bottom half" (a hypercall handler) is inserted using another utility.
+ *
+ * (c) Kyle C. Hale, 2011
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <malloc.h>
+#include <string.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+
+#include "iface-code-inject.h"
+
+#define __ELF_INJECT_CLASS 32
+
+#define ElfW(type) _ElfW (Elf, __ELF_INJECT_CLASS, type)
+#define _ElfW(e,w,t) _ElfW_1 (e, w, _##t)
+#define _ElfW_1(e,w,t) e##w##t
+
+
+static void usage (char* bin) {
+ fprintf(stderr, "Usage: %s /dev/v3-vm<N> <inject-code> <code-entry-offset> [inject-point-exe]\n", bin);
+}
+
+
+/* look for PT_DYNAMIC to see if it's dynamically linked */
+static int is_dynamic (ElfW(Ehdr) * ehdr) {
+ int i;
+ ElfW(Phdr) * phdr = (ElfW(Phdr)*)((char*)ehdr + ehdr->e_phoff);
+ ElfW(Phdr) * phdr_cursor;
+
+ phdr_cursor = phdr;
+
+ for (i = 0; i < ehdr->e_phnum; i++, phdr_cursor++) {
+ if (phdr_cursor->p_type == PT_DYNAMIC)
+ return 1;
+ }
+
+ return 0;
+}
+
+
+int main (int argc, char **argv) {
+ char *vm_dev = NULL;
+ char *top_half = NULL;
+ char *bin_file = NULL;
+ int vm_fd, t_fd, err, bytes_read, entry;
+ struct stat t_stat;
+ struct top_half_data elf;
+ ElfW(Ehdr) * elf_hdr;
+
+ if (argc < 4 || argc > 5) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ vm_dev = argv[1];
+ top_half = argv[2];
+ entry = strtol(argv[3], NULL, 0);
+ if (argv[4])
+ bin_file = argv[4];
+
+
+ t_fd = open(top_half, O_RDONLY);
+ if (t_fd == -1) {
+ fprintf(stderr, "Error opening top half .o file: %s\n", top_half);
+ return -1;
+ }
+
+ if (fstat(t_fd, &t_stat) < 0) {
+ fprintf(stderr, "Error: could not stat ELF binary file %s\n", top_half);
+ return -1;
+ }
+
+ memset(&elf, 0, sizeof(struct top_half_data));
+
+ elf.elf_size = t_stat.st_size;
+
+ if (bin_file) {
+ strcpy(elf.bin_file, bin_file);
+ elf.is_exec_hooked = 1;
+ } else {
+ elf.is_exec_hooked = 0;
+ }
+
+
+ /* read in the ELF */
+ elf.elf_data = malloc(elf.elf_size);
+ if (!elf.elf_data) {
+ fprintf(stderr, "Error allocating memory for ELF data\n");
+ return -1;
+ }
+
+ printf("Loading ELF binary...\n");
+ if ((bytes_read = read(t_fd, elf.elf_data, elf.elf_size)) < 0) {
+ fprintf(stderr, "Error loading ELF binary %s\n", top_half);
+ return -1;
+ }
+
+ printf("Loaded. %d Bytes of data read\n", bytes_read);
+ elf_hdr = (ElfW(Ehdr)*)elf.elf_data;
+
+ /* set the entry point */
+ elf.func_offset = entry;
+
+ /* check the ELF magic nr to make sure this is a valid ELF */
+ if (elf_hdr->e_ident[EI_MAG0] != 0x7f ||
+ elf_hdr->e_ident[EI_MAG1] != 'E' ||
+ elf_hdr->e_ident[EI_MAG2] != 'L' ||
+ elf_hdr->e_ident[EI_MAG3] != 'F') {
+
+ fprintf(stderr, "Error: Invalid ELF binary %s\n", top_half);
+ return -1;
+ }
+
+ /* make sure the ELF is an actual executable file */
+ if (elf_hdr->e_type != ET_EXEC) {
+ fprintf(stderr, "Error: ELF must be an executable file %s\n", top_half);
+ return -1;
+ }
+
+ /* is it a dynamically linked executable? */
+ elf.is_dyn = is_dynamic(elf_hdr);
+
+ vm_fd = open(vm_dev, O_RDONLY);
+ if (vm_fd == -1) {
+ fprintf(stderr, "Error opening VM device: %s\n", vm_dev);
+ return -1;
+ }
+
+ printf("Transferring control to Palacios\n");
+ err = ioctl(vm_fd, V3_VM_TOPHALF_INJECT, &elf);
+ if (err < 0) {
+ fprintf(stderr, "Error providing top half to palacios: %s\n", top_half);
+ return -1;
+ }
+
+ free(elf.elf_data);
+ close(t_fd);
+ close(vm_fd);
+
+ return 0;
+}
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __CODE_INJECT_H__
+#define __CODE_INJECT_H__
+
+int v3_insert_code_inject (void *ginfo, void *code, int size, char *bin_file,
+ int is_dyn, int is_exec_hooked, int func_offset);
+
+#ifdef __V3VEE__
+
+#define E_NEED_PF -2
+
+#define MMAP_SIZE 86
+#define MUNMAP_SIZE 22
+#define VMMCALL_SIZE 10
+
+#define PAGES_BACK 50
+#define ELF_MAG_SIZE 4
+#define NO_MMAP 0
+#define MMAP_COMPLETE 1
+
+struct v3_code_injects {
+ struct list_head code_inject_list;
+ struct list_head hooked_code_injects;
+ int active;
+};
+
+
+// TODO: adjust size of boolean members
+struct v3_code_inject_info {
+
+ // pointer to ELF and its size
+ void *code;
+ int code_size;
+
+
+ // indicates this is a hooked inject
+ int is_exec_hooked;
+ char * bin_file;
+
+ // important offsets to ELF sections
+ // for the injected code
+ int func_offset;
+ int got_offset;
+ int plt_offset;
+
+
+ int is_dyn;
+ addr_t code_region_gva;
+ // continuation-style function for
+ // page fault handling
+ struct v3_cont *cont;
+
+
+ // the following are for saving context
+ char *old_code;
+ struct v3_gprs regs;
+ struct v3_ctrl_regs ctrl_regs;
+ uint64_t rip;
+
+ struct list_head inject_node;
+
+ int in_progress;
+};
+
+struct v3_cont {
+ addr_t check_addr;
+ int (*cont_func)(struct guest_info * core, struct v3_code_inject_info * inject,
+ addr_t check);
+};
+
+int v3_remove_code_inject(struct v3_vm_info * vm, struct v3_code_inject_info * inject);
+int v3_do_inject(struct guest_info * core, struct v3_code_inject_info * inject, int mmap_state);
+int v3_do_static_inject(struct guest_info * core, struct v3_code_inject_info * inject,
+ int mmap_state, addr_t region_gva);
+int v3_handle_guest_inject(struct guest_info * core, void * priv_data);
+
+#endif /* ! __V3VEE__ */
+
+#endif
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2012, Kyle C. Hale <kh@u.northwestern.edu>
+ * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __ENV_INJECT_H__
+#define __ENV_INJECT_H__
+
+int v3_insert_env_inject (void *ginfo, char ** strings, int num_strings, char * bin_name);
+
+#ifdef __V3VEE__
+
+struct v3_env_injects {
+ struct list_head env_inject_list;
+};
+
+
+struct v3_env_inject_info {
+ char ** env_vars;
+ int num_env_vars;
+ struct list_head inject_node;
+ char * bin_name;
+};
+
+int v3_remove_env_inject (struct v3_vm_info * vm, struct v3_env_inject_info * inject);
+
+#endif /* ! __V3VEE__ */
+
+#endif
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __EXECVE_HOOK_H__
+#define __EXECVE_HOOK_H__
+
+#ifdef __V3VEE__
+
+struct v3_exec_hooks {
+ struct list_head hook_list;
+ struct hashtable * bin_table;
+};
+
+
+struct exec_hook {
+ int (*handler)(struct guest_info * core, void * priv_data);
+ struct list_head hook_node;
+ void * priv_data;
+};
+
+
+int v3_execve_handler (struct guest_info * core, uint_t syscall_nr, void * priv_data);
+
+#endif
+
+int v3_hook_executable (struct v3_vm_info * vm,
+ const uchar_t * binfile,
+ int (*handler)(struct guest_info * core, void * priv_data),
+ void * priv_data);
+
+int v3_unhook_executable (struct v3_vm_info * core, const uchar_t * binfile);
+#endif
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
*/
-#ifndef __SYSCALL_HIJACK_H__
-#define __SYSCALL_HIJACK_H__
+#ifndef __PROCESS_ENVIRONMENT_H__
+#define __PROCESS_ENVIRONMENT_H__
+#ifdef __V3VEE__
-int v3_hook_syscall (struct guest_info * core,
- uint_t syscall_nr,
- int (*handler)(struct guest_info * core, uint_t syscall_nr, void * priv_data),
- void * priv_data);
-int v3_hook_passthrough_syscall (struct guest_info * core, uint_t syscall_nr);
+#include <palacios/vmm.h>
+#include <palacios/vmm_types.h>
+#include <palacios/vm_guest.h>
+
+
+struct v3_execve_varchunk {
+ char ** argv;
+ char ** envp;
+ uint_t argc;
+ uint_t envc;
+ uint_t bytes;
+ int active;
+};
+
+int v3_replace_arg (struct guest_info * core, uint_t argnum, const char * newval);
+int v3_replace_env (struct guest_info * core, const char * envname, const char * newval);
+
+int v3_inject_strings (struct guest_info * core, const char ** argstrs, const char ** envstrs, uint_t argcnt, uint_t envcnt);
+
+addr_t v3_prepare_guest_stack (struct guest_info * core, uint_t bytes_needed);
#endif
+
+#endif
+
#include <palacios/vmm.h>
+#define SW_INTR_SYSCALL_VEC 0x80
int v3_handle_swintr (struct guest_info * core);
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __SYSCALL_HIJACK_H__
+#define __SYSCALL_HIJACK_H__
+
+#define STAR_MSR 0xc0000081 /* Legacy mode SYSCALL target */
+#define LSTAR_MSR 0xc0000082 /* Long mode SYSCALL target */
+#define CSTAR_MSR 0xc0000083 /* compat mode SYSCALL target */
+#define SF_MASK_MSR 0xc0000084 /* EFLAGS mask for syscall */
+#define SYSENTER_CS_MSR 0x00000174 /* SYSENTER/EXIT are for legacy mode only on AMD */
+#define SYSENTER_ESP_MSR 0x00000175
+#define SYSENTER_EIP_MSR 0x00000176
+
+/* Intel specific */
+#define IA32_SYSENTER_CS_MSR 0x00000174
+#define IA32_SYSENTER_ESP_MSR 0x00000175
+#define IA32_SYSENTER_EIP_MSR 0x00000176
+
+#define MAX_CHARS 256
+#ifndef max
+ #define max(a, b) ( ((a) > (b)) ? (a) : (b) )
+#endif
+
+#define SYSCALL_INT_VECTOR 0x80
+#define SYSCALL_CPUID_NUM 0x80000001
+#define SYSENTER_CPUID_NUM 0x00000001
+
+#define SYSCALL_MAGIC_ADDR 0xffffffffffffffff
+
+#define KERNEL_PHYS_LOAD_ADDR 0x1000000
+
+
+struct v3_syscall_info {
+ uint64_t target_addr;
+ uint8_t syscall_map_injected;
+ char * syscall_page_backup;
+ uint8_t * syscall_map;
+ addr_t syscall_stub;
+ // state save area
+ addr_t ssa;
+};
+
+int v3_hook_syscall (struct guest_info * core,
+ uint_t syscall_nr,
+ int (*handler)(struct guest_info * core, uint_t syscall_nr, void * priv_data),
+ void * priv_data);
+
+int v3_hook_passthrough_syscall (struct guest_info * core, uint_t syscall_nr);
+int v3_syscall_handler (struct guest_info * core, uint8_t vector, void * priv_data);
+
+#endif
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __SYSCALL_REF_H__
+#define __SYSCALL_REF_H__
+
+char * get_linux_syscall_name32 (uint_t syscall_nr);
+char * get_linux_syscall_name64 (uint_t syscall_nr);
+
+
+/* 32bit syscalls */
+#define SYS32_RESTART_SYSCALL 0
+#define SYS32_EXIT 1
+#define SYS32_FORK 2
+#define SYS32_READ 3
+#define SYS32_WRITE 4
+#define SYS32_OPEN 5
+#define SYS32_CLOSE 6
+#define SYS32_WAITPID 7
+#define SYS32_CREAT 8
+#define SYS32_LINK 9
+#define SYS32_UNLINK 10
+#define SYS32_EXECVE 11
+#define SYS32_CHDIR 12
+#define SYS32_TIME 13
+#define SYS32_MKNOD 14
+#define SYS32_CHMOD 15
+#define SYS32_LCHOWN 16
+#define SYS32_BREAK 17
+#define SYS32_OLDSTAT 18
+#define SYS32_LSEEK 19
+#define SYS32_GETPID 20
+#define SYS32_MOUNT 21
+#define SYS32_UMOUNT 22
+#define SYS32_SETUID 23
+#define SYS32_GETUID 24
+#define SYS32_STIME 25
+#define SYS32_PTRACE 26
+#define SYS32_ALARM 27
+#define SYS32_OLDFSTAT 28
+#define SYS32_PAUSE 29
+#define SYS32_UTIME 30
+#define SYS32_STTY 31
+#define SYS32_GTTY 32
+#define SYS32_ACCESS 33
+#define SYS32_NICE 34
+#define SYS32_FTIME 35
+#define SYS32_SYNC 36
+#define SYS32_KILL 37
+#define SYS32_RENAME 38
+#define SYS32_MKDIR 39
+#define SYS32_RMDIR 40
+#define SYS32_DUP 41
+#define SYS32_PIPE 42
+#define SYS32_TIMES 43
+#define SYS32_PROF 44
+#define SYS32_BRK 45
+#define SYS32_SETGID 46
+#define SYS32_GETGID 47
+#define SYS32_SIGNAL 48
+#define SYS32_GETEUID 49
+#define SYS32_GETEGID 50
+#define SYS32_ACCT 51
+#define SYS32_UMOUNT2 52
+#define SYS32_LOCK 53
+#define SYS32_IOCTL 54
+#define SYS32_FCNTL 55
+#define SYS32_MPX 56
+#define SYS32_SETPGID 57
+#define SYS32_ULIMIT 58
+#define SYS32_OLDOLDUNAME 59
+#define SYS32_UMASK 60
+#define SYS32_CHROOT 61
+#define SYS32_USTAT 62
+#define SYS32_DUP2 63
+#define SYS32_GETPPID 64
+#define SYS32_GETPGRP 65
+#define SYS32_SETSID 66
+#define SYS32_SIGACTION 67
+#define SYS32_SGETMASK 68
+#define SYS32_SSETMASK 69
+#define SYS32_SETREUID 70
+#define SYS32_SETREGID 71
+#define SYS32_SIGSUSPEND 72
+#define SYS32_SIGPENDING 73
+#define SYS32_SETHOSTNAME 74
+#define SYS32_SETRLIMIT 75
+#define SYS32_GETRLIMIT 76
+#define SYS32_GETRUSAGE 77
+#define SYS32_GETTIMEOFDAY 78
+#define SYS32_SETTIMEOFDAY 79
+#define SYS32_GETGROUPS 80
+#define SYS32_SETGROUPS 81
+#define SYS32_SELECT 82
+#define SYS32_SYMLINK 83
+#define SYS32_OLDLSTAT 84
+#define SYS32_READLINK 85
+#define SYS32_USELIB 86
+#define SYS32_SWAPON 87
+#define SYS32_REBOOT 88
+#define SYS32_READDIR 89
+#define SYS32_MMAP 90
+#define SYS32_MUNMAP 91
+#define SYS32_TRUNCATE 92
+#define SYS32_FTRUNCATE 93
+#define SYS32_FCHMOD 94
+#define SYS32_FCHOWN 95
+#define SYS32_GETPRIORITY 96
+#define SYS32_SETPRIORITY 97
+#define SYS32_PROFIL 98
+#define SYS32_STATFS 99
+#define SYS32_FSTATFS 100
+#define SYS32_IOPERM 101
+#define SYS32_SOCKETCALL 102
+#define SYS32_SYSLOG 103
+#define SYS32_SETITIMER 104
+#define SYS32_GETITIMER 105
+#define SYS32_STAT 106
+#define SYS32_LSTAT 107
+#define SYS32_FSTAT 108
+#define SYS32_OLDUNAME 109
+#define SYS32_IOPL 110
+#define SYS32_VHANGUP 111
+#define SYS32_IDLE 112
+#define SYS32_VM86OLD 113
+#define SYS32_WAIT4 114
+#define SYS32_SWAPOFF 115
+#define SYS32_SYSINFO 116
+#define SYS32_IPC 117
+#define SYS32_FSYNC 118
+#define SYS32_SIGRETURN 119
+#define SYS32_CLONE 120
+#define SYS32_SETDOMAINNAME 121
+#define SYS32_UNAME 122
+#define SYS32_MODIFY_LDT 123
+#define SYS32_ADJTIMEX 124
+#define SYS32_MPROTECT 125
+#define SYS32_SIGPROCMASK 126
+#define SYS32_CREATE_MODULE 127
+#define SYS32_INIT_MODULE 128
+#define SYS32_DELETE_MODULE 129
+#define SYS32_GET_KERNEL_SYMS 130
+#define SYS32_QUOTACTL 131
+#define SYS32_GETPGID 132
+#define SYS32_FCHDIR 133
+#define SYS32_BDFLUSH 134
+#define SYS32_SYSFS 135
+#define SYS32_PERSONALITY 136
+#define SYS32_AFS_SYSCALL 137
+#define SYS32_SETFSUID 138
+#define SYS32_SETFSGID 139
+#define SYS32__LLSEEK 140
+#define SYS32_GETDENTS 141
+#define SYS32__NEWSELECT 142
+#define SYS32_FLOCK 143
+#define SYS32_MSYNC 144
+#define SYS32_READV 145
+#define SYS32_WRITEV 146
+#define SYS32_GETSID 147
+#define SYS32_FDATASYNC 148
+#define SYS32__SYSCTL 149
+#define SYS32_MLOCK 150
+#define SYS32_MUNLOCK 151
+#define SYS32_MLOCKALL 152
+#define SYS32_MUNLOCKALL 153
+#define SYS32_SCHED_SETPARAM 154
+#define SYS32_SCHED_GETPARAM 155
+#define SYS32_SCHED_SETSCHEDULER 156
+#define SYS32_SCHED_GETSCHEDULER 157
+#define SYS32_SCHED_YIELD 158
+#define SYS32_SCHED_GET_PRIORITY_MAX 159
+#define SYS32_SCHED_GET_PRIORITY_MIN 160
+#define SYS32_SCHED_RR_GET_INTERVAL 161
+#define SYS32_NANOSLEEP 162
+#define SYS32_MREMAP 163
+#define SYS32_SETRESUID 164
+#define SYS32_GETRESUID 165
+#define SYS32_VM86 166
+#define SYS32_QUERY_MODULE 167
+#define SYS32_POLL 168
+#define SYS32_NFSSERVCTL 169
+#define SYS32_SETRESGID 170
+#define SYS32_GETRESGID 171
+#define SYS32_PRCTL 172
+#define SYS32_RT_SIGRETURN 173
+#define SYS32_RT_SIGACTION 174
+#define SYS32_RT_SIGPROCMASK 175
+#define SYS32_RT_SIGPENDING 176
+#define SYS32_RT_SIGTIMEDWAIT 177
+#define SYS32_RT_SIGQUEUEINFO 178
+#define SYS32_RT_SIGSUSPEND 179
+#define SYS32_PREAD64 180
+#define SYS32_PWRITE64 181
+#define SYS32_CHOWN 182
+#define SYS32_GETCWD 183
+#define SYS32_CAPGET 184
+#define SYS32_CAPSET 185
+#define SYS32_SIGALTSTACK 186
+#define SYS32_SENDFILE 187
+#define SYS32_GETPMSG 188
+#define SYS32_PUTPMSG 189
+#define SYS32_VFORK 190
+#define SYS32_UGETRLIMIT 191
+#define SYS32_MMAP2 192
+#define SYS32_TRUNCATE64 193
+#define SYS32_FTRUNCATE64 194
+#define SYS32_STAT64 195
+#define SYS32_LSTAT64 196
+#define SYS32_FSTAT64 197
+#define SYS32_LCHOWN32 198
+#define SYS32_GETUID32 199
+#define SYS32_GETGID32 200
+#define SYS32_GETEUID32 201
+#define SYS32_GETEGID32 202
+#define SYS32_SETREUID32 203
+#define SYS32_SETREGID32 204
+#define SYS32_GETGROUPS32 205
+#define SYS32_SETGROUPS32 206
+#define SYS32_FCHOWN32 207
+#define SYS32_SETRESUID32 208
+#define SYS32_GETRESUID32 209
+#define SYS32_SETRESGID32 210
+#define SYS32_GETRESGID32 211
+#define SYS32_CHOWN32 212
+#define SYS32_SETUID32 213
+#define SYS32_SETGID32 214
+#define SYS32_SETFSUID32 215
+#define SYS32_SETFSGID32 216
+#define SYS32_PIVOT_ROOT 217
+#define SYS32_MINCORE 218
+#define SYS32_MADVISE1 219
+#define SYS32_GETDENTS64 220
+#define SYS32_FCNTL64 221
+#define SYS32_GETTID 224
+#define SYS32_READAHEAD 225
+#define SYS32_SETXATTR 226
+#define SYS32_LSETXATTR 227
+#define SYS32_FSETXATTR 228
+#define SYS32_GETXATTR 229
+#define SYS32_LGETXATTR 230
+#define SYS32_FGETXATTR 231
+#define SYS32_LISTXATTR 232
+#define SYS32_LLISTXATTR 233
+#define SYS32_FLISTXATTR 234
+#define SYS32_REMOVEXATTR 235
+#define SYS32_LREMOVEXATTR 236
+#define SYS32_FREMOVEXATTR 237
+#define SYS32_TKILL 238
+#define SYS32_SENDFILE64 239
+#define SYS32_FUTEX 240
+#define SYS32_SCHED_SETAFFINITY 241
+#define SYS32_SCHED_GETAFFINITY 242
+#define SYS32_SET_THREAD_AREA 243
+#define SYS32_GET_THREAD_AREA 244
+#define SYS32_IO_SETUP 245
+#define SYS32_IO_DESTROY 246
+#define SYS32_IO_GETEVENTS 247
+#define SYS32_IO_SUBMIT 248
+#define SYS32_IO_CANCEL 249
+#define SYS32_FADVISE64 250
+#define SYS32_EXIT_GROUP 252
+#define SYS32_LOOKUP_DCOOKIE 253
+#define SYS32_EPOLL_CREATE 254
+#define SYS32_EPOLL_CTL 255
+#define SYS32_EPOLL_WAIT 256
+#define SYS32_REMAP_FILE_PAGES 257
+#define SYS32_SET_TID_ADDRESS 258
+#define SYS32_TIMER_CREATE 259
+#define SYS32_TIMER_SETTIME 260
+#define SYS32_TIMER_GETTIME 261
+#define SYS32_TIMER_GETOVERRUN 262
+#define SYS32_TIMER_DELETE 263
+#define SYS32_CLOCK_SETTIME 264
+#define SYS32_CLOCK_GETTIME 265
+#define SYS32_CLOCK_GETRES 266
+#define SYS32_CLOCK_NANOSLEEP 267
+#define SYS32_STATFS64 268
+#define SYS32_FSTATFS64 269
+#define SYS32_TGKILL 270
+#define SYS32_UTIMES 271
+#define SYS32_FADVISE64_64 272
+#define SYS32_VSERVER 273
+#define SYS32_MBIND 274
+#define SYS32_GET_MEMPOLICY 275
+#define SYS32_SET_MEMPOLICY 276
+#define SYS32_MQ_OPEN 277
+#define SYS32_MQ_UNLINK 278
+#define SYS32_MQ_TIMEDSEND 279
+#define SYS32_MQ_TIMEDRECEIVE 280
+#define SYS32_MQ_NOTIFY 281
+#define SYS32_MQ_GETSETATTR 282
+#define SYS32_KEXEC_LOAD 283
+#define SYS32_WAITID 284
+#define SYS32_SYS32_SETALTROOT 285
+#define SYS32_ADD_KEY 286
+#define SYS32_REQUEST_KEY 287
+#define SYS32_KEYCTL 288
+#define SYS32_IOPRIO_SET 289
+#define SYS32_IOPRIO_GET 290
+#define SYS32_INOTIFY_INIT 291
+#define SYS32_INOTIFY_ADD_WATCH 292
+#define SYS32_INOTIFY_RM_WATCH 293
+#define SYS32_MIGRATE_PAGES 294
+#define SYS32_OPENAT 295
+#define SYS32_MKDIRAT 296
+#define SYS32_MKNODAT 297
+#define SYS32_FCHOWNAT 298
+#define SYS32_FUTIMESAT 299
+#define SYS32_FSTATAT64 300
+#define SYS32_UNLINKAT 301
+#define SYS32_RENAMEAT 302
+#define SYS32_LINKAT 303
+#define SYS32_SYMLINKAT 304
+#define SYS32_READLINKAT 305
+#define SYS32_FCHMODAT 306
+#define SYS32_FACCESSAT 307
+#define SYS32_PSELECT6 308
+#define SYS32_PPOLL 309
+#define SYS32_UNSHARE 310
+#define SYS32_SET_ROBUST_LIST 311
+#define SYS32_GET_ROBUST_LIST 312
+#define SYS32_SPLICE 313
+#define SYS32_SYNC_FILE_RANGE 314
+#define SYS32_TEE 315
+#define SYS32_VMSPLICE 316
+#define SYS32_MOVE_PAGES 317
+#define SYS32_GETCPU 318
+#define SYS32_EPOLL_PWAIT 319
+#define SYS32_UTIMENSAT 320
+#define SYS32_SIGNALFD 321
+#define SYS32_TIMERFD_CREATE 322
+#define SYS32_EVENTFD 323
+#define SYS32_FALLOCATE 324
+#define SYS32_TIMERFD_SETTIME 325
+#define SYS32_TIMERFD_GETTIME 326
+#define SYS32_SIGNALFD4 327
+#define SYS32_EVENTFD2 328
+#define SYS32_EPOLL_CREATE1 329
+#define SYS32_DUP3 330
+#define SYS32_PIPE2 331
+#define SYS32_INOTIFY_INIT1 332
+#define SYS32_PREADV 333
+#define SYS32_PWRITEV 334
+#define SYS32_RT_TGSIGQUEUEINFO 335
+#define SYS32_PERF_EVENT_OPEN 336
+
+/* 64bit syscalls */
+#define SYS64_READ 0
+#define SYS64_WRITE 1
+#define SYS64_OPEN 2
+#define SYS64_CLOSE 3
+#define SYS64_STAT 4
+#define SYS64_FSTAT 5
+#define SYS64_LSTAT 6
+#define SYS64_POLL 7
+#define SYS64_LSEEK 8
+#define SYS64_MMAP 9
+#define SYS64_MPROTECT 10
+#define SYS64_MUNMAP 11
+#define SYS64_BRK 12
+#define SYS64_RT_SIGACTION 13
+#define SYS64_RT_SIGPROCMASK 14
+#define SYS64_RT_SIGRETURN 15
+#define SYS64_IOCTL 16
+#define SYS64_PREAD64 17
+#define SYS64_PWRITE64 18
+#define SYS64_READV 19
+#define SYS64_WRITEV 20
+#define SYS64_ACCESS 21
+#define SYS64_PIPE 22
+#define SYS64_SELECT 23
+#define SYS64_SCHED_YIELD 24
+#define SYS64_MREMAP 25
+#define SYS64_MSYNC 26
+#define SYS64_MINCORE 27
+#define SYS64_MADVISE 28
+#define SYS64_SHMGET 29
+#define SYS64_SHMAT 30
+#define SYS64_SHMCTL 31
+#define SYS64_DUP 32
+#define SYS64_DUP2 33
+#define SYS64_PAUSE 34
+#define SYS64_NANOSLEEP 35
+#define SYS64_GETITIMER 36
+#define SYS64_ALARM 37
+#define SYS64_SETITIMER 38
+#define SYS64_GETPID 39
+#define SYS64_SENDFILE 40
+#define SYS64_SOCKET 41
+#define SYS64_CONNECT 42
+#define SYS64_ACCEPT 43
+#define SYS64_SENDTO 44
+#define SYS64_RECVFROM 45
+#define SYS64_SENDMSG 46
+#define SYS64_RECVMSG 47
+#define SYS64_SHUTDOWN 48
+#define SYS64_BIND 49
+#define SYS64_LISTEN 50
+#define SYS64_GETSOCKNAME 51
+#define SYS64_GETPEERNAME 52
+#define SYS64_SOCKETPAIR 53
+#define SYS64_SETSOCKOPT 54
+#define SYS64_GETSOCKOPT 55
+#define SYS64_CLONE 56
+#define SYS64_FORK 57
+#define SYS64_VFORK 58
+#define SYS64_EXECVE 59
+#define SYS64_EXIT 60
+#define SYS64_WAIT4 61
+#define SYS64_KILL 62
+#define SYS64_UNAME 63
+#define SYS64_SEMGET 64
+#define SYS64_SEMOP 65
+#define SYS64_SEMCTL 66
+#define SYS64_SHMDT 67
+#define SYS64_MSGGET 68
+#define SYS64_MSGSND 69
+#define SYS64_MSGRCV 70
+#define SYS64_MSGCTL 71
+#define SYS64_FCNTL 72
+#define SYS64_FLOCK 73
+#define SYS64_FSYNC 74
+#define SYS64_FDATASYNC 75
+#define SYS64_TRUNCATE 76
+#define SYS64_FTRUNCATE 77
+#define SYS64_GETDENTS 78
+#define SYS64_GETCWD 79
+#define SYS64_CHDIR 80
+#define SYS64_FCHDIR 81
+#define SYS64_RENAME 82
+#define SYS64_MKDIR 83
+#define SYS64_RMDIR 84
+#define SYS64_CREAT 85
+#define SYS64_LINK 86
+#define SYS64_UNLINK 87
+#define SYS64_SYMLINK 88
+#define SYS64_READLINK 89
+#define SYS64_CHMOD 90
+#define SYS64_FCHMOD 91
+#define SYS64_CHOWN 92
+#define SYS64_FCHOWN 93
+#define SYS64_LCHOWN 94
+#define SYS64_UMASK 95
+#define SYS64_GETTIMEOFDAY 96
+#define SYS64_GETRLIMIT 97
+#define SYS64_GETRUSAGE 98
+#define SYS64_SYSINFO 99
+#define SYS64_TIMES 100
+#define SYS64_PTRACE 101
+#define SYS64_GETUID 102
+#define SYS64_SYSLOG 103
+#define SYS64_GETGID 104
+#define SYS64_SETUID 105
+#define SYS64_SETGID 106
+#define SYS64_GETEUID 107
+#define SYS64_GETEGID 108
+#define SYS64_SETPGID 109
+#define SYS64_GETPPID 110
+#define SYS64_GETPGRP 111
+#define SYS64_SETSID 112
+#define SYS64_SETREUID 113
+#define SYS64_SETREGID 114
+#define SYS64_GETGROUPS 115
+#define SYS64_SETGROUPS 116
+#define SYS64_SETRESUID 117
+#define SYS64_GETRESUID 118
+#define SYS64_SETRESGID 119
+#define SYS64_GETRESGID 120
+#define SYS64_GETPGID 121
+#define SYS64_SETFSUID 122
+#define SYS64_SETFSGID 123
+#define SYS64_GETSID 124
+#define SYS64_CAPGET 125
+#define SYS64_CAPSET 126
+#define SYS64_RT_SIGPENDING 127
+#define SYS64_RT_SIGTIMEDWAIT 128
+#define SYS64_RT_SIGQUEUEINFO 129
+#define SYS64_RT_SIGSUSPEND 130
+#define SYS64_SIGALTSTACK 131
+#define SYS64_UTIME 132
+#define SYS64_MKNOD 133
+#define SYS64_USELIB 134
+#define SYS64_PERSONALITY 135
+#define SYS64_USTAT 136
+#define SYS64_STATFS 137
+#define SYS64_FSTATFS 138
+#define SYS64_SYSFS 139
+#define SYS64_GETPRIORITY 140
+#define SYS64_SETPRIORITY 141
+#define SYS64_SCHED_SETPARAM 142
+#define SYS64_SCHED_GETPARAM 143
+#define SYS64_SCHED_SETSCHEDULER 144
+#define SYS64_SCHED_GETSCHEDULER 145
+#define SYS64_SCHED_GET_PRIORITY_MAX 146
+#define SYS64_SCHED_GET_PRIORITY_MIN 147
+#define SYS64_SCHED_RR_GET_INTERVAL 148
+#define SYS64_MLOCK 149
+#define SYS64_MUNLOCK 150
+#define SYS64_MLOCKALL 151
+#define SYS64_MUNLOCKALL 152
+#define SYS64_VHANGUP 153
+#define SYS64_MODIFY_LDT 154
+#define SYS64_PIVOT_ROOT 155
+#define SYS64__SYSCTL 156
+#define SYS64_PRCTL 157
+#define SYS64_ARCH_PRCTL 158
+#define SYS64_ADJTIMEX 159
+#define SYS64_SETRLIMIT 160
+#define SYS64_CHROOT 161
+#define SYS64_SYNC 162
+#define SYS64_ACCT 163
+#define SYS64_SETTIMEOFDAY 164
+#define SYS64_MOUNT 165
+#define SYS64_UMOUNT2 166
+#define SYS64_SWAPON 167
+#define SYS64_SWAPOFF 168
+#define SYS64_REBOOT 169
+#define SYS64_SETHOSTNAME 170
+#define SYS64_SETDOMAINNAME 171
+#define SYS64_IOPL 172
+#define SYS64_IOPERM 173
+#define SYS64_CREATE_MODULE 174
+#define SYS64_INIT_MODULE 175
+#define SYS64_DELETE_MODULE 176
+#define SYS64_GET_KERNEL_SYMS 177
+#define SYS64_QUERY_MODULE 178
+#define SYS64_QUOTACTL 179
+#define SYS64_NFSSERVCTL 180
+#define SYS64_GETPMSG 181
+#define SYS64_PUTPMSG 182
+#define SYS64_AFS_SYSCALL 183
+#define SYS64_TUXCALL 184
+#define SYS64_SECURITY 185
+#define SYS64_GETTID 186
+#define SYS64_READAHEAD 187
+#define SYS64_SETXATTR 188
+#define SYS64_LSETXATTR 189
+#define SYS64_FSETXATTR 190
+#define SYS64_GETXATTR 191
+#define SYS64_LGETXATTR 192
+#define SYS64_FGETXATTR 193
+#define SYS64_LISTXATTR 194
+#define SYS64_LLISTXATTR 195
+#define SYS64_FLISTXATTR 196
+#define SYS64_REMOVEXATTR 197
+#define SYS64_LREMOVEXATTR 198
+#define SYS64_FREMOVEXATTR 199
+#define SYS64_TKILL 200
+#define SYS64_TIME 201
+#define SYS64_FUTEX 202
+#define SYS64_SCHED_SETAFFINITY 203
+#define SYS64_SCHED_GETAFFINITY 204
+#define SYS64_SET_THREAD_AREA 205
+#define SYS64_IO_SETUP 206
+#define SYS64_IO_DESTROY 207
+#define SYS64_IO_GETEVENTS 208
+#define SYS64_IO_SUBMIT 209
+#define SYS64_IO_CANCEL 210
+#define SYS64_GET_THREAD_AREA 211
+#define SYS64_LOOKUP_DCOOKIE 212
+#define SYS64_EPOLL_CREATE 213
+#define SYS64_EPOLL_CTL_OLD 214
+#define SYS64_EPOLL_WAIT_OLD 215
+#define SYS64_REMAP_FILE_PAGES 216
+#define SYS64_GETDENTS64 217
+#define SYS64_SET_TID_ADDRESS 218
+#define SYS64_RESTART_SYSCALL 219
+#define SYS64_SEMTIMEDOP 220
+#define SYS64_FADVISE64 221
+#define SYS64_TIMER_CREATE 222
+#define SYS64_TIMER_SETTIME 223
+#define SYS64_TIMER_GETTIME 224
+#define SYS64_TIMER_GETOVERRUN 225
+#define SYS64_TIMER_DELETE 226
+#define SYS64_CLOCK_SETTIME 227
+#define SYS64_CLOCK_GETTIME 228
+#define SYS64_CLOCK_GETRES 229
+#define SYS64_CLOCK_NANOSLEEP 230
+#define SYS64_EXIT_GROUP 231
+#define SYS64_EPOLL_WAIT 232
+#define SYS64_EPOLL_CTL 233
+#define SYS64_TGKILL 234
+#define SYS64_UTIMES 235
+#define SYS64_VSERVER 236
+#define SYS64_MBIND 237
+#define SYS64_SET_MEMPOLICY 238
+#define SYS64_GET_MEMPOLICY 239
+#define SYS64_MQ_OPEN 240
+#define SYS64_MQ_UNLINK 241
+#define SYS64_MQ_TIMEDSEND 242
+#define SYS64_MQ_TIMEDRECEIVE 243
+#define SYS64_MQ_NOTIFY 244
+#define SYS64_MQ_GETSETATTR 245
+#define SYS64_KEXEC_LOAD 246
+#define SYS64_WAITID 247
+#define SYS64_ADD_KEY 248
+#define SYS64_REQUEST_KEY 249
+#define SYS64_KEYCTL 250
+#define SYS64_IOPRIO_SET 251
+#define SYS64_IOPRIO_GET 252
+#define SYS64_INOTIFY_INIT 253
+#define SYS64_INOTIFY_ADD_WATCH 254
+#define SYS64_INOTIFY_RM_WATCH 255
+#define SYS64_MIGRATE_PAGES 256
+#define SYS64_OPENAT 257
+#define SYS64_MKDIRAT 258
+#define SYS64_MKNODAT 259
+#define SYS64_FCHOWNAT 260
+#define SYS64_FUTIMESAT 261
+#define SYS64_NEWFSTATAT 262
+#define SYS64_UNLINKAT 263
+#define SYS64_RENAMEAT 264
+#define SYS64_LINKAT 265
+#define SYS64_SYMLINKAT 266
+#define SYS64_READLINKAT 267
+#define SYS64_FCHMODAT 268
+#define SYS64_FACCESSAT 269
+#define SYS64_PSELECT6 270
+#define SYS64_PPOLL 271
+#define SYS64_UNSHARE 272
+#define SYS64_SET_ROBUST_LIST 273
+#define SYS64_GET_ROBUST_LIST 274
+#define SYS64_SPLICE 275
+#define SYS64_TEE 276
+#define SYS64_SYNC_FILE_RANGE 277
+#define SYS64_VMSPLICE 278
+#define SYS64_MOVE_PAGES 279
+#define SYS64_UTIMENSAT 280
+#define SYS64_EPOLL_PWAIT 281
+#define SYS64_SIGNALFD 282
+#define SYS64_TIMERFD_CREATE 283
+#define SYS64_EVENTFD 284
+#define SYS64_FALLOCATE 285
+#define SYS64_TIMERFD_SETTIME 286
+#define SYS64_TIMERFD_GETTIME 287
+#define SYS64_ACCEPT4 288
+#define SYS64_SIGNALFD4 289
+#define SYS64_EVENTFD2 290
+#define SYS64_EPOLL_CREATE1 291
+#define SYS64_DUP3 292
+#define SYS64_PIPE2 293
+#define SYS64_INOTIFY_INIT1 294
+#define SYS64_PREADV 295
+#define SYS64_PWRITEV 296
+#define SYS64_RT_TGSIGQUEUEINFO 297
+#define SYS64_PERF_EVENT_OPEN 298
+
+
+#endif
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2012, Kyle C. Hale <kh@u.northwestern.edu>
+ * Copyright (c) 2012, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Authors: Kyle C. Hale <kh@u.northwestern.edu>
+ * Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_HOST_HYPERCALL_H__
+#define __VMM_HOST_HYPERCALL_H__
+
+#include <palacios/vmm.h>
+
+/* palacios v3_vm_info struct is opaque to the host */
+typedef void * host_vm_info_t;
+
+typedef void * palacios_core_t;
+
+
+// Notice that host implementation is itself
+// palacios-specific at this point. It must be
+// include the palacios-headers needed to understand
+// a guest_info, etc.
+//
+// The idea here is to make it possible to create something
+// like a linux kernel module, that is compiled against
+// palacios itself, but inserted after palacios.
+// The module then make full use of palacios functions
+// to manipulate guest state, as if it were a part of
+// palacios
+//
+
+#define GET_SET_REG_DECL(R) \
+ uint64_t (*get_##R)(palacios_core_t core); \
+ void (*set_##R)(palacios_core_t core, uint64_t val);
+
+
+
+struct guest_accessors {
+ // You can read/write the GPRs
+ GET_SET_REG_DECL(rax)
+ GET_SET_REG_DECL(rbx)
+ GET_SET_REG_DECL(rcx)
+ GET_SET_REG_DECL(rdx)
+ GET_SET_REG_DECL(rsi)
+ GET_SET_REG_DECL(rdi)
+ GET_SET_REG_DECL(rbp)
+ GET_SET_REG_DECL(rsp)
+ GET_SET_REG_DECL(r8)
+ GET_SET_REG_DECL(r9)
+ GET_SET_REG_DECL(r10)
+ GET_SET_REG_DECL(r11)
+ GET_SET_REG_DECL(r12)
+ GET_SET_REG_DECL(r13)
+ GET_SET_REG_DECL(r14)
+ GET_SET_REG_DECL(r15)
+
+ GET_SET_REG_DECL(rip);
+ GET_SET_REG_DECL(rflags)
+ GET_SET_REG_DECL(cr0)
+ GET_SET_REG_DECL(cr2)
+ GET_SET_REG_DECL(cr3)
+ GET_SET_REG_DECL(cr4)
+ GET_SET_REG_DECL(cr8)
+ GET_SET_REG_DECL(efer)
+
+ int (*gva_to_hva)(palacios_core_t core, uint64_t gva, uint64_t *hva);
+ int (*gva_to_gpa)(palacios_core_t core, uint64_t gva, uint64_t *gpa);
+ int (*gpa_to_hva)(palacios_core_t core, uint64_t gpa, uint64_t *hva);
+
+ int (*read_gva)(palacios_core_t core, uint64_t addr,
+ int n, void *dest);
+ int (*read_gpa)(palacios_core_t core, uint64_t addr,
+ int n, void *dest);
+
+ int (*write_gva)(palacios_core_t core, uint64_t addr,
+ int n, void *src);
+ int (*write_gpa)(palacios_core_t core, uint64_t addr,
+ int n, void *src);
+};
+
+
+
+int v3_register_host_hypercall(host_vm_info_t * vm,
+ unsigned int hypercall_id,
+ int (*hypercall)(palacios_core_t core,
+ unsigned int hcall_id,
+ struct guest_accessors *accessors,
+ void *priv_data),
+ void *priv_data);
+
+int v3_unregister_host_hypercall(host_vm_info_t *vm,
+ unsigned int hypercall_id);
+
+#ifdef __V3VEE__
+
+#endif /* !__V3VEE__ */
+#endif
+
struct v3_ctrl_regs ctrl_regs;
struct v3_dbg_regs dbg_regs;
struct v3_segments segments;
+ struct v3_msrs msrs;
void * vmm_data;
-int v3_read_gva_memory(struct guest_info * guest_info, addr_t guest_va, int count, uchar_t * dest);
-int v3_read_gpa_memory(struct guest_info * guest_info, addr_t guest_pa, int count, uchar_t * dest);
-int v3_write_gpa_memory(struct guest_info * guest_info, addr_t guest_pa, int count, uchar_t * src);
-// TODO int write_guest_va_memory(struct guest_info * guest_info, addr_t guest_va, int count, char * src);
+int v3_read_gva_memory(struct guest_info * guest_info, addr_t guest_va, int count, uint8_t * dest);
+int v3_read_gpa_memory(struct guest_info * guest_info, addr_t guest_pa, int count, uint8_t * dest);
+int v3_write_gpa_memory(struct guest_info * guest_info, addr_t guest_pa, int count, uint8_t * src);
+int v3_write_gva_memory(struct guest_info * guest_info, addr_t guest_va, int count, uint8_t * src);
#endif // ! __V3VEE__
-void Init_V3(struct v3_os_hooks * hooks, int num_cpus);
+void Init_V3(struct v3_os_hooks * hooks, char * cpus, int num_cpus);
void Shutdown_V3( void );
};
+struct v3_msrs {
+ v3_reg_t star;
+ v3_reg_t lstar;
+ v3_reg_t sfmask;
+ v3_reg_t kern_gs_base;
+};
+
struct v3_dbg_regs {
v3_reg_t dr0;
help
Provides the inspection extension
-config EXT_SW_INTERRUPTS
- bool "Enable interception and hooking of software interrupts"
- default n
- help
- This feature will cause the VMM to intercept the execution
- of software interrupts (i.e. the INTn instruction) and enable
- any INT vector to be hooked. Extension name is "swintr_intercept"
-
-config DEBUG_EXT_SW_INTERRUPTS
- bool "Enable debugging of software interrupt interception code"
- depends on EXT_SW_INTERRUPTS
- default n
- help
- This will enable useful debugging printouts for software
- intercept code
-
-config EXT_SWINTR_PASSTHROUGH
- bool "Hook all unhandled sofware interrupts for passthrough"
- depends on EXT_SW_INTERRUPTS
- default n
- help
- If enabled, this will cause all software interrupts
- (INT instruction vectors) to be hooked for passthrough.
- May reduce performance but useful for debugging.
-
-config EXT_SYSCALL_HIJACK
- bool "Enable System Call Hijacking"
- depends on EXT_SW_INTERRUPTS
- default n
- help
- Enable the VMM to hijack system calls executed by the guest.
- If enabled, the VMM will hook execution of INT 80
-
-config DEBUG_EXT_SYSCALL_HIJACK
- bool "Enable Syscall Hijack Debug in Palacios"
- depends on EXT_SYSCALL_HIJACK
- default n
- help
- Enable Debugging printouts for syscall hijacking code
- in Palacios
-
-config EXT_SYSCALL_PASSTHROUGH
- bool "Hook all unhandled system calls for passthrough"
- depends on EXT_SYSCALL_HIJACK
- default n
- help
- If enabled, this option will cause all system calls
- that are not explicitly hooked to be hooked for
- passthrough. This is useful for debugging.
-
endmenu
obj-$(V3_CONFIG_EXT_VTIME) += ext_vtime.o
obj-$(V3_CONFIG_EXT_INSPECTOR) += ext_inspector.o
obj-$(V3_CONFIG_EXT_MACH_CHECK) += ext_mcheck.o
-obj-$(V3_CONFIG_EXT_SW_INTERRUPTS) += ext_sw_intr.o
-obj-$(V3_CONFIG_EXT_SYSCALL_HIJACK) += ext_syscall_hijack.o
+++ /dev/null
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico. You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
- * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
- * All rights reserved.
- *
- * Author: Kyle C. Hale <kh@u.northwestern.edu>
- *
- * This is free software. You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-
-#include <palacios/vmm.h>
-#include <palacios/vm_guest.h>
-#include <palacios/vmm_string.h>
-#include <palacios/vmm_syscall_hijack.h>
-#include <palacios/vmm_hashtable.h>
-#include <palacios/vmm_execve_hook.h>
-
-
-
-static int free_hook (struct guest_info * core, struct exec_hook * hook) {
- list_del(&(hook->hook_node));
- V3_Free(hook);
- return 0;
-}
-
-static uint_t exec_hash_fn (addr_t key) {
- return v3_hash_long(key, sizeof(void *) * 8);
-}
-
-
-static int exec_eq_fn (addr_t key1, addr_t key2) {
- return (key1 == key2);
-}
-
-
-int v3_init_exec_hooks (struct guest_info * core) {
- struct v3_exec_hooks * hooks = &(core->exec_hooks);
-
- INIT_LIST_HEAD(&(hooks->hook_list));
-
- hooks->bin_table = v3_create_htable(0, exec_hash_fn, exec_eq_fn);
- return 0;
-}
-
-
-int v3_deinit_exec_hooks (struct guest_info * core) {
- struct v3_exec_hooks * hooks = &(core->exec_hooks);
- struct exec_hook * hook = NULL;
- struct exec_hook * tmp = NULL;
-
- list_for_each_entry_safe(hook, tmp, &(hooks->hook_list), hook_node) {
- free_hook(core, hook);
- }
-
- v3_free_htable(hooks->bin_table, 0, 0);
-
- return 0;
-}
-
-
-int v3_hook_executable (struct guest_info * core,
- const uchar_t * binfile,
- int (*handler)(struct guest_info * core, void * priv_data),
- void * priv_data)
-{
- struct exec_hook * hook = V3_Malloc(sizeof(struct exec_hook));
- struct v3_exec_hooks * hooks = &(core->exec_hooks);
- addr_t key;
-
- memset(hook, 0, sizeof(struct exec_hook));
-
- hook->handler = handler;
- hook->priv_data = priv_data;
-
- // we hash the name of the file to produce a key
- key = v3_hash_buffer((uchar_t*)binfile, strlen(binfile));
-
- v3_htable_insert(hooks->bin_table, key, (addr_t)hook);
- list_add(&(hook->hook_node), &(hooks->hook_list));
-
- return 0;
-}
-
-
+++ /dev/null
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico. You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
- * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
- * All rights reserved.
- *
- * Author: Kyle C. Hale <kh@u.northwestern.edu>
- *
- * This is free software. You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-
-#include <palacios/vmm.h>
-#include <palacios/vm_guest.h>
-#include <palacios/vmm_intr.h>
-#include <palacios/vmm_syscall_hijack.h>
-#include <palacios/vmm_mpi_accel.h>
-#include <palacios/vmm_process_environment.h>
-#include <palacios/vmm_execve_hook.h>
-
-
-int v3_init_mpi_accel (struct guest_info * core) {
- //binfile = "./envtest";
- //args[1] = "LD_PRELOAD=./libcwrap.so";
-
- v3_hook_swintr(core, 0x80, v3_syscall_handler, NULL);
- v3_hook_syscall(core, 11, v3_sysexecve_handler, NULL);
- v3_hook_executable(core, "./envtest", v3_mpi_preload_handler, NULL);
-
- return 0;
-}
-
-
-int v3_deinit_mpi_accel (struct guest_info * core) {
-
- return 0;
-}
-
-
-int v3_mpi_preload_handler (struct guest_info * core, void * priv_data) {
-
- char * a[3];
- a[0] = "TEST=HITHERE";
- a[1] = "TEST2=/blah/blah/blah";
- a[2] = "LD_PRELOAD=./libcwrap.so";
-
- int ret = v3_inject_strings(core, (const char**)NULL, (const char**)a, 0, 3);
- if (ret == -1) {
- PrintDebug("Error injecting strings in execve handler\n");
- return -1;
- }
-
- return 0;
-}
-
-
+++ /dev/null
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico. You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
- * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
- * All rights reserved.
- *
- * Author: Kyle C. Hale <kh@u.northwestern.edu>
- *
- * This is free software. You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#include <palacios/vmm.h>
-#include <palacios/vm_guest_mem.h>
-#include <palacios/vm_guest.h>
-#include <palacios/vmm_intr.h>
-#include <palacios/vmm_decoder.h>
-#include <palacios/vmm_string.h>
-#include <palacios/vmm_shadow_paging.h>
-#include <palacios/vmm_extensions.h>
-#include <palacios/vmm_intr.h>
-
-#include <interfaces/syscall_hijack.h>
-#include <interfaces/sw_intr.h>
-
-#include "syscall_ref.h"
-
-#ifndef V3_CONFIG_DEBUG_EXT_SYSCALL_HIJACK
-#undef PrintDebug
-#define PrintDebug(fmt, args...)
-#endif
-
-#define MAX_CHARS 256
-#ifndef max
- #define max(a, b) ( ((a) > (b)) ? (a) : (b) )
-#endif
-
-#define SYSCALL_INT_VECTOR 0x80
-
-
-struct v3_syscall_hook {
- int (*handler)(struct guest_info * core, uint_t syscall_nr, void * priv_data);
- void * priv_data;
-};
-
-static struct v3_syscall_hook * syscall_hooks[512];
-
-
-static int v3_syscall_handler (struct guest_info * core, uint8_t vector, void * priv_data) {
-
- uint_t syscall_nr = (uint_t) core->vm_regs.rax;
- int err = 0;
-
- struct v3_syscall_hook * hook = syscall_hooks[syscall_nr];
- if (hook == NULL) {
-#ifdef V3_CONFIG_EXT_SYSCALL_PASSTHROUGH
- if (v3_hook_passthrough_syscall(core, syscall_nr) == -1) {
- PrintDebug("Error hooking passthrough syscall\n");
- return -1;
- }
- hook = syscall_hooks[syscall_nr];
-#else
- return v3_raise_swintr(core, vector);
-#endif
- }
-
- err = hook->handler(core, syscall_nr, hook->priv_data);
- if (err == -1) {
- PrintDebug("V3 Syscall Handler: Error in syscall hook\n");
- return -1;
- }
-
- return 0;
-}
-
-
-static int init_syscall_hijack (struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
-
- return 0;
-}
-
-
-static int init_syscall_hijack_core (struct guest_info * core, void * priv_data) {
-
- v3_hook_swintr(core, SYSCALL_INT_VECTOR, v3_syscall_handler, NULL);
- return 0;
-}
-
-
-static void print_arg (struct guest_info * core, v3_reg_t reg, uint8_t argnum) {
-
- addr_t hva;
- int ret = 0;
-
- PrintDebug("\t ARG%d: INT - %ld\n", argnum, (long) reg);
-
- if (core->mem_mode == PHYSICAL_MEM) {
- ret = v3_gpa_to_hva(core, get_addr_linear(core, reg, &(core->segments.ds)), &hva);
- }
- else {
- ret = v3_gva_to_hva(core, get_addr_linear(core, reg, &(core->segments.ds)), &hva);
- }
-
- PrintDebug("\t STR - ");
- if (ret == -1) {
- PrintDebug("\n");
- return;
- }
-
- uint32_t c = max(MAX_CHARS, 4096 - (hva % 4096));
- int i = 0;
- for (; i < c && *((char*)(hva + i)) != 0; i++) {
- PrintDebug("%c", *((char*)(hva + i)));
- }
- PrintDebug("\n");
-}
-
-
-static void print_syscall (uint8_t is64, struct guest_info * core) {
-
- if (is64) {
- PrintDebug("Syscall #%ld: \"%s\"\n", (long)core->vm_regs.rax, get_linux_syscall_name64(core->vm_regs.rax));
- } else {
- PrintDebug("Syscall #%ld: \"%s\"\n", (long)core->vm_regs.rax, get_linux_syscall_name32(core->vm_regs.rax));
- }
-
- print_arg(core, core->vm_regs.rbx, 1);
- print_arg(core, core->vm_regs.rcx, 2);
- print_arg(core, core->vm_regs.rdx, 3);
-}
-
-
-
-
-static struct v3_extension_impl syscall_impl = {
- .name = "syscall_intercept",
- .init = init_syscall_hijack,
- .deinit = NULL,
- .core_init = init_syscall_hijack_core,
- .core_deinit = NULL,
- .on_entry = NULL,
- .on_exit = NULL
-};
-
-register_extension(&syscall_impl);
-
-
-
-
-static inline struct v3_syscall_hook * get_syscall_hook (struct guest_info * core, uint_t syscall_nr) {
- return syscall_hooks[syscall_nr];
-}
-
-
-int v3_hook_syscall (struct guest_info * core,
- uint_t syscall_nr,
- int (*handler)(struct guest_info * core, uint_t syscall_nr, void * priv_data),
- void * priv_data)
-{
- struct v3_syscall_hook * hook = (struct v3_syscall_hook *)V3_Malloc(sizeof(struct v3_syscall_hook));
-
-
- if (hook == NULL) {
- return -1;
- }
-
- if (get_syscall_hook(core, syscall_nr) != NULL) {
- PrintError("System Call #%d already hooked\n", syscall_nr);
- return -1;
- }
-
- hook->handler = handler;
- hook->priv_data = priv_data;
-
- syscall_hooks[syscall_nr] = hook;
-
- return 0;
-}
-
-
-static int passthrough_syscall_handler (struct guest_info * core, uint_t syscall_nr, void * priv_data) {
- print_syscall(0, core);
- return 0;
-}
-
-
-int v3_hook_passthrough_syscall (struct guest_info * core, uint_t syscall_nr) {
-
- int rc = v3_hook_syscall(core, syscall_nr, passthrough_syscall_handler, NULL);
-
- if (rc) {
- PrintError("failed to hook syscall 0x%x for passthrough (guest=0x%p)\n", syscall_nr, (void *)core);
- return -1;
- } else {
- PrintDebug("hooked syscall 0x%x for passthrough (guest=0x%p)\n", syscall_nr, (void *)core);
- return 0;
- }
-
- /* shouldn't get here */
- return 0;
-}
-
-/*
-int v3_sysexecve_handler (struct guest_info * core, uint_t syscall_nr, void * priv_data) {
- addr_t hva, key;
- struct exec_hook * hook;
- int ret;
-
- ret = v3_gva_to_hva(core, get_addr_linear(core, (addr_t)core->vm_regs.rbx, &(core->segments.ds)), &hva);
- if (ret == -1) {
- PrintDebug("Error translating file path in sysexecve handler\n");
- return -1;
- }
-
- key = v3_hash_buffer((uchar_t*)hva, strlen((uchar_t*)hva));
- if ((hook = (struct exec_hook*)v3_htable_search(core->exec_hooks.bin_table, key)) != NULL) {
- if (hook->handler(core, NULL) == -1) {
- PrintDebug("Error handling execve hook\n");
- return -1;
- }
- }
-
- return 0;
-}
-
-*/
--- /dev/null
+menu GEARS
+
+config GEARS
+ bool "Enable GEARS in Palacios"
+ default n
+ help
+ This is the Guest Examination and Revision Services, an extension used
+ to modify guest code and implement guest-context VMM services in Palacios
+
+config EXT_SW_INTERRUPTS
+ bool "Enable interception and hooking of software interrupts"
+ depends on GEARS
+ default n
+ help
+ This feature will cause the VMM to intercept the execution
+ of software interrupts (i.e. the INTn instruction) and enable
+ any INT vector to be hooked. Extension name is "swintr_intercept"
+
+config DEBUG_EXT_SW_INTERRUPTS
+ bool "Enable debugging of software interrupt interception code"
+ depends on GEARS
+ depends on EXT_SW_INTERRUPTS
+ default n
+ help
+ This will enable useful debugging printouts for software
+ intercept code
+
+config EXT_SWINTR_PASSTHROUGH
+ bool "Hook all unhandled sofware interrupts for passthrough"
+ depends on GEARS
+ depends on EXT_SW_INTERRUPTS
+ default n
+ help
+ If enabled, this will cause all software interrupts
+ (INT instruction vectors) to be hooked for passthrough.
+ May reduce performance but useful for debugging.
+
+config EXT_SYSCALL_HIJACK
+ bool "Enable System Call Hijacking"
+ default n
+ depends on GEARS
+ depends on EXT_SW_INTERRUPTS
+ help
+ Enable the VMM to hijack system calls executed by the guest.
+ If enabled, the VMM will hook execution of INT 80
+
+
+config EXT_SELECTIVE_SYSCALL_EXIT
+ bool "Enable dynamic selective exiting of system calls"
+ default n
+ depends on GEARS
+ depends on EXT_SYSCALL_HIJACK
+ help
+ If enabled, this option will allow the VMM to selectively
+ enable and disable exiting on specific system calls.
+
+config DEBUG_EXT_SYSCALL_HIJACK
+ bool "Enable Syscall Hijack Debug in Palacios"
+ depends on GEARS
+ depends on EXT_SYSCALL_HIJACK
+ default n
+ help
+ Enable Debugging printouts for syscall hijacking code
+ in Palacios
+
+
+config EXT_SYSCALL_PASSTHROUGH
+ bool "Hook all unhandled system calls for passthrough"
+ depends on GEARS
+ depends on EXT_SYSCALL_HIJACK
+ default n
+ help
+ If enabled, this option will cause all system calls
+ that are not explicitly hooked to be hooked for
+ passthrough. This is useful for debugging.
+
+config EXT_EXECVE_HOOK
+ bool "Hook calls to execve"
+ depends on GEARS
+ depends on EXT_SYSCALL_HIJACK
+ default n
+ help
+ This option will cause the interception of all calls
+ to the execve system call
+
+config EXT_ENV_INJECT
+ bool "Enable environment variable injection"
+ depends on GEARS
+ depends on EXT_EXECVE_HOOK
+ default n
+ help
+ Enables development extension for overriding env vars
+ for certain executables
+
+config EXT_CODE_INJECT
+ bool "Allow code injection"
+ depends on GEARS
+ depends on EXT_SYSCALL_HIJACK
+ default n
+ help
+ This option will allow code to be injected and run in the
+ guest context
+
+endmenu
--- /dev/null
+obj-$(V3_CONFIG_EXT_SW_INTERRUPTS) += ext_sw_intr.o
+obj-$(V3_CONFIG_EXT_SYSCALL_HIJACK) += ext_syscall_hijack.o
+obj-$(V3_CONFIG_EXT_EXECVE_HOOK) += ext_execve_hook.o
+obj-$(V3_CONFIG_EXT_EXECVE_HOOK) += ext_process_environment.o
+obj-$(V3_CONFIG_EXT_CODE_INJECT) += ext_code_inject.o
+obj-$(V3_CONFIG_EXT_ENV_INJECT) += ext_env_inject.o
+
+obj-y += null.o
+
+
--- /dev/null
+/* This file defines standard ELF types, structures, and macros.
+ Copyright (C) 1995-2003,2004,2005,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _ELF_H_
+#define _ELF_H_
+
+
+#include <palacios/vmm_types.h>
+
+#define __ELF_SIZE 32
+
+#define ElfW(type) _ElfW (Elf, __ELF_SIZE, type)
+#define _ElfW(e,w,t) _ElfW_1 (e, w, _##t)
+#define _ElfW_1(e,w,t) e##w##t
+
+/* Standard ELF types. */
+
+/* Type for a 16-bit quantity. */
+typedef uint16_t Elf32_Half;
+typedef uint16_t Elf64_Half;
+
+/* Types for signed and unsigned 32-bit quantities. */
+typedef uint32_t Elf32_Word;
+typedef sint32_t Elf32_Sword;
+typedef uint32_t Elf64_Word;
+typedef sint32_t Elf64_Sword;
+
+/* Types for signed and unsigned 64-bit quantities. */
+typedef uint64_t Elf32_Xword;
+typedef sint64_t Elf32_Sxword;
+typedef uint64_t Elf64_Xword;
+typedef sint64_t Elf64_Sxword;
+
+/* Type of addresses. */
+typedef uint32_t Elf32_Addr;
+typedef uint64_t Elf64_Addr;
+
+/* Type of file offsets. */
+typedef uint32_t Elf32_Off;
+typedef uint64_t Elf64_Off;
+
+/* Type for section indices, which are 16-bit quantities. */
+typedef uint16_t Elf32_Section;
+typedef uint16_t Elf64_Section;
+
+/* Type for version symbol information. */
+typedef Elf32_Half Elf32_Versym;
+typedef Elf64_Half Elf64_Versym;
+
+
+
+/* The ELF file header. This appears at the start of every ELF file. */
+
+#define EI_NIDENT (16)
+
+typedef struct
+{
+ unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
+ Elf32_Half e_type; /* Object file type */
+ Elf32_Half e_machine; /* Architecture */
+ Elf32_Word e_version; /* Object file version */
+ Elf32_Addr e_entry; /* Entry point virtual address */
+ Elf32_Off e_phoff; /* Program header table file offset */
+ Elf32_Off e_shoff; /* Section header table file offset */
+ Elf32_Word e_flags; /* Processor-specific flags */
+ Elf32_Half e_ehsize; /* ELF header size in bytes */
+ Elf32_Half e_phentsize; /* Program header table entry size */
+ Elf32_Half e_phnum; /* Program header table entry count */
+ Elf32_Half e_shentsize; /* Section header table entry size */
+ Elf32_Half e_shnum; /* Section header table entry count */
+ Elf32_Half e_shstrndx; /* Section header string table index */
+} Elf32_Ehdr;
+
+typedef struct
+{
+ unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
+ Elf64_Half e_type; /* Object file type */
+ Elf64_Half e_machine; /* Architecture */
+ Elf64_Word e_version; /* Object file version */
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags; /* Processor-specific flags */
+ Elf64_Half e_ehsize; /* ELF header size in bytes */
+ Elf64_Half e_phentsize; /* Program header table entry size */
+ Elf64_Half e_phnum; /* Program header table entry count */
+ Elf64_Half e_shentsize; /* Section header table entry size */
+ Elf64_Half e_shnum; /* Section header table entry count */
+ Elf64_Half e_shstrndx; /* Section header string table index */
+} Elf64_Ehdr;
+
+/* Fields in the e_ident array. The EI_* macros are indices into the
+ array. The macros under each EI_* macro are the values the byte
+ may have. */
+
+#define EI_MAG0 0 /* File identification byte 0 index */
+#define ELFMAG0 0x7f /* Magic number byte 0 */
+
+#define EI_MAG1 1 /* File identification byte 1 index */
+#define ELFMAG1 'E' /* Magic number byte 1 */
+
+#define EI_MAG2 2 /* File identification byte 2 index */
+#define ELFMAG2 'L' /* Magic number byte 2 */
+
+#define EI_MAG3 3 /* File identification byte 3 index */
+#define ELFMAG3 'F' /* Magic number byte 3 */
+
+/* Conglomeration of the identification bytes, for easy testing as a word. */
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+
+#define EI_CLASS 4 /* File class byte index */
+#define ELFCLASSNONE 0 /* Invalid class */
+#define ELFCLASS32 1 /* 32-bit objects */
+#define ELFCLASS64 2 /* 64-bit objects */
+#define ELFCLASSNUM 3
+
+#define EI_DATA 5 /* Data encoding byte index */
+#define ELFDATANONE 0 /* Invalid data encoding */
+#define ELFDATA2LSB 1 /* 2's complement, little endian */
+#define ELFDATA2MSB 2 /* 2's complement, big endian */
+#define ELFDATANUM 3
+
+#define EI_VERSION 6 /* File version byte index */
+ /* Value must be EV_CURRENT */
+
+#define EI_OSABI 7 /* OS ABI identification */
+#define ELFOSABI_NONE 0 /* UNIX System V ABI */
+#define ELFOSABI_SYSV 0 /* Alias. */
+#define ELFOSABI_HPUX 1 /* HP-UX */
+#define ELFOSABI_NETBSD 2 /* NetBSD. */
+#define ELFOSABI_LINUX 3 /* Linux. */
+#define ELFOSABI_SOLARIS 6 /* Sun Solaris. */
+#define ELFOSABI_AIX 7 /* IBM AIX. */
+#define ELFOSABI_IRIX 8 /* SGI Irix. */
+#define ELFOSABI_FREEBSD 9 /* FreeBSD. */
+#define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */
+#define ELFOSABI_MODESTO 11 /* Novell Modesto. */
+#define ELFOSABI_OPENBSD 12 /* OpenBSD. */
+#define ELFOSABI_ARM 97 /* ARM */
+#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
+
+#define EI_ABIVERSION 8 /* ABI version */
+
+#define EI_PAD 9 /* Byte index of padding bytes */
+
+/* Legal values for e_type (object file type). */
+
+#define ET_NONE 0 /* No file type */
+#define ET_REL 1 /* Relocatable file */
+#define ET_EXEC 2 /* Executable file */
+#define ET_DYN 3 /* Shared object file */
+#define ET_CORE 4 /* Core file */
+#define ET_NUM 5 /* Number of defined types */
+#define ET_LOOS 0xfe00 /* OS-specific range start */
+#define ET_HIOS 0xfeff /* OS-specific range end */
+#define ET_LOPROC 0xff00 /* Processor-specific range start */
+#define ET_HIPROC 0xffff /* Processor-specific range end */
+
+/* Legal values for e_machine (architecture). */
+
+#define EM_NONE 0 /* No machine */
+#define EM_M32 1 /* AT&T WE 32100 */
+#define EM_SPARC 2 /* SUN SPARC */
+#define EM_386 3 /* Intel 80386 */
+#define EM_68K 4 /* Motorola m68k family */
+#define EM_88K 5 /* Motorola m88k family */
+#define EM_860 7 /* Intel 80860 */
+#define EM_MIPS 8 /* MIPS R3000 big-endian */
+#define EM_S370 9 /* IBM System/370 */
+#define EM_MIPS_RS3_LE 10 /* MIPS R3000 little-endian */
+
+#define EM_PARISC 15 /* HPPA */
+#define EM_VPP500 17 /* Fujitsu VPP500 */
+#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
+#define EM_960 19 /* Intel 80960 */
+#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC 64-bit */
+#define EM_S390 22 /* IBM S390 */
+
+#define EM_V800 36 /* NEC V800 series */
+#define EM_FR20 37 /* Fujitsu FR20 */
+#define EM_RH32 38 /* TRW RH-32 */
+#define EM_RCE 39 /* Motorola RCE */
+#define EM_ARM 40 /* ARM */
+#define EM_FAKE_ALPHA 41 /* Digital Alpha */
+#define EM_SH 42 /* Hitachi SH */
+#define EM_SPARCV9 43 /* SPARC v9 64-bit */
+#define EM_TRICORE 44 /* Siemens Tricore */
+#define EM_ARC 45 /* Argonaut RISC Core */
+#define EM_H8_300 46 /* Hitachi H8/300 */
+#define EM_H8_300H 47 /* Hitachi H8/300H */
+#define EM_H8S 48 /* Hitachi H8S */
+#define EM_H8_500 49 /* Hitachi H8/500 */
+#define EM_IA_64 50 /* Intel Merced */
+#define EM_MIPS_X 51 /* Stanford MIPS-X */
+#define EM_COLDFIRE 52 /* Motorola Coldfire */
+#define EM_68HC12 53 /* Motorola M68HC12 */
+#define EM_MMA 54 /* Fujitsu MMA Multimedia Accelerator*/
+#define EM_PCP 55 /* Siemens PCP */
+#define EM_NCPU 56 /* Sony nCPU embeeded RISC */
+#define EM_NDR1 57 /* Denso NDR1 microprocessor */
+#define EM_STARCORE 58 /* Motorola Start*Core processor */
+#define EM_ME16 59 /* Toyota ME16 processor */
+#define EM_ST100 60 /* STMicroelectronic ST100 processor */
+#define EM_TINYJ 61 /* Advanced Logic Corp. Tinyj emb.fam*/
+#define EM_X86_64 62 /* AMD x86-64 architecture */
+#define EM_PDSP 63 /* Sony DSP Processor */
+
+#define EM_FX66 66 /* Siemens FX66 microcontroller */
+#define EM_ST9PLUS 67 /* STMicroelectronics ST9+ 8/16 mc */
+#define EM_ST7 68 /* STmicroelectronics ST7 8 bit mc */
+#define EM_68HC16 69 /* Motorola MC68HC16 microcontroller */
+#define EM_68HC11 70 /* Motorola MC68HC11 microcontroller */
+#define EM_68HC08 71 /* Motorola MC68HC08 microcontroller */
+#define EM_68HC05 72 /* Motorola MC68HC05 microcontroller */
+#define EM_SVX 73 /* Silicon Graphics SVx */
+#define EM_ST19 74 /* STMicroelectronics ST19 8 bit mc */
+#define EM_VAX 75 /* Digital VAX */
+#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */
+#define EM_JAVELIN 77 /* Infineon Technologies 32-bit embedded processor */
+#define EM_FIREPATH 78 /* Element 14 64-bit DSP Processor */
+#define EM_ZSP 79 /* LSI Logic 16-bit DSP Processor */
+#define EM_MMIX 80 /* Donald Knuth's educational 64-bit processor */
+#define EM_HUANY 81 /* Harvard University machine-independent object files */
+#define EM_PRISM 82 /* SiTera Prism */
+#define EM_AVR 83 /* Atmel AVR 8-bit microcontroller */
+#define EM_FR30 84 /* Fujitsu FR30 */
+#define EM_D10V 85 /* Mitsubishi D10V */
+#define EM_D30V 86 /* Mitsubishi D30V */
+#define EM_V850 87 /* NEC v850 */
+#define EM_M32R 88 /* Mitsubishi M32R */
+#define EM_MN10300 89 /* Matsushita MN10300 */
+#define EM_MN10200 90 /* Matsushita MN10200 */
+#define EM_PJ 91 /* picoJava */
+#define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */
+#define EM_ARC_A5 93 /* ARC Cores Tangent-A5 */
+#define EM_XTENSA 94 /* Tensilica Xtensa Architecture */
+#define EM_NUM 95
+
+/* If it is necessary to assign new unofficial EM_* values, please
+ pick large random numbers (0x8523, 0xa7f2, etc.) to minimize the
+ chances of collision with official or non-GNU unofficial values. */
+
+#define EM_ALPHA 0x9026
+
+/* Legal values for e_version (version). */
+
+#define EV_NONE 0 /* Invalid ELF version */
+#define EV_CURRENT 1 /* Current version */
+#define EV_NUM 2
+
+/* Section header. */
+
+typedef struct
+{
+ Elf32_Word sh_name; /* Section name (string tbl index) */
+ Elf32_Word sh_type; /* Section type */
+ Elf32_Word sh_flags; /* Section flags */
+ Elf32_Addr sh_addr; /* Section virtual addr at execution */
+ Elf32_Off sh_offset; /* Section file offset */
+ Elf32_Word sh_size; /* Section size in bytes */
+ Elf32_Word sh_link; /* Link to another section */
+ Elf32_Word sh_info; /* Additional section information */
+ Elf32_Word sh_addralign; /* Section alignment */
+ Elf32_Word sh_entsize; /* Entry size if section holds table */
+} Elf32_Shdr;
+
+typedef struct
+{
+ Elf64_Word sh_name; /* Section name (string tbl index) */
+ Elf64_Word sh_type; /* Section type */
+ Elf64_Xword sh_flags; /* Section flags */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Section size in bytes */
+ Elf64_Word sh_link; /* Link to another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
+
+/* Special section indices. */
+
+#define SHN_UNDEF 0 /* Undefined section */
+#define SHN_LORESERVE 0xff00 /* Start of reserved indices */
+#define SHN_LOPROC 0xff00 /* Start of processor-specific */
+#define SHN_BEFORE 0xff00 /* Order section before all others
+ (Solaris). */
+#define SHN_AFTER 0xff01 /* Order section after all others
+ (Solaris). */
+#define SHN_HIPROC 0xff1f /* End of processor-specific */
+#define SHN_LOOS 0xff20 /* Start of OS-specific */
+#define SHN_HIOS 0xff3f /* End of OS-specific */
+#define SHN_ABS 0xfff1 /* Associated symbol is absolute */
+#define SHN_COMMON 0xfff2 /* Associated symbol is common */
+#define SHN_XINDEX 0xffff /* Index is in extra table. */
+#define SHN_HIRESERVE 0xffff /* End of reserved indices */
+
+/* Legal values for sh_type (section type). */
+
+#define SHT_NULL 0 /* Section header table entry unused */
+#define SHT_PROGBITS 1 /* Program data */
+#define SHT_SYMTAB 2 /* Symbol table */
+#define SHT_STRTAB 3 /* String table */
+#define SHT_RELA 4 /* Relocation entries with addends */
+#define SHT_HASH 5 /* Symbol hash table */
+#define SHT_DYNAMIC 6 /* Dynamic linking information */
+#define SHT_NOTE 7 /* Notes */
+#define SHT_NOBITS 8 /* Program space with no data (bss) */
+#define SHT_REL 9 /* Relocation entries, no addends */
+#define SHT_SHLIB 10 /* Reserved */
+#define SHT_DYNSYM 11 /* Dynamic linker symbol table */
+#define SHT_INIT_ARRAY 14 /* Array of constructors */
+#define SHT_FINI_ARRAY 15 /* Array of destructors */
+#define SHT_PREINIT_ARRAY 16 /* Array of pre-constructors */
+#define SHT_GROUP 17 /* Section group */
+#define SHT_SYMTAB_SHNDX 18 /* Extended section indeces */
+#define SHT_NUM 19 /* Number of defined types. */
+#define SHT_LOOS 0x60000000 /* Start OS-specific. */
+#define SHT_GNU_HASH 0x6ffffff6 /* GNU-style hash table. */
+#define SHT_GNU_LIBLIST 0x6ffffff7 /* Prelink library list */
+#define SHT_CHECKSUM 0x6ffffff8 /* Checksum for DSO content. */
+#define SHT_LOSUNW 0x6ffffffa /* Sun-specific low bound. */
+#define SHT_SUNW_move 0x6ffffffa
+#define SHT_SUNW_COMDAT 0x6ffffffb
+#define SHT_SUNW_syminfo 0x6ffffffc
+#define SHT_GNU_verdef 0x6ffffffd /* Version definition section. */
+#define SHT_GNU_verneed 0x6ffffffe /* Version needs section. */
+#define SHT_GNU_versym 0x6fffffff /* Version symbol table. */
+#define SHT_HISUNW 0x6fffffff /* Sun-specific high bound. */
+#define SHT_HIOS 0x6fffffff /* End OS-specific type */
+#define SHT_LOPROC 0x70000000 /* Start of processor-specific */
+#define SHT_HIPROC 0x7fffffff /* End of processor-specific */
+#define SHT_LOUSER 0x80000000 /* Start of application-specific */
+#define SHT_HIUSER 0x8fffffff /* End of application-specific */
+
+/* Legal values for sh_flags (section flags). */
+
+#define SHF_WRITE (1 << 0) /* Writable */
+#define SHF_ALLOC (1 << 1) /* Occupies memory during execution */
+#define SHF_EXECINSTR (1 << 2) /* Executable */
+#define SHF_MERGE (1 << 4) /* Might be merged */
+#define SHF_STRINGS (1 << 5) /* Contains nul-terminated strings */
+#define SHF_INFO_LINK (1 << 6) /* `sh_info' contains SHT index */
+#define SHF_LINK_ORDER (1 << 7) /* Preserve order after combining */
+#define SHF_OS_NONCONFORMING (1 << 8) /* Non-standard OS specific handling
+ required */
+#define SHF_GROUP (1 << 9) /* Section is member of a group. */
+#define SHF_TLS (1 << 10) /* Section hold thread-local data. */
+#define SHF_MASKOS 0x0ff00000 /* OS-specific. */
+#define SHF_MASKPROC 0xf0000000 /* Processor-specific */
+#define SHF_ORDERED (1 << 30) /* Special ordering requirement
+ (Solaris). */
+#define SHF_EXCLUDE (1 << 31) /* Section is excluded unless
+ referenced or allocated (Solaris).*/
+
+/* Section group handling. */
+#define GRP_COMDAT 0x1 /* Mark group as COMDAT. */
+
+/* Symbol table entry. */
+
+typedef struct
+{
+ Elf32_Word st_name; /* Symbol name (string tbl index) */
+ Elf32_Addr st_value; /* Symbol value */
+ Elf32_Word st_size; /* Symbol size */
+ unsigned char st_info; /* Symbol type and binding */
+ unsigned char st_other; /* Symbol visibility */
+ Elf32_Section st_shndx; /* Section index */
+} Elf32_Sym;
+
+typedef struct
+{
+ Elf64_Word st_name; /* Symbol name (string tbl index) */
+ unsigned char st_info; /* Symbol type and binding */
+ unsigned char st_other; /* Symbol visibility */
+ Elf64_Section st_shndx; /* Section index */
+ Elf64_Addr st_value; /* Symbol value */
+ Elf64_Xword st_size; /* Symbol size */
+} Elf64_Sym;
+
+/* The syminfo section if available contains additional information about
+ every dynamic symbol. */
+
+typedef struct
+{
+ Elf32_Half si_boundto; /* Direct bindings, symbol bound to */
+ Elf32_Half si_flags; /* Per symbol flags */
+} Elf32_Syminfo;
+
+typedef struct
+{
+ Elf64_Half si_boundto; /* Direct bindings, symbol bound to */
+ Elf64_Half si_flags; /* Per symbol flags */
+} Elf64_Syminfo;
+
+/* Possible values for si_boundto. */
+#define SYMINFO_BT_SELF 0xffff /* Symbol bound to self */
+#define SYMINFO_BT_PARENT 0xfffe /* Symbol bound to parent */
+#define SYMINFO_BT_LOWRESERVE 0xff00 /* Beginning of reserved entries */
+
+/* Possible bitmasks for si_flags. */
+#define SYMINFO_FLG_DIRECT 0x0001 /* Direct bound symbol */
+#define SYMINFO_FLG_PASSTHRU 0x0002 /* Pass-thru symbol for translator */
+#define SYMINFO_FLG_COPY 0x0004 /* Symbol is a copy-reloc */
+#define SYMINFO_FLG_LAZYLOAD 0x0008 /* Symbol bound to object to be lazy
+ loaded */
+/* Syminfo version values. */
+#define SYMINFO_NONE 0
+#define SYMINFO_CURRENT 1
+#define SYMINFO_NUM 2
+
+
+/* How to extract and insert information held in the st_info field. */
+
+#define ELF32_ST_BIND(val) (((unsigned char) (val)) >> 4)
+#define ELF32_ST_TYPE(val) ((val) & 0xf)
+#define ELF32_ST_INFO(bind, type) (((bind) << 4) + ((type) & 0xf))
+
+/* Both Elf32_Sym and Elf64_Sym use the same one-byte st_info field. */
+#define ELF64_ST_BIND(val) ELF32_ST_BIND (val)
+#define ELF64_ST_TYPE(val) ELF32_ST_TYPE (val)
+#define ELF64_ST_INFO(bind, type) ELF32_ST_INFO ((bind), (type))
+
+/* Legal values for ST_BIND subfield of st_info (symbol binding). */
+
+#define STB_LOCAL 0 /* Local symbol */
+#define STB_GLOBAL 1 /* Global symbol */
+#define STB_WEAK 2 /* Weak symbol */
+#define STB_NUM 3 /* Number of defined types. */
+#define STB_LOOS 10 /* Start of OS-specific */
+#define STB_HIOS 12 /* End of OS-specific */
+#define STB_LOPROC 13 /* Start of processor-specific */
+#define STB_HIPROC 15 /* End of processor-specific */
+
+/* Legal values for ST_TYPE subfield of st_info (symbol type). */
+
+#define STT_NOTYPE 0 /* Symbol type is unspecified */
+#define STT_OBJECT 1 /* Symbol is a data object */
+#define STT_FUNC 2 /* Symbol is a code object */
+#define STT_SECTION 3 /* Symbol associated with a section */
+#define STT_FILE 4 /* Symbol's name is file name */
+#define STT_COMMON 5 /* Symbol is a common data object */
+#define STT_TLS 6 /* Symbol is thread-local data object*/
+#define STT_NUM 7 /* Number of defined types. */
+#define STT_LOOS 10 /* Start of OS-specific */
+#define STT_HIOS 12 /* End of OS-specific */
+#define STT_LOPROC 13 /* Start of processor-specific */
+#define STT_HIPROC 15 /* End of processor-specific */
+
+
+/* Symbol table indices are found in the hash buckets and chain table
+ of a symbol hash table section. This special index value indicates
+ the end of a chain, meaning no further symbols are found in that bucket. */
+
+#define STN_UNDEF 0 /* End of a chain. */
+
+
+/* How to extract and insert information held in the st_other field. */
+
+#define ELF32_ST_VISIBILITY(o) ((o) & 0x03)
+
+/* For ELF64 the definitions are the same. */
+#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o)
+
+/* Symbol visibility specification encoded in the st_other field. */
+#define STV_DEFAULT 0 /* Default symbol visibility rules */
+#define STV_INTERNAL 1 /* Processor specific hidden class */
+#define STV_HIDDEN 2 /* Sym unavailable in other modules */
+#define STV_PROTECTED 3 /* Not preemptible, not exported */
+
+
+/* Relocation table entry without addend (in section of type SHT_REL). */
+
+typedef struct
+{
+ Elf32_Addr r_offset; /* Address */
+ Elf32_Word r_info; /* Relocation type and symbol index */
+} Elf32_Rel;
+
+/* I have seen two different definitions of the Elf64_Rel and
+ Elf64_Rela structures, so we'll leave them out until Novell (or
+ whoever) gets their act together. */
+/* The following, at least, is used on Sparc v9, MIPS, and Alpha. */
+
+typedef struct
+{
+ Elf64_Addr r_offset; /* Address */
+ Elf64_Xword r_info; /* Relocation type and symbol index */
+} Elf64_Rel;
+
+/* Relocation table entry with addend (in section of type SHT_RELA). */
+
+typedef struct
+{
+ Elf32_Addr r_offset; /* Address */
+ Elf32_Word r_info; /* Relocation type and symbol index */
+ Elf32_Sword r_addend; /* Addend */
+} Elf32_Rela;
+
+typedef struct
+{
+ Elf64_Addr r_offset; /* Address */
+ Elf64_Xword r_info; /* Relocation type and symbol index */
+ Elf64_Sxword r_addend; /* Addend */
+} Elf64_Rela;
+
+/* How to extract and insert information held in the r_info field. */
+
+#define ELF32_R_SYM(val) ((val) >> 8)
+#define ELF32_R_TYPE(val) ((val) & 0xff)
+#define ELF32_R_INFO(sym, type) (((sym) << 8) + ((type) & 0xff))
+
+#define ELF64_R_SYM(i) ((i) >> 32)
+#define ELF64_R_TYPE(i) ((i) & 0xffffffff)
+#define ELF64_R_INFO(sym,type) ((((Elf64_Xword) (sym)) << 32) + (type))
+
+/* Program segment header. */
+
+typedef struct
+{
+ Elf32_Word p_type; /* Segment type */
+ Elf32_Off p_offset; /* Segment file offset */
+ Elf32_Addr p_vaddr; /* Segment virtual address */
+ Elf32_Addr p_paddr; /* Segment physical address */
+ Elf32_Word p_filesz; /* Segment size in file */
+ Elf32_Word p_memsz; /* Segment size in memory */
+ Elf32_Word p_flags; /* Segment flags */
+ Elf32_Word p_align; /* Segment alignment */
+} Elf32_Phdr;
+
+typedef struct
+{
+ Elf64_Word p_type; /* Segment type */
+ Elf64_Word p_flags; /* Segment flags */
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment */
+} Elf64_Phdr;
+
+/* Legal values for p_type (segment type). */
+
+#define PT_NULL 0 /* Program header table entry unused */
+#define PT_LOAD 1 /* Loadable program segment */
+#define PT_DYNAMIC 2 /* Dynamic linking information */
+#define PT_INTERP 3 /* Program interpreter */
+#define PT_NOTE 4 /* Auxiliary information */
+#define PT_SHLIB 5 /* Reserved */
+#define PT_PHDR 6 /* Entry for header table itself */
+#define PT_TLS 7 /* Thread-local storage segment */
+#define PT_NUM 8 /* Number of defined types */
+#define PT_LOOS 0x60000000 /* Start of OS-specific */
+#define PT_GNU_EH_FRAME 0x6474e550 /* GCC .eh_frame_hdr segment */
+#define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */
+#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */
+#define PT_LOSUNW 0x6ffffffa
+#define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */
+#define PT_SUNWSTACK 0x6ffffffb /* Stack segment */
+#define PT_HISUNW 0x6fffffff
+#define PT_HIOS 0x6fffffff /* End of OS-specific */
+#define PT_LOPROC 0x70000000 /* Start of processor-specific */
+#define PT_HIPROC 0x7fffffff /* End of processor-specific */
+
+/* Legal values for p_flags (segment flags). */
+
+#define PF_X (1 << 0) /* Segment is executable */
+#define PF_W (1 << 1) /* Segment is writable */
+#define PF_R (1 << 2) /* Segment is readable */
+#define PF_MASKOS 0x0ff00000 /* OS-specific */
+#define PF_MASKPROC 0xf0000000 /* Processor-specific */
+
+/* Legal values for note segment descriptor types for core files. */
+
+#define NT_PRSTATUS 1 /* Contains copy of prstatus struct */
+#define NT_FPREGSET 2 /* Contains copy of fpregset struct */
+#define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */
+#define NT_PRXREG 4 /* Contains copy of prxregset struct */
+#define NT_TASKSTRUCT 4 /* Contains copy of task structure */
+#define NT_PLATFORM 5 /* String from sysinfo(SI_PLATFORM) */
+#define NT_AUXV 6 /* Contains copy of auxv array */
+#define NT_GWINDOWS 7 /* Contains copy of gwindows struct */
+#define NT_ASRS 8 /* Contains copy of asrset struct */
+#define NT_PSTATUS 10 /* Contains copy of pstatus struct */
+#define NT_PSINFO 13 /* Contains copy of psinfo struct */
+#define NT_PRCRED 14 /* Contains copy of prcred struct */
+#define NT_UTSNAME 15 /* Contains copy of utsname struct */
+#define NT_LWPSTATUS 16 /* Contains copy of lwpstatus struct */
+#define NT_LWPSINFO 17 /* Contains copy of lwpinfo struct */
+#define NT_PRFPXREG 20 /* Contains copy of fprxregset struct*/
+
+/* Legal values for the note segment descriptor types for object files. */
+
+#define NT_VERSION 1 /* Contains a version string. */
+
+
+/* Dynamic section entry. */
+
+typedef struct
+{
+ Elf32_Sword d_tag; /* Dynamic entry type */
+ union
+ {
+ Elf32_Word d_val; /* Integer value */
+ Elf32_Addr d_ptr; /* Address value */
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct
+{
+ Elf64_Sxword d_tag; /* Dynamic entry type */
+ union
+ {
+ Elf64_Xword d_val; /* Integer value */
+ Elf64_Addr d_ptr; /* Address value */
+ } d_un;
+} Elf64_Dyn;
+
+/* Legal values for d_tag (dynamic entry type). */
+
+#define DT_NULL 0 /* Marks end of dynamic section */
+#define DT_NEEDED 1 /* Name of needed library */
+#define DT_PLTRELSZ 2 /* Size in bytes of PLT relocs */
+#define DT_PLTGOT 3 /* Processor defined value */
+#define DT_HASH 4 /* Address of symbol hash table */
+#define DT_STRTAB 5 /* Address of string table */
+#define DT_SYMTAB 6 /* Address of symbol table */
+#define DT_RELA 7 /* Address of Rela relocs */
+#define DT_RELASZ 8 /* Total size of Rela relocs */
+#define DT_RELAENT 9 /* Size of one Rela reloc */
+#define DT_STRSZ 10 /* Size of string table */
+#define DT_SYMENT 11 /* Size of one symbol table entry */
+#define DT_INIT 12 /* Address of init function */
+#define DT_FINI 13 /* Address of termination function */
+#define DT_SONAME 14 /* Name of shared object */
+#define DT_RPATH 15 /* Library search path (deprecated) */
+#define DT_SYMBOLIC 16 /* Start symbol search here */
+#define DT_REL 17 /* Address of Rel relocs */
+#define DT_RELSZ 18 /* Total size of Rel relocs */
+#define DT_RELENT 19 /* Size of one Rel reloc */
+#define DT_PLTREL 20 /* Type of reloc in PLT */
+#define DT_DEBUG 21 /* For debugging; unspecified */
+#define DT_TEXTREL 22 /* Reloc might modify .text */
+#define DT_JMPREL 23 /* Address of PLT relocs */
+#define DT_BIND_NOW 24 /* Process relocations of object */
+#define DT_INIT_ARRAY 25 /* Array with addresses of init fct */
+#define DT_FINI_ARRAY 26 /* Array with addresses of fini fct */
+#define DT_INIT_ARRAYSZ 27 /* Size in bytes of DT_INIT_ARRAY */
+#define DT_FINI_ARRAYSZ 28 /* Size in bytes of DT_FINI_ARRAY */
+#define DT_RUNPATH 29 /* Library search path */
+#define DT_FLAGS 30 /* Flags for the object being loaded */
+#define DT_ENCODING 32 /* Start of encoded range */
+#define DT_PREINIT_ARRAY 32 /* Array with addresses of preinit fct*/
+#define DT_PREINIT_ARRAYSZ 33 /* size in bytes of DT_PREINIT_ARRAY */
+#define DT_NUM 34 /* Number used */
+#define DT_LOOS 0x6000000d /* Start of OS-specific */
+#define DT_HIOS 0x6ffff000 /* End of OS-specific */
+#define DT_LOPROC 0x70000000 /* Start of processor-specific */
+#define DT_HIPROC 0x7fffffff /* End of processor-specific */
+#define DT_PROCNUM DT_MIPS_NUM /* Most used by any processor */
+
+/* DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the
+ Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's
+ approach. */
+#define DT_VALRNGLO 0x6ffffd00
+#define DT_GNU_PRELINKED 0x6ffffdf5 /* Prelinking timestamp */
+#define DT_GNU_CONFLICTSZ 0x6ffffdf6 /* Size of conflict section */
+#define DT_GNU_LIBLISTSZ 0x6ffffdf7 /* Size of library list */
+#define DT_CHECKSUM 0x6ffffdf8
+#define DT_PLTPADSZ 0x6ffffdf9
+#define DT_MOVEENT 0x6ffffdfa
+#define DT_MOVESZ 0x6ffffdfb
+#define DT_FEATURE_1 0x6ffffdfc /* Feature selection (DTF_*). */
+#define DT_POSFLAG_1 0x6ffffdfd /* Flags for DT_* entries, effecting
+ the following DT_* entry. */
+#define DT_SYMINSZ 0x6ffffdfe /* Size of syminfo table (in bytes) */
+#define DT_SYMINENT 0x6ffffdff /* Entry size of syminfo */
+#define DT_VALRNGHI 0x6ffffdff
+#define DT_VALTAGIDX(tag) (DT_VALRNGHI - (tag)) /* Reverse order! */
+#define DT_VALNUM 12
+
+/* DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the
+ Dyn.d_un.d_ptr field of the Elf*_Dyn structure.
+
+ If any adjustment is made to the ELF object after it has been
+ built these entries will need to be adjusted. */
+#define DT_ADDRRNGLO 0x6ffffe00
+#define DT_GNU_HASH 0x6ffffef5 /* GNU-style hash table. */
+#define DT_TLSDESC_PLT 0x6ffffef6
+#define DT_TLSDESC_GOT 0x6ffffef7
+#define DT_GNU_CONFLICT 0x6ffffef8 /* Start of conflict section */
+#define DT_GNU_LIBLIST 0x6ffffef9 /* Library list */
+#define DT_CONFIG 0x6ffffefa /* Configuration information. */
+#define DT_DEPAUDIT 0x6ffffefb /* Dependency auditing. */
+#define DT_AUDIT 0x6ffffefc /* Object auditing. */
+#define DT_PLTPAD 0x6ffffefd /* PLT padding. */
+#define DT_MOVETAB 0x6ffffefe /* Move table. */
+#define DT_SYMINFO 0x6ffffeff /* Syminfo table. */
+#define DT_ADDRRNGHI 0x6ffffeff
+#define DT_ADDRTAGIDX(tag) (DT_ADDRRNGHI - (tag)) /* Reverse order! */
+#define DT_ADDRNUM 11
+
+/* The versioning entry types. The next are defined as part of the
+ GNU extension. */
+#define DT_VERSYM 0x6ffffff0
+
+#define DT_RELACOUNT 0x6ffffff9
+#define DT_RELCOUNT 0x6ffffffa
+
+/* These were chosen by Sun. */
+#define DT_FLAGS_1 0x6ffffffb /* State flags, see DF_1_* below. */
+#define DT_VERDEF 0x6ffffffc /* Address of version definition
+ table */
+#define DT_VERDEFNUM 0x6ffffffd /* Number of version definitions */
+#define DT_VERNEED 0x6ffffffe /* Address of table with needed
+ versions */
+#define DT_VERNEEDNUM 0x6fffffff /* Number of needed versions */
+#define DT_VERSIONTAGIDX(tag) (DT_VERNEEDNUM - (tag)) /* Reverse order! */
+#define DT_VERSIONTAGNUM 16
+
+/* Sun added these machine-independent extensions in the "processor-specific"
+ range. Be compatible. */
+#define DT_AUXILIARY 0x7ffffffd /* Shared object to load before self */
+#define DT_FILTER 0x7fffffff /* Shared object to get values from */
+#define DT_EXTRATAGIDX(tag) ((Elf32_Word)-((Elf32_Sword) (tag) <<1>>1)-1)
+#define DT_EXTRANUM 3
+
+/* Values of `d_un.d_val' in the DT_FLAGS entry. */
+#define DF_ORIGIN 0x00000001 /* Object may use DF_ORIGIN */
+#define DF_SYMBOLIC 0x00000002 /* Symbol resolutions starts here */
+#define DF_TEXTREL 0x00000004 /* Object contains text relocations */
+#define DF_BIND_NOW 0x00000008 /* No lazy binding for this object */
+#define DF_STATIC_TLS 0x00000010 /* Module uses the static TLS model */
+
+/* State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1
+ entry in the dynamic section. */
+#define DF_1_NOW 0x00000001 /* Set RTLD_NOW for this object. */
+#define DF_1_GLOBAL 0x00000002 /* Set RTLD_GLOBAL for this object. */
+#define DF_1_GROUP 0x00000004 /* Set RTLD_GROUP for this object. */
+#define DF_1_NODELETE 0x00000008 /* Set RTLD_NODELETE for this object.*/
+#define DF_1_LOADFLTR 0x00000010 /* Trigger filtee loading at runtime.*/
+#define DF_1_INITFIRST 0x00000020 /* Set RTLD_INITFIRST for this object*/
+#define DF_1_NOOPEN 0x00000040 /* Set RTLD_NOOPEN for this object. */
+#define DF_1_ORIGIN 0x00000080 /* $ORIGIN must be handled. */
+#define DF_1_DIRECT 0x00000100 /* Direct binding enabled. */
+#define DF_1_TRANS 0x00000200
+#define DF_1_INTERPOSE 0x00000400 /* Object is used to interpose. */
+#define DF_1_NODEFLIB 0x00000800 /* Ignore default lib search path. */
+#define DF_1_NODUMP 0x00001000 /* Object can't be dldump'ed. */
+#define DF_1_CONFALT 0x00002000 /* Configuration alternative created.*/
+#define DF_1_ENDFILTEE 0x00004000 /* Filtee terminates filters search. */
+#define DF_1_DISPRELDNE 0x00008000 /* Disp reloc applied at build time. */
+#define DF_1_DISPRELPND 0x00010000 /* Disp reloc applied at run-time. */
+
+/* Flags for the feature selection in DT_FEATURE_1. */
+#define DTF_1_PARINIT 0x00000001
+#define DTF_1_CONFEXP 0x00000002
+
+/* Flags in the DT_POSFLAG_1 entry effecting only the next DT_* entry. */
+#define DF_P1_LAZYLOAD 0x00000001 /* Lazyload following object. */
+#define DF_P1_GROUPPERM 0x00000002 /* Symbols from next object are not
+ generally available. */
+
+/* Version definition sections. */
+
+typedef struct
+{
+ Elf32_Half vd_version; /* Version revision */
+ Elf32_Half vd_flags; /* Version information */
+ Elf32_Half vd_ndx; /* Version Index */
+ Elf32_Half vd_cnt; /* Number of associated aux entries */
+ Elf32_Word vd_hash; /* Version name hash value */
+ Elf32_Word vd_aux; /* Offset in bytes to verdaux array */
+ Elf32_Word vd_next; /* Offset in bytes to next verdef
+ entry */
+} Elf32_Verdef;
+
+typedef struct
+{
+ Elf64_Half vd_version; /* Version revision */
+ Elf64_Half vd_flags; /* Version information */
+ Elf64_Half vd_ndx; /* Version Index */
+ Elf64_Half vd_cnt; /* Number of associated aux entries */
+ Elf64_Word vd_hash; /* Version name hash value */
+ Elf64_Word vd_aux; /* Offset in bytes to verdaux array */
+ Elf64_Word vd_next; /* Offset in bytes to next verdef
+ entry */
+} Elf64_Verdef;
+
+
+/* Legal values for vd_version (version revision). */
+#define VER_DEF_NONE 0 /* No version */
+#define VER_DEF_CURRENT 1 /* Current version */
+#define VER_DEF_NUM 2 /* Given version number */
+
+/* Legal values for vd_flags (version information flags). */
+#define VER_FLG_BASE 0x1 /* Version definition of file itself */
+#define VER_FLG_WEAK 0x2 /* Weak version identifier */
+
+/* Versym symbol index values. */
+#define VER_NDX_LOCAL 0 /* Symbol is local. */
+#define VER_NDX_GLOBAL 1 /* Symbol is global. */
+#define VER_NDX_LORESERVE 0xff00 /* Beginning of reserved entries. */
+#define VER_NDX_ELIMINATE 0xff01 /* Symbol is to be eliminated. */
+
+/* Auxialiary version information. */
+
+typedef struct
+{
+ Elf32_Word vda_name; /* Version or dependency names */
+ Elf32_Word vda_next; /* Offset in bytes to next verdaux
+ entry */
+} Elf32_Verdaux;
+
+typedef struct
+{
+ Elf64_Word vda_name; /* Version or dependency names */
+ Elf64_Word vda_next; /* Offset in bytes to next verdaux
+ entry */
+} Elf64_Verdaux;
+
+
+/* Version dependency section. */
+
+typedef struct
+{
+ Elf32_Half vn_version; /* Version of structure */
+ Elf32_Half vn_cnt; /* Number of associated aux entries */
+ Elf32_Word vn_file; /* Offset of filename for this
+ dependency */
+ Elf32_Word vn_aux; /* Offset in bytes to vernaux array */
+ Elf32_Word vn_next; /* Offset in bytes to next verneed
+ entry */
+} Elf32_Verneed;
+
+typedef struct
+{
+ Elf64_Half vn_version; /* Version of structure */
+ Elf64_Half vn_cnt; /* Number of associated aux entries */
+ Elf64_Word vn_file; /* Offset of filename for this
+ dependency */
+ Elf64_Word vn_aux; /* Offset in bytes to vernaux array */
+ Elf64_Word vn_next; /* Offset in bytes to next verneed
+ entry */
+} Elf64_Verneed;
+
+
+/* Legal values for vn_version (version revision). */
+#define VER_NEED_NONE 0 /* No version */
+#define VER_NEED_CURRENT 1 /* Current version */
+#define VER_NEED_NUM 2 /* Given version number */
+
+/* Auxiliary needed version information. */
+
+typedef struct
+{
+ Elf32_Word vna_hash; /* Hash value of dependency name */
+ Elf32_Half vna_flags; /* Dependency specific information */
+ Elf32_Half vna_other; /* Unused */
+ Elf32_Word vna_name; /* Dependency name string offset */
+ Elf32_Word vna_next; /* Offset in bytes to next vernaux
+ entry */
+} Elf32_Vernaux;
+
+typedef struct
+{
+ Elf64_Word vna_hash; /* Hash value of dependency name */
+ Elf64_Half vna_flags; /* Dependency specific information */
+ Elf64_Half vna_other; /* Unused */
+ Elf64_Word vna_name; /* Dependency name string offset */
+ Elf64_Word vna_next; /* Offset in bytes to next vernaux
+ entry */
+} Elf64_Vernaux;
+
+
+/* Legal values for vna_flags. */
+#define VER_FLG_WEAK 0x2 /* Weak version identifier */
+
+
+/* Auxiliary vector. */
+
+/* This vector is normally only used by the program interpreter. The
+ usual definition in an ABI supplement uses the name auxv_t. The
+ vector is not usually defined in a standard <elf.h> file, but it
+ can't hurt. We rename it to avoid conflicts. The sizes of these
+ types are an arrangement between the exec server and the program
+ interpreter, so we don't fully specify them here. */
+
+typedef struct
+{
+ uint32_t a_type; /* Entry type */
+ union
+ {
+ uint32_t a_val; /* Integer value */
+ /* We use to have pointer elements added here. We cannot do that,
+ though, since it does not work when using 32-bit definitions
+ on 64-bit platforms and vice versa. */
+ } a_un;
+} Elf32_auxv_t;
+
+typedef struct
+{
+ uint64_t a_type; /* Entry type */
+ union
+ {
+ uint64_t a_val; /* Integer value */
+ /* We use to have pointer elements added here. We cannot do that,
+ though, since it does not work when using 32-bit definitions
+ on 64-bit platforms and vice versa. */
+ } a_un;
+} Elf64_auxv_t;
+
+/* Legal values for a_type (entry type). */
+
+#define AT_NULL 0 /* End of vector */
+#define AT_IGNORE 1 /* Entry should be ignored */
+#define AT_EXECFD 2 /* File descriptor of program */
+#define AT_PHDR 3 /* Program headers for program */
+#define AT_PHENT 4 /* Size of program header entry */
+#define AT_PHNUM 5 /* Number of program headers */
+#define AT_PAGESZ 6 /* System page size */
+#define AT_BASE 7 /* Base address of interpreter */
+#define AT_FLAGS 8 /* Flags */
+#define AT_ENTRY 9 /* Entry point of program */
+#define AT_NOTELF 10 /* Program is not ELF */
+#define AT_UID 11 /* Real uid */
+#define AT_EUID 12 /* Effective uid */
+#define AT_GID 13 /* Real gid */
+#define AT_EGID 14 /* Effective gid */
+#define AT_CLKTCK 17 /* Frequency of times() */
+
+/* Some more special a_type values describing the hardware. */
+#define AT_PLATFORM 15 /* String identifying platform. */
+#define AT_HWCAP 16 /* Machine dependent hints about
+ processor capabilities. */
+
+/* This entry gives some information about the FPU initialization
+ performed by the kernel. */
+#define AT_FPUCW 18 /* Used FPU control word. */
+
+/* Cache block sizes. */
+#define AT_DCACHEBSIZE 19 /* Data cache block size. */
+#define AT_ICACHEBSIZE 20 /* Instruction cache block size. */
+#define AT_UCACHEBSIZE 21 /* Unified cache block size. */
+
+/* A special ignored value for PPC, used by the kernel to control the
+ interpretation of the AUXV. Must be > 16. */
+#define AT_IGNOREPPC 22 /* Entry should be ignored. */
+
+#define AT_SECURE 23 /* Boolean, was exec setuid-like? */
+
+/* Pointer to the global system page used for system calls and other
+ nice things. */
+#define AT_SYSINFO 32
+#define AT_SYSINFO_EHDR 33
+
+/* Shapes of the caches. Bits 0-3 contains associativity; bits 4-7 contains
+ log2 of line size; mask those to get cache size. */
+#define AT_L1I_CACHESHAPE 34
+#define AT_L1D_CACHESHAPE 35
+#define AT_L2_CACHESHAPE 36
+#define AT_L3_CACHESHAPE 37
+
+/* Note section contents. Each entry in the note section begins with
+ a header of a fixed form. */
+
+typedef struct
+{
+ Elf32_Word n_namesz; /* Length of the note's name. */
+ Elf32_Word n_descsz; /* Length of the note's descriptor. */
+ Elf32_Word n_type; /* Type of the note. */
+} Elf32_Nhdr;
+
+typedef struct
+{
+ Elf64_Word n_namesz; /* Length of the note's name. */
+ Elf64_Word n_descsz; /* Length of the note's descriptor. */
+ Elf64_Word n_type; /* Type of the note. */
+} Elf64_Nhdr;
+
+/* Known names of notes. */
+
+/* Solaris entries in the note section have this name. */
+#define ELF_NOTE_SOLARIS "SUNW Solaris"
+
+/* Note entries for GNU systems have this name. */
+#define ELF_NOTE_GNU "GNU"
+
+
+/* Defined types of notes for Solaris. */
+
+/* Value of descriptor (one word) is desired pagesize for the binary. */
+#define ELF_NOTE_PAGESIZE_HINT 1
+
+
+/* Defined note types for GNU systems. */
+
+/* ABI information. The descriptor consists of words:
+ word 0: OS descriptor
+ word 1: major version of the ABI
+ word 2: minor version of the ABI
+ word 3: subminor version of the ABI
+*/
+#define ELF_NOTE_ABI 1
+
+/* Known OSes. These value can appear in word 0 of an ELF_NOTE_ABI
+ note section entry. */
+#define ELF_NOTE_OS_LINUX 0
+#define ELF_NOTE_OS_GNU 1
+#define ELF_NOTE_OS_SOLARIS2 2
+#define ELF_NOTE_OS_FREEBSD 3
+
+
+/* Move records. */
+typedef struct
+{
+ Elf32_Xword m_value; /* Symbol value. */
+ Elf32_Word m_info; /* Size and index. */
+ Elf32_Word m_poffset; /* Symbol offset. */
+ Elf32_Half m_repeat; /* Repeat count. */
+ Elf32_Half m_stride; /* Stride info. */
+} Elf32_Move;
+
+typedef struct
+{
+ Elf64_Xword m_value; /* Symbol value. */
+ Elf64_Xword m_info; /* Size and index. */
+ Elf64_Xword m_poffset; /* Symbol offset. */
+ Elf64_Half m_repeat; /* Repeat count. */
+ Elf64_Half m_stride; /* Stride info. */
+} Elf64_Move;
+
+/* Macro to construct move records. */
+#define ELF32_M_SYM(info) ((info) >> 8)
+#define ELF32_M_SIZE(info) ((unsigned char) (info))
+#define ELF32_M_INFO(sym, size) (((sym) << 8) + (unsigned char) (size))
+
+#define ELF64_M_SYM(info) ELF32_M_SYM (info)
+#define ELF64_M_SIZE(info) ELF32_M_SIZE (info)
+#define ELF64_M_INFO(sym, size) ELF32_M_INFO (sym, size)
+
+
+/* Intel 80386 specific definitions. */
+
+/* i386 relocs. */
+
+#define R_386_NONE 0 /* No reloc */
+#define R_386_32 1 /* Direct 32 bit */
+#define R_386_PC32 2 /* PC relative 32 bit */
+#define R_386_GOT32 3 /* 32 bit GOT entry */
+#define R_386_PLT32 4 /* 32 bit PLT address */
+#define R_386_COPY 5 /* Copy symbol at runtime */
+#define R_386_GLOB_DAT 6 /* Create GOT entry */
+#define R_386_JMP_SLOT 7 /* Create PLT entry */
+#define R_386_RELATIVE 8 /* Adjust by program base */
+#define R_386_GOTOFF 9 /* 32 bit offset to GOT */
+#define R_386_GOTPC 10 /* 32 bit PC relative offset to GOT */
+#define R_386_32PLT 11
+#define R_386_TLS_TPOFF 14 /* Offset in static TLS block */
+#define R_386_TLS_IE 15 /* Address of GOT entry for static TLS
+ block offset */
+#define R_386_TLS_GOTIE 16 /* GOT entry for static TLS block
+ offset */
+#define R_386_TLS_LE 17 /* Offset relative to static TLS
+ block */
+#define R_386_TLS_GD 18 /* Direct 32 bit for GNU version of
+ general dynamic thread local data */
+#define R_386_TLS_LDM 19 /* Direct 32 bit for GNU version of
+ local dynamic thread local data
+ in LE code */
+#define R_386_16 20
+#define R_386_PC16 21
+#define R_386_8 22
+#define R_386_PC8 23
+#define R_386_TLS_GD_32 24 /* Direct 32 bit for general dynamic
+ thread local data */
+#define R_386_TLS_GD_PUSH 25 /* Tag for pushl in GD TLS code */
+#define R_386_TLS_GD_CALL 26 /* Relocation for call to
+ __tls_get_addr() */
+#define R_386_TLS_GD_POP 27 /* Tag for popl in GD TLS code */
+#define R_386_TLS_LDM_32 28 /* Direct 32 bit for local dynamic
+ thread local data in LE code */
+#define R_386_TLS_LDM_PUSH 29 /* Tag for pushl in LDM TLS code */
+#define R_386_TLS_LDM_CALL 30 /* Relocation for call to
+ __tls_get_addr() in LDM code */
+#define R_386_TLS_LDM_POP 31 /* Tag for popl in LDM TLS code */
+#define R_386_TLS_LDO_32 32 /* Offset relative to TLS block */
+#define R_386_TLS_IE_32 33 /* GOT entry for negated static TLS
+ block offset */
+#define R_386_TLS_LE_32 34 /* Negated offset relative to static
+ TLS block */
+#define R_386_TLS_DTPMOD32 35 /* ID of module containing symbol */
+#define R_386_TLS_DTPOFF32 36 /* Offset in TLS block */
+#define R_386_TLS_TPOFF32 37 /* Negated offset in static TLS block */
+/* Keep this the last entry. */
+#define R_386_NUM 38
+
+
+/* Legal values for l_flags. */
+
+#define LL_NONE 0
+#define LL_EXACT_MATCH (1 << 0) /* Require exact match */
+#define LL_IGNORE_INT_VER (1 << 1) /* Ignore interface version */
+#define LL_REQUIRE_MINOR (1 << 2)
+#define LL_EXPORTS (1 << 3)
+#define LL_DELAY_LOAD (1 << 4)
+#define LL_DELTA (1 << 5)
+
+/* Entries found in sections of type SHT_MIPS_CONFLICT. */
+
+typedef Elf32_Addr Elf32_Conflict;
+
+
+/* AMD x86-64 relocations. */
+#define R_X86_64_NONE 0 /* No reloc */
+#define R_X86_64_64 1 /* Direct 64 bit */
+#define R_X86_64_PC32 2 /* PC relative 32 bit signed */
+#define R_X86_64_GOT32 3 /* 32 bit GOT entry */
+#define R_X86_64_PLT32 4 /* 32 bit PLT address */
+#define R_X86_64_COPY 5 /* Copy symbol at runtime */
+#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */
+#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */
+#define R_X86_64_RELATIVE 8 /* Adjust by program base */
+#define R_X86_64_GOTPCREL 9 /* 32 bit signed PC relative
+ offset to GOT */
+#define R_X86_64_32 10 /* Direct 32 bit zero extended */
+#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
+#define R_X86_64_16 12 /* Direct 16 bit zero extended */
+#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */
+#define R_X86_64_8 14 /* Direct 8 bit sign extended */
+#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */
+#define R_X86_64_DTPMOD64 16 /* ID of module containing symbol */
+#define R_X86_64_DTPOFF64 17 /* Offset in module's TLS block */
+#define R_X86_64_TPOFF64 18 /* Offset in initial TLS block */
+#define R_X86_64_TLSGD 19 /* 32 bit signed PC relative offset
+ to two GOT entries for GD symbol */
+#define R_X86_64_TLSLD 20 /* 32 bit signed PC relative offset
+ to two GOT entries for LD symbol */
+#define R_X86_64_DTPOFF32 21 /* Offset in TLS block */
+#define R_X86_64_GOTTPOFF 22 /* 32 bit signed PC relative offset
+ to GOT entry for IE symbol */
+#define R_X86_64_TPOFF32 23 /* Offset in initial TLS block */
+
+#define R_X86_64_NUM 24
+
+struct link_map {
+
+ Elf64_Addr l_addr;
+ char *l_name;
+ Elf64_Dyn *l_ld;
+ struct link_map *l_next, *l_prev;
+};
+
+#endif /* elf.h */
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#include <palacios/vmm.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmm_intr.h>
+#include <palacios/vmm_extensions.h>
+#include <palacios/vmm_decoder.h>
+#include <palacios/vmm_types.h>
+#include <palacios/vmm_hypercall.h>
+#include <palacios/vmcb.h>
+
+#include <gears/code_inject.h>
+#include <gears/execve_hook.h>
+#include <gears/sw_intr.h>
+
+#include "elf.h"
+
+struct v3_code_injects code_injects;
+
+static char mmap_code[] = "\xb8\xc0\x00\x00\x00\x31\xdb\xb9"
+ "\x00\x00\x10\x00\xba\x01\x00\x00"
+ "\x00\xbd\x02\x00\x00\x00\x09\xea"
+ "\xbd\x04\x00\x00\x00\x09\xea\xbe"
+ "\x02\x00\x00\x00\xbd\x20\x00\x00"
+ "\x00\x09\xee\xbf\xff\xff\xff\xff"
+ "\x31\xed\xcd\x80\x89\xc3\xb9\x00"
+ "\x00\x10\x00\xc7\x00\xef\xbe\xad"
+ "\xde\x05\x00\x10\x00\x00\x81\xe9"
+ "\x00\x10\x00\x00\x75\xed\xb8\x00"
+ "\xf0\x00\x00\x0f\x01\xd9";
+
+static char munmap_code[] = "\xb8\x5b\x00\x00\x00\xb9\x00\x00"
+ "\x10\x00\xcd\x80\x89\xc3\xb8\x03"
+ "\xf0\x00\x00\x0f\x01\xd9";
+
+static char vmmcall_code[] = "\x48\xc7\xc0\x02\xf0\x00\x00\x0f"
+ "\x01\xd9";
+
+static const char elf_magic[] = {0x7f, 'E', 'L', 'F'};
+
+
+/*
+ * the presence of this is kind of a hack, and exists because
+ * when one of the below hypercall handlers is invoked, we don't
+ * have an elegant way of deciding which inject queue (normal or exec-hooked)
+ * to pull the first element from, so we have this place marker
+ *
+ * This could be ugly with more than one core...
+ */
+static struct v3_code_inject_info * current_inject;
+
+
+static int free_code_inject (struct v3_vm_info * vm, struct v3_code_inject_info * inject) {
+ list_del(&(inject->inject_node));
+ V3_Free(inject);
+ return 0;
+}
+
+
+/*
+ * helper function to save a chunk of code in an inject object's state and
+ * overwrite it with something else (mostly for injecting hypercalls)
+ */
+static int v3_plant_code (struct guest_info * core, struct v3_code_inject_info * inject,
+ char * hva, char * code, uint_t size) {
+ int i;
+
+ // first back up old code
+ inject->old_code = (char*)V3_Malloc(size);
+ for (i = 0; i < size; i++)
+ inject->old_code[i] = *(hva + i);
+
+ // overwrite
+ for (i = 0; i < size; i++)
+ *(hva + i) = *(code + i);
+
+ return 0;
+}
+
+
+static int v3_restore_pre_mmap_state (struct guest_info * core, struct v3_code_inject_info * inject) {
+ int ret;
+ addr_t rip_hva, mmap_gva;
+
+ if ((mmap_gva = (addr_t)core->vm_regs.rbx) < 0) {
+ PrintError("Error running mmap in guest: v3_restore_pre_mmap_state\n");
+ return -1;
+ }
+
+ inject->code_region_gva = mmap_gva;
+
+ ret = v3_gva_to_hva(core,
+ get_addr_linear(core, (addr_t)inject->rip, &(core->segments.cs)),
+ &rip_hva);
+ if (ret == -1) {
+ PrintError("Error translating RIP address: v3_restore_pre_mmap_state\n");
+ return -1;
+ }
+
+ // restore the code overwritten by mmap code
+ memcpy((void*)rip_hva, (void*)inject->old_code, MMAP_SIZE);
+ V3_Free(inject->old_code);
+
+ v3_do_static_inject(core, inject, MMAP_COMPLETE, mmap_gva);
+ return 0;
+}
+
+
+static int v3_restore_pre_inject_state (struct guest_info * core, struct v3_code_inject_info * inject) {
+ int ret;
+ addr_t rip_hva;
+
+ // restore original register state at int 80
+ memcpy(&core->vm_regs, &inject->regs, sizeof(struct v3_gprs));
+ memcpy(&core->ctrl_regs, &inject->ctrl_regs, sizeof(struct v3_ctrl_regs));
+
+ ret = v3_gva_to_hva(core,
+ get_addr_linear(core, (addr_t)inject->rip, &(core->segments.cs)),
+ &rip_hva);
+ if (ret == -1) {
+ PrintError("Error translating RIP address: v3_pre_inject_state\n");
+ return -1;
+ }
+
+ // increment original rip by 2 to skip the int 80
+ core->rip = inject->rip + 2;
+ return 0;
+}
+
+
+/*
+ * This function completes stage 1 of the inject. It is invoked when code to
+ * mmap space for the real code has been injected and has completed. This mmap
+ * code will hypercall back into Placios, getting us here.
+ */
+static int mmap_init_handler (struct guest_info * core, unsigned int hcall_id, void * priv_data) {
+ struct v3_code_inject_info * inject = current_inject;
+ v3_restore_pre_mmap_state(core, inject);
+ return 0;
+}
+
+
+/*
+ * This function is stage 3 of the injection process. It is invoked when the injected code
+ * has run to completeion and run a hypercall at its tail to get back into the
+ * VMM. After this, it only remains to unmap the space we injected it into (the
+ * 4th and final stage)
+ */
+static int inject_code_finish (struct guest_info * core, unsigned int hcall_id, void * priv_data) {
+ struct v3_code_inject_info * inject = current_inject;
+ addr_t hva;
+
+ // is the original int 80 page still paged in?
+ if (v3_gva_to_hva(core,
+ get_addr_linear(core, (addr_t)inject->rip, &(core->segments.cs)),
+ &hva) == -1) {
+ PrintError("No mapping in shadow page table: inject_code_finish\n");
+ return -1;
+ }
+
+ inject->old_code = V3_Malloc(MUNMAP_SIZE);
+ if (!inject->old_code) {
+ PrintError("Problem mallocing old code segment\n");
+ return -1;
+ }
+
+ // save old code and overwrite with munmap
+ v3_plant_code(core, inject, (char*)hva, munmap_code, MUNMAP_SIZE);
+
+ // set rbx with gva of code region
+ core->vm_regs.rbx = inject->code_region_gva;
+
+ // set rip back
+ core->rip = inject->rip;
+ return 0;
+}
+
+
+//
+// this is 4th and final stage of the code injection process. It is invoked after code
+// has been injected to run the munmap system call on our previosuly allocated
+// memory chunk. It results in the clean
+// up and removal of the current inject's structures and state, and its
+// removal from any injection queues
+//
+static int munmap_finish (struct guest_info * core, unsigned int hcall_id, void * priv_data) {
+ struct v3_code_inject_info * inject = current_inject;
+ int i = 0;
+ addr_t hva;
+
+ if (core->vm_regs.rbx < 0) {
+ PrintError("Problem munmapping injected code\n");
+ return -1;
+ }
+
+ if (v3_gva_to_hva(core,
+ get_addr_linear(core, (addr_t)inject->rip, &(core->segments.cs)),
+ &hva) == -1) {
+ PrintError("No mapping in shadow page table: inject_code_finish\n");
+ return -1;
+ }
+
+ for (i = 0; i < MUNMAP_SIZE; i++)
+ *(char*)(hva + i) = *(char*)(inject->old_code + i);
+
+ V3_Free(inject->old_code);
+
+ v3_restore_pre_inject_state(core, inject);
+
+ // clean up
+ v3_remove_code_inject(core->vm_info, inject);
+ current_inject = NULL;
+
+ // raise the original int 80 again, causing an exec
+ return v3_raise_swintr(core, SW_INTR_SYSCALL_VEC);
+}
+
+
+/*
+ * This function is comprises stage 2 of the injection process. Here, the
+ * injected code is copied one page at a time. Each time a new page must be
+ * copied, Palacios injects a page fault for it to bring it into the guest and
+ * host page tables. The fault address will be somewhere in our previously
+ * mmap'd region, but we will jump back to the same RIP every time, which
+ * contains the hypercall that invokes this function.
+ */
+static int mmap_pf_handler (struct guest_info * core, unsigned int hcall_id, void * priv_data) {
+ struct v3_code_inject_info * inject = current_inject;
+ pf_error_t err;
+ int i, offset = core->vm_regs.rbx;
+ addr_t hva, gva = core->vm_regs.rcx;
+ memset((void*)&err, 0, sizeof(pf_error_t));
+
+ // was page fault handled by guest kernel?
+ if (v3_gva_to_hva(core,
+ get_addr_linear(core, gva, &(core->segments.ds)),
+ &hva) == -1) {
+ PrintError("No mapping in shadow page table: mmap_pf_handler\n");
+ return -1;
+ }
+
+ if (offset >= inject->code_size) {
+ core->rip = gva - offset + inject->func_offset;
+
+ // restore registers (here, really just for sane ebp/esp)
+ memcpy(&core->vm_regs, &inject->regs, sizeof(struct v3_gprs));
+ memcpy(&core->ctrl_regs, &inject->ctrl_regs, sizeof(struct v3_ctrl_regs));
+
+ if (v3_gva_to_hva(core,
+ get_addr_linear(core, inject->rip, &(core->segments.cs)),
+ &hva) == -1) {
+ PrintError("No mapping for old RIP in shadow page table: mmap_pf_handler: %p\n", (void*)inject->rip);
+ return -1;
+ }
+
+ // restore the hypercall with original int 80 code
+ for (i = 0; i < VMMCALL_SIZE; i++)
+ *(char*)(hva + i) = *(char*)(inject->old_code + i);
+
+ V3_Free(inject->old_code);
+
+ if (v3_gva_to_hva(core,
+ get_addr_linear(core, core->rip, &(core->segments.cs)),
+ &hva) == -1) {
+ PrintError("No mapping for new RIP in shadow page table: mmap_pf_handler: %p\n", (void*)core->rip);
+ return -1;
+ }
+
+ return 0;
+ }
+
+ // copy the next page of code
+ for (i = 0; i < PAGE_SIZE; i++)
+ *(char*)(hva + i) = *(char*)(inject->code + offset + i);
+
+
+ core->vm_regs.rbx += PAGE_SIZE;
+ core->vm_regs.rcx += PAGE_SIZE;
+
+ // to account for rip being incremented by hcall handler
+ core->rip -= VMMCALL_SIZE;
+
+ // inject the page fault for next page
+ err.user = 1;
+ err.write = 1;
+ v3_inject_guest_pf(core, gva + PAGE_SIZE, err);
+
+ return 0;
+}
+
+
+static int init_code_inject (struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
+ struct v3_code_injects * injects = &code_injects;
+ INIT_LIST_HEAD(&(injects->code_inject_list));
+ INIT_LIST_HEAD(&(injects->hooked_code_injects));
+
+ injects->active = 1;
+
+ current_inject = NULL;
+
+ v3_register_hypercall(vm, 0xf000, mmap_init_handler, NULL);
+ v3_register_hypercall(vm, 0xf001, inject_code_finish, NULL);
+ v3_register_hypercall(vm, 0xf002, mmap_pf_handler, NULL);
+ v3_register_hypercall(vm, 0xf003, munmap_finish, NULL);
+ return 0;
+}
+
+
+static int deinit_code_inject (struct v3_vm_info * vm, void * priv_data) {
+ struct v3_code_injects * injects = &code_injects;
+ struct v3_code_inject_info * inject = NULL;
+ struct v3_code_inject_info * tmp = NULL;
+
+ list_for_each_entry_safe(inject, tmp, &(injects->code_inject_list), inject_node) {
+ free_code_inject(vm, inject);
+ }
+
+ list_for_each_entry_safe(inject, tmp, &(injects->hooked_code_injects), inject_node) {
+ free_code_inject(vm, inject);
+ }
+
+ v3_remove_hypercall(vm, 0xf000);
+ v3_remove_hypercall(vm, 0xf001);
+ v3_remove_hypercall(vm, 0xf002);
+ v3_remove_hypercall(vm, 0xf003);
+ return 0;
+}
+
+
+
+
+/* KCH currently unused */
+/* this dynamic linking stuff will eventually be moved out of this file... */
+static addr_t v3_get_dyn_entry (struct guest_info * core, addr_t elf_gva, addr_t elf_hva,
+ int section_code) {
+ ElfW(Ehdr) *ehdr;
+ ElfW(Phdr) *phdr, *phdr_cursor;
+ ElfW(Dyn) *dyn = NULL;
+ int i, j, num_dyn;
+ addr_t hva;
+
+ ehdr = (ElfW(Ehdr)*)elf_hva;
+ phdr = (ElfW(Phdr)*)(elf_hva + ehdr->e_phoff);
+ phdr_cursor = phdr;
+
+ //PrintDebug("num phdrs: %d\n", ehdr->e_phnum);
+ for (i = 0; i < ehdr->e_phnum; i++, phdr_cursor++) {
+ if (phdr_cursor->p_type == PT_DYNAMIC) {
+ num_dyn = phdr_cursor->p_filesz / sizeof(ElfW(Dyn));
+ dyn = (ElfW(Dyn)*)(elf_hva + phdr_cursor->p_offset);
+
+ // make sure this addr is paged in
+ if (v3_gva_to_gpa(core, elf_gva + phdr_cursor->p_offset, &hva) == -1) {
+ PrintError("Dynamic segment isn't paged in\n");
+ return 0;
+ }
+
+ for (j = 0; j < num_dyn; j++, dyn++) {
+ if (dyn->d_tag == section_code) {
+ switch (section_code) {
+ case DT_STRSZ:
+ case DT_SYMENT:
+ case DT_PLTREL:
+ return (addr_t)dyn->d_un.d_val;
+ default:
+ return (addr_t)dyn->d_un.d_ptr;
+ }
+ }
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+
+static int v3_do_resolve (struct guest_info * core, addr_t elf_gva, addr_t elf_hva) {
+
+ addr_t got_gva, symtab_gva, strtab_gva;
+
+ if ((got_gva = v3_get_dyn_entry(core, elf_gva, elf_hva, DT_PLTGOT)) == 0) {
+ PrintError("Problem getting at PLTGOT in v3_do_resolve\n");
+ return -1;
+ }
+
+
+ if ((strtab_gva = v3_get_dyn_entry(core, elf_gva, elf_hva, DT_STRTAB)) == 0) {
+ PrintError("Problem getting at PLTGOT in v3_do_resolve\n");
+ return -1;
+ }
+
+ if ((symtab_gva = v3_get_dyn_entry(core, elf_gva, elf_hva, DT_SYMTAB)) == 0) {
+ PrintError("Problem getting at PLTGOT in v3_do_resolve\n");
+ return -1;
+ }
+
+
+ PrintDebug("Got gva: %p\n", (void*)got_gva);
+ PrintDebug("Symtab gva: %p\n", (void*)symtab_gva);
+ PrintDebug("Strtab gva: %p\n", (void*)strtab_gva);
+ return 0;
+}
+
+static int v3_do_cont (struct guest_info * core, struct v3_code_inject_info * inject, addr_t check) {
+
+ addr_t hva;
+ pf_error_t err_code;
+ int ret;
+
+ ret = v3_gva_to_gpa(core, check, &hva);
+
+ // page fault wasn't handled by kernel??
+ if (ret == -1) {
+ PrintError("ERROR: no mapping in guest page table!\n");
+ return -1;
+ }
+
+ ret = v3_gva_to_hva(core,
+ get_addr_linear(core, check, &(core->segments.cs)),
+ &hva);
+
+ // this should never happen...
+ if (ret == -1) {
+ PrintError("ERROR: no mapping in shadow page table\n");
+ return -1;
+ }
+
+ if (strncmp(elf_magic, (char*)hva, ELF_MAG_SIZE) != 0) {
+
+ check -= PAGE_SIZE;
+ inject->cont->check_addr = check;
+ inject->cont->cont_func = v3_do_cont;
+
+ memset((void*)&err_code, 0, sizeof(pf_error_t));
+ err_code.user = 1;
+
+ if (v3_inject_guest_pf(core, check, err_code) < 0) {
+ PrintError("Problem injecting pf\n");
+ return -1;
+ }
+
+ return E_NEED_PF;
+ }
+
+ PrintDebug("Found ELF!\n");
+ V3_Free(inject->cont);
+ inject->cont = NULL;
+ return v3_do_resolve(core, check, hva);
+}
+
+
+/*
+ * mmap_state: 0 = no inject space in procces yet
+ * 1 = code segment space mmap'd, still need data
+ * 2 = code & data segments mmap'd, ready to inject real code
+ *
+ */
+//
+// return E_NEED_PF up the call stack to signal page fault injection
+// (so rip doesn't get incremented and sw_intr doesn't get injected
+//
+int v3_do_inject (struct guest_info * core, struct v3_code_inject_info * inject, int mmap_state) {
+ addr_t rip_hva, elf_hva, elf_gva;
+ int ret = 0, i = 0;
+ pf_error_t err_code;
+
+ memset((void*)&err_code, 0, sizeof(pf_error_t));
+
+ ret = v3_gva_to_hva(core,
+ get_addr_linear(core, (addr_t)core->rip, &(core->segments.cs)),
+ &rip_hva);
+ if (ret == -1) {
+ PrintError("Error translating RIP address in v3_do_inject\n");
+ return -1;
+ }
+
+ elf_gva = (addr_t)(core->rip & 0xfffffffffffff000);
+
+ for (i = 0; i < PAGES_BACK; i++, elf_gva -= PAGE_SIZE) {
+
+ ret = v3_gva_to_hva(core,
+ get_addr_linear(core, elf_gva, &(core->segments.cs)),
+ &elf_hva);
+
+ // need to page in
+ if (ret == -1) {
+
+ PrintDebug("Found a page we need to fault in\n");
+ inject->cont = (struct v3_cont *)V3_Malloc(sizeof(struct v3_cont));
+ ret = v3_gva_to_gpa(core, elf_gva, &elf_hva);
+
+ if (ret == -1) {
+ PrintDebug("no mapping in guest page table\n");
+ }
+
+ inject->cont->check_addr = elf_gva;
+ inject->cont->cont_func = v3_do_cont;
+ err_code.user = 1;
+
+ PrintDebug("Injecting pf for addr: %p\n", (void*) elf_gva);
+
+ if (v3_inject_guest_pf(core, elf_gva, err_code) < 0) {
+ PrintError("Problem injecting pf\n");
+ return -1;
+ }
+
+ return E_NEED_PF;
+ }
+
+ if (strncmp(elf_magic, (char*)elf_hva, ELF_MAG_SIZE) == 0) {
+ PrintDebug("Found elf_magic!\n");
+ break;
+ }
+
+ }
+
+
+ V3_Free(inject->cont);
+ inject->cont = NULL;
+ return v3_do_resolve(core, elf_gva, elf_hva);
+
+ PrintDebug("Planting code\n");
+ v3_plant_code(core, inject, (char*)rip_hva, mmap_code, MMAP_SIZE);
+
+ PrintDebug("Saving register context\n");
+ PrintDebug("First 8 bytes 0x%lx\n", *(long*)rip_hva);
+ /* may need to save v3_ctrl registers too... */
+ memcpy(&inject->regs, &core->vm_regs, sizeof(struct v3_gprs));
+ inject->rip = core->rip;
+
+ /* jump to injected code */
+ PrintDebug("Jumping to injected code\n");
+ return 0;
+}
+
+
+/*
+ * mmap_state: NO_MMAP = no inject space mmap'd in procces yet
+ * MMAP_COMPLETE = mmap complete, time to do real inject
+ *
+ */
+int v3_do_static_inject (struct guest_info * core, struct v3_code_inject_info * inject,
+ int mmap_state, addr_t region_gva) {
+ addr_t rip_hva;
+ int ret;
+
+
+ ret = v3_gva_to_hva(core,
+ get_addr_linear(core, (addr_t)core->rip, &(core->segments.cs)),
+ &rip_hva);
+ if (ret == -1) {
+ PrintError("Error translating RIP address: v3_do_static_inject\n");
+ return -1;
+ }
+
+ switch (mmap_state) {
+ case NO_MMAP:
+ {
+ // inject mmap code
+ v3_plant_code(core, inject, (char*)rip_hva, mmap_code, MMAP_SIZE);
+
+ // save registers (gprs and ctrl regs, and rip)
+ memcpy(&inject->regs, &core->vm_regs, sizeof(struct v3_gprs));
+ memcpy(&inject->ctrl_regs, &core->ctrl_regs, sizeof(struct v3_ctrl_regs));
+ inject->rip = core->rip;
+
+ // jump to mmap code, and squash original swintr
+ return E_NEED_PF;
+ }
+ case MMAP_COMPLETE:
+ {
+ pf_error_t err_code;
+ memset((void*)&err_code, 0, sizeof(pf_error_t));
+
+ ret = v3_gva_to_hva(core,
+ get_addr_linear(core, (addr_t)inject->rip, &(core->segments.cs)),
+ &rip_hva);
+ if (ret == -1) {
+ PrintError("Error translating RIP address: v3_do_static_inject\n");
+ return -1;
+ }
+
+ // inject hypercall code
+ v3_plant_code(core, inject, (char*)rip_hva, vmmcall_code, VMMCALL_SIZE);
+
+ /* store current copy offset in rbx, fault gva in rcx */
+ core->vm_regs.rbx = 0;
+ core->vm_regs.rcx = region_gva;
+
+ err_code.user = 1;
+ err_code.write = 1;
+
+ // inject the first page fault for the code block
+ if (v3_inject_guest_pf(core, region_gva, err_code) < 0) {
+ PrintError("Problem injecting page fault in v3_do_static_inject\n");
+ return -1;
+ }
+
+ // returning here will run hypercall 0xf002
+ // This will get us back in v3_mmap_pf_handler
+ core->rip = inject->rip;
+ return 0;
+ }
+ default:
+ PrintError("Invalid mmap state\n");
+ return -1;
+ }
+ return 0;
+}
+
+
+/*
+ * This function is invoked in one of two ways:
+ * 1. A syscall has been intercepted and we've popped off the next pending
+ * inject
+ * 2. An exec has been intercepted and we've popped off the next hooked inject
+ *
+ */
+int v3_handle_guest_inject (struct guest_info * core, void * priv_data) {
+ struct v3_code_inject_info * inject = (struct v3_code_inject_info *)priv_data;
+
+ /* eventually this should turn into a mutex lock */
+ if (current_inject) {
+ PrintError("An inject is already in progress\n");
+ return -1;
+ } else {
+ current_inject = inject;
+ inject->in_progress = 1;
+ }
+
+ if (!inject->is_dyn) {
+ return v3_do_static_inject(core, inject, 0, (addr_t)NULL);
+ } else {
+ if (inject->cont)
+ return inject->cont->cont_func(core, inject, inject->cont->check_addr);
+ else
+ return v3_do_inject(core, inject, 0);
+ }
+
+ return 0;
+}
+
+
+int v3_insert_code_inject (void * ginfo, void * code, int size,
+ char * bin_file, int is_dyn, int is_exec_hooked, int func_offset) {
+ struct v3_code_injects * injects = &code_injects;
+ struct v3_vm_info * vm = (struct v3_vm_info *)ginfo;
+ struct v3_code_inject_info * inject;
+
+ if (!injects->active) {
+ PrintError("Code injection has not been initialized\n");
+ return -1;
+ }
+
+ inject = V3_Malloc(sizeof(struct v3_code_inject_info));
+ if (!inject) {
+ PrintError("Error allocating inject info in v3_insert_code_inject\n");
+ return -1;
+ }
+
+ memset(inject, 0, sizeof(struct v3_code_inject_info));
+
+ inject->code = code;
+ inject->code_size = size;
+ inject->is_dyn = is_dyn;
+ inject->func_offset = func_offset;
+ inject->bin_file = bin_file;
+ inject->is_exec_hooked = is_exec_hooked;
+
+ if (is_exec_hooked) {
+ v3_hook_executable(vm, bin_file, v3_handle_guest_inject, (void*)inject);
+ list_add_tail(&(inject->inject_node), &(injects->hooked_code_injects));
+ } else {
+ list_add_tail(&(inject->inject_node), &(injects->code_inject_list));
+ }
+
+ return 0;
+}
+
+
+int v3_remove_code_inject (struct v3_vm_info * vm, struct v3_code_inject_info * inject) {
+
+ PrintDebug("Removing and freeing code inject\n");
+ if (inject->is_exec_hooked) {
+ if (v3_unhook_executable(vm, inject->bin_file) < 0) {
+ PrintError("Problem unhooking executable in v3_remove_code_inject\n");
+ return -1;
+ }
+ }
+
+ free_code_inject(vm, inject);
+ return 0;
+}
+
+
+static struct v3_extension_impl code_inject_impl = {
+ .name = "code_inject",
+ .init = init_code_inject,
+ .deinit = deinit_code_inject,
+ .core_init = NULL,
+ .core_deinit = NULL,
+ .on_entry = NULL,
+ .on_exit = NULL
+};
+register_extension(&code_inject_impl);
+
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+#include <palacios/vmm.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_intr.h>
+#include <palacios/vmm_extensions.h>
+
+#include <gears/process_environment.h>
+#include <gears/execve_hook.h>
+#include <gears/env_inject.h>
+
+static struct v3_env_injects env_injects;
+
+static int free_env_inject (struct v3_vm_info * vm, struct v3_env_inject_info * inject) {
+ list_del(&(inject->inject_node));
+ V3_Free(inject);
+ return 0;
+}
+
+static int v3_env_inject_handler (struct guest_info * core, void * priv_data) {
+ int i = 0;
+ struct v3_env_inject_info * inject = (struct v3_env_inject_info*)priv_data;
+
+ for (; i < inject->num_env_vars; i++) {
+ PrintDebug("Envvar[%d]: %s\n", i, inject->env_vars[i]);
+ }
+
+ int ret = v3_inject_strings(core, (const char**)NULL,
+ (const char**)inject->env_vars, 0, inject->num_env_vars);
+ if (ret == -1) {
+ PrintDebug("Error injecting strings in v3_env_inject_handler\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int init_env_inject (struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
+ struct v3_env_injects * injects = &env_injects;
+ INIT_LIST_HEAD(&(injects->env_inject_list));
+ return 0;
+}
+
+
+static int deinit_env_inject (struct v3_vm_info * vm, void * priv_data) {
+ struct v3_env_injects * injects = &env_injects;
+ struct v3_env_inject_info * inject = NULL;
+ struct v3_env_inject_info * tmp = NULL;
+
+ list_for_each_entry_safe(inject, tmp, &(injects->env_inject_list), inject_node) {
+ free_env_inject(vm, inject);
+ }
+
+ return 0;
+}
+
+
+int v3_insert_env_inject (void * ginfo, char ** strings, int num_strings, char * bin_name) {
+ struct v3_env_injects * injects = &env_injects;
+ struct v3_env_inject_info * inject = V3_Malloc(sizeof(struct v3_env_inject_info));
+
+ memset(inject, 0, sizeof(struct v3_env_inject_info));
+
+ inject->env_vars = strings;
+ inject->num_env_vars = num_strings;
+ inject->bin_name = bin_name;
+
+ list_add(&(inject->inject_node), &(injects->env_inject_list));
+
+ v3_hook_executable((struct v3_vm_info *)ginfo, bin_name, v3_env_inject_handler, (void*)inject);
+
+ return 0;
+}
+
+
+int v3_remove_env_inject (struct v3_vm_info * vm, struct v3_env_inject_info * inject) {
+
+ if (v3_unhook_executable(vm, inject->bin_name) < 0) {
+ PrintError("Problem unhooking executable in v3_remove_env_inject\n");
+ return -1;
+ }
+
+ free_env_inject(vm, inject);
+ return 0;
+}
+
+
+static struct v3_extension_impl env_inject_impl = {
+ .name = "env_inject",
+ .init = init_env_inject,
+ .deinit = deinit_env_inject,
+ .core_init = NULL,
+ .core_deinit = NULL,
+ .on_entry = NULL,
+ .on_exit = NULL
+};
+
+register_extension(&env_inject_impl);
+
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Kyle C. Hale <kh@u.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_string.h>
+#include <palacios/vmm_hashtable.h>
+#include <palacios/vmm_extensions.h>
+#include <palacios/vmm_decoder.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vm_guest_mem.h>
+
+#include <gears/syscall_hijack.h>
+#include <gears/execve_hook.h>
+#include <gears/syscall_ref.h>
+
+#ifdef V3_CONFIG_EXT_CODE_INJECT
+#include <gears/code_inject.h>
+#endif
+
+static struct v3_exec_hooks exec_hooks;
+
+static int free_hook (struct v3_vm_info * vm, struct exec_hook * hook) {
+ list_del(&(hook->hook_node));
+ V3_Free(hook);
+ return 0;
+}
+
+static uint_t exec_hash_fn (addr_t key) {
+ return v3_hash_long(key, sizeof(void *) * 8);
+}
+
+
+static int exec_eq_fn (addr_t key1, addr_t key2) {
+ return (key1 == key2);
+}
+
+
+static int init_exec_hooks (struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
+
+ return 0;
+}
+
+static int init_exec_hooks_core (struct guest_info * core, void * priv_data) {
+ struct v3_exec_hooks * hooks = &exec_hooks;
+ INIT_LIST_HEAD(&(hooks->hook_list));
+ hooks->bin_table = v3_create_htable(0, exec_hash_fn, exec_eq_fn);
+
+ if (hooks->bin_table == NULL) {
+ PrintError("Problem creating execve hash table\n");
+ return -1;
+ }
+
+ if (core->cpu_mode == LONG || core->cpu_mode == LONG_32_COMPAT) {
+ PrintDebug("Hooking execve 64\n");
+ v3_hook_syscall(core, SYS64_EXECVE, v3_execve_handler, NULL);
+ } else {
+ PrintDebug("Hooking execve, cpu mode: %x\n", core->cpu_mode);
+ v3_hook_syscall(core, SYS32_EXECVE, v3_execve_handler, NULL);
+ }
+ return 0;
+}
+
+static int deinit_exec_hooks_core (struct guest_info * core, void * priv_data) {
+ struct v3_exec_hooks * hooks = &exec_hooks;
+ struct exec_hook * hook = NULL;
+ struct exec_hook * tmp = NULL;
+
+ list_for_each_entry_safe(hook, tmp, &(hooks->hook_list), hook_node) {
+ free_hook(core->vm_info, hook);
+ }
+
+ v3_free_htable(hooks->bin_table, 0, 0);
+
+ return 0;
+}
+
+
+int v3_hook_executable (struct v3_vm_info * vm,
+ const uchar_t * binfile,
+ int (*handler)(struct guest_info * core, void * priv_data),
+ void * priv_data)
+{
+ struct exec_hook * hook = V3_Malloc(sizeof(struct exec_hook));
+ struct v3_exec_hooks * hooks = &exec_hooks;
+ addr_t key;
+
+ memset(hook, 0, sizeof(struct exec_hook));
+
+ hook->handler = handler;
+ hook->priv_data = priv_data;
+
+ // we hash the name of the file to produce a key
+ key = v3_hash_buffer((uchar_t*)binfile, strlen(binfile));
+
+ v3_htable_insert(hooks->bin_table, key, (addr_t)hook);
+ list_add(&(hook->hook_node), &(hooks->hook_list));
+
+ return 0;
+}
+
+
+int v3_unhook_executable (struct v3_vm_info * vm, const uchar_t * binfile) {
+ struct exec_hook * hook;
+ struct v3_exec_hooks * hooks = &exec_hooks;
+ addr_t key;
+
+ key = v3_hash_buffer((uchar_t*)binfile, strlen((uchar_t*)binfile));
+ if ((hook = (struct exec_hook*)v3_htable_search(hooks->bin_table, key)) != NULL) {
+ free_hook(vm, hook);
+ } else {
+ PrintError("Could not unhook executable '%s'\n", binfile);
+ return -1;
+ }
+
+ if (v3_htable_remove(hooks->bin_table, key, 0) == (addr_t)NULL) {
+ PrintError("Error trying to remove key from htable: v3_unhook_executable\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct v3_extension_impl execve_impl = {
+ .name = "execve_intercept",
+ .init = init_exec_hooks,
+ .deinit = NULL,
+ .core_init = init_exec_hooks_core,
+ .core_deinit = deinit_exec_hooks_core,
+ .on_entry = NULL,
+ .on_exit = NULL
+};
+
+register_extension(&execve_impl);
+
+
+int v3_execve_handler (struct guest_info * core, uint_t syscall_nr, void * priv_data) {
+ addr_t hva, key;
+ struct v3_exec_hooks * hooks = &exec_hooks;
+ struct exec_hook * hook;
+ int ret;
+
+
+ // TODO: make sure this returns immediately if we're not booted up already
+ if (core->mem_mode == PHYSICAL_MEM) {
+ ret = v3_gpa_to_hva(core, get_addr_linear(core, (addr_t)core->vm_regs.rbx, &(core->segments.ds)), &hva);
+ } else {
+ ret = v3_gva_to_hva(core, get_addr_linear(core, (addr_t)core->vm_regs.rbx, &(core->segments.ds)), &hva);
+ }
+
+ if (ret == -1) {
+ PrintError("Error translating file path in sysexecve handler\n");
+ return 0;
+ }
+
+ key = v3_hash_buffer((uchar_t*)hva, strlen((uchar_t*)hva));
+ if ((hook = (struct exec_hook*)v3_htable_search(hooks->bin_table, key)) != NULL) {
+
+ ret = hook->handler(core, hook->priv_data);
+ if (ret == -1) {
+ PrintDebug("Error handling execve hook\n");
+ return -1;
+ }
+
+#ifdef V3_CONFIG_EXT_CODE_INJECT
+ if (ret == E_NEED_PF) {
+ return E_NEED_PF;
+ }
+#endif
+ }
+
+ return 0;
+}
+
#include <palacios/vmm.h>
#include <palacios/vmm_decoder.h>
-#include <palacios/vmm_process_environment.h>
#include <palacios/vm_guest.h>
#include <palacios/vm_guest_mem.h>
+#include <gears/process_environment.h>
+
+static struct v3_execve_varchunk var_dump;
+
+
+/* KCH: currently only checks if we can perform a user-mode write
+ return 1 on success */
+static int v3_gva_can_access(struct guest_info * core, addr_t gva) {
+
+ v3_reg_t guest_cr3 = 0;
+ pf_error_t access_type;
+ pt_access_status_t access_status;
+
+ access_type.write = 1;
+ access_type.user = 1;
+
+ if (core->mem_mode == PHYSICAL_MEM) {
+ return -1;
+ }
+
+ if (core->shdw_pg_mode == SHADOW_PAGING) {
+ guest_cr3 = core->shdw_pg_state.guest_cr3;
+ } else {
+ guest_cr3 = core->ctrl_regs.cr3;
+ }
+
+ // guest is in paged mode
+ switch (core->cpu_mode) {
+ case PROTECTED:
+ if (v3_check_guest_pt_32(core, guest_cr3, gva, access_type, &access_status) == -1) {
+ return -1;
+ }
+ break;
+ case PROTECTED_PAE:
+ if (v3_check_guest_pt_32pae(core, guest_cr3, gva, access_type, &access_status) == -1) {
+ return -1;
+ }
+ break;
+ case LONG:
+ case LONG_32_COMPAT:
+ case LONG_16_COMPAT:
+ if (v3_check_guest_pt_64(core, guest_cr3, gva, access_type, &access_status) == -1) {
+ return -1;
+ }
+ break;
+ default:
+ return -1;
+ }
+
+ if (access_status != PT_ACCESS_OK) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
static int v3_copy_chunk_guest32(struct guest_info * core, addr_t gva, uint_t argcnt, uint_t envcnt) {
int ret = 0, i = 0;
addr_t hva;
- uint32_t tmp_args[core->var_dump.argc];
- uint32_t tmp_envs[core->var_dump.envc];
+ uint32_t tmp_args[var_dump.argc];
+ uint32_t tmp_envs[var_dump.envc];
PrintDebug("Initiating copy into guest (32bit)\n");
// copy the env strings (we're moving top-down through the stack)
char * host_cursor = (char*) hva;
uint32_t guest_cursor = (uint32_t) gva;
- host_cursor -= strlen(core->var_dump.envp[i]) + 1;
- guest_cursor -= strlen(core->var_dump.envp[i]) + 1;
- while (i < core->var_dump.envc) {
- //PrintDebug("Copying envvar#%d: %s\n", i, core->var_dump.envp[i]);
- strcpy(host_cursor, core->var_dump.envp[i]);
+ host_cursor -= strlen(var_dump.envp[i]) + 1;
+ guest_cursor -= strlen(var_dump.envp[i]) + 1;
+ while (i < var_dump.envc) {
+ //PrintDebug("Copying envvar#%d: %s\n", i, var_dump.envp[i]);
+ strcpy(host_cursor, var_dump.envp[i]);
tmp_envs[i] = guest_cursor;
i++;
- if (i != core->var_dump.envc) {
- host_cursor -= strlen(core->var_dump.envp[i]) + 1;
- guest_cursor -= strlen(core->var_dump.envp[i]) + 1;
+ if (i != var_dump.envc) {
+ host_cursor -= strlen(var_dump.envp[i]) + 1;
+ guest_cursor -= strlen(var_dump.envp[i]) + 1;
}
}
// then the arg strings
i = 0;
- host_cursor -= strlen(core->var_dump.argv[i]) + 1;
- guest_cursor -= strlen(core->var_dump.argv[i]) + 1;
- while (i < core->var_dump.argc) {
- //PrintDebug("Copying arg #%d: %s\n", i, core->var_dump.argv[i]);
- strcpy(host_cursor, core->var_dump.argv[i]);
+ host_cursor -= strlen(var_dump.argv[i]) + 1;
+ guest_cursor -= strlen(var_dump.argv[i]) + 1;
+ while (i < var_dump.argc) {
+ //PrintDebug("Copying arg #%d: %s\n", i, var_dump.argv[i]);
+ strcpy(host_cursor, var_dump.argv[i]);
tmp_args[i] = guest_cursor;
i++;
- if (i != core->var_dump.argc) {
- host_cursor -= strlen(core->var_dump.argv[i]) + 1;
- guest_cursor -= strlen(core->var_dump.argv[i]) + 1;
+ if (i != var_dump.argc) {
+ host_cursor -= strlen(var_dump.argv[i]) + 1;
+ guest_cursor -= strlen(var_dump.argv[i]) + 1;
}
}
host_cursor -= 4;
guest_cursor -= 4;
- for (i = 0; i < core->var_dump.envc; i++) {
+ for (i = 0; i < var_dump.envc; i++) {
*((uint32_t*)host_cursor) = tmp_envs[i];
host_cursor -= 4;
guest_cursor -= 4;
*((uint32_t*)host_cursor) = 0;
host_cursor -= 4;
guest_cursor -= 4;
- for (i = 0; i < core->var_dump.argc; i++) {
+ for (i = 0; i < var_dump.argc; i++) {
*((uint32_t*)host_cursor) = tmp_args[i];
host_cursor -= 4;
guest_cursor -= 4;
core->vm_regs.rcx = guest_cursor + 4;
// free up our temporary storage in the VMM
- for (i = 0; i < core->var_dump.argc; i++) {
- V3_Free(core->var_dump.argv[i]);
+ for (i = 0; i < var_dump.argc; i++) {
+ V3_Free(var_dump.argv[i]);
}
- for (i = 0; i < core->var_dump.envc; i++) {
- V3_Free(core->var_dump.envp[i]);
+ for (i = 0; i < var_dump.envc; i++) {
+ V3_Free(var_dump.envp[i]);
}
- V3_Free(core->var_dump.envp);
- V3_Free(core->var_dump.argv);
+ V3_Free(var_dump.envp);
+ V3_Free(var_dump.argv);
return 0;
}
/* account for new args */
argc += argcnt;
- core->var_dump.argv = (char**)V3_Malloc(sizeof(char*)*argc);
- core->var_dump.argc = argc;
+ var_dump.argv = (char**)V3_Malloc(sizeof(char*)*argc);
+ var_dump.argc = argc;
bytes += sizeof(uint32_t)*argc;
cursor = (char*)argv;
char * tmpstr = (char*)V3_Malloc(strlen((char*)argvn) + 1);
/* copy the pointer */
- core->var_dump.argv[i] = tmpstr;
+ var_dump.argv[i] = tmpstr;
/* copy the string */
strncpy(tmpstr, (char*)argvn, strlen((char*)argvn) + 1);
while (j < argcnt) {
char * tmpstr = (char*)V3_Malloc(strlen(argstrs[j]) + 1);
strncpy(tmpstr, argstrs[i], strlen(argstrs[j]) + 1);
- core->var_dump.argv[i] = tmpstr;
+ var_dump.argv[i] = tmpstr;
bytes += strlen(argstrs[j]) + 1;
i++; j++;
}
}
envc += envcnt;
- core->var_dump.envp = (char**)V3_Malloc(sizeof(char*)*envc);
- core->var_dump.envc = envc;
+ var_dump.envp = (char**)V3_Malloc(sizeof(char*)*envc);
+ var_dump.envc = envc;
bytes += sizeof(uint32_t)*envc;
cursor = (char*)envp;
char * tmpstr = (char*)V3_Malloc(strlen((char*)envpn) + 1);
/* copy the pointer */
- core->var_dump.envp[i] = tmpstr;
+ var_dump.envp[i] = tmpstr;
/* deepcopy the string */
strncpy(tmpstr, (char*)envpn, strlen((char*)envpn) + 1);
while (j < envcnt) {
char * tmpstr = (char*)V3_Malloc(strlen(envstrs[j]) + 1);
strncpy(tmpstr, envstrs[j], strlen(envstrs[j]) + 1);
- core->var_dump.envp[i] = tmpstr;
+ var_dump.envp[i] = tmpstr;
bytes += strlen(envstrs[j]) + 1;
i++; j++;
}
/* account for padding for strings
and 2 null pointers */
bytes += (bytes % 4) + 8;
- core->var_dump.bytes = bytes;
+ var_dump.bytes = bytes;
return bytes;
}
int ret = 0, i = 0;
addr_t hva;
- uint64_t tmp_args[core->var_dump.argc];
- uint64_t tmp_envs[core->var_dump.envc];
+ uint64_t tmp_args[var_dump.argc];
+ uint64_t tmp_envs[var_dump.envc];
PrintDebug("Initiating copy into guest (64bit)\n");
char * host_cursor = (char*) hva;
uint64_t guest_cursor = (uint64_t) gva;
- host_cursor -= strlen(core->var_dump.envp[i]) + 1;
- guest_cursor -= strlen(core->var_dump.envp[i]) + 1;
- while (i < core->var_dump.envc) {
- //PrintDebug("Copying envvar#%d: %s\n", i, core->var_dump.envp[i]);
- strcpy(host_cursor, core->var_dump.envp[i]);
+ host_cursor -= strlen(var_dump.envp[i]) + 1;
+ guest_cursor -= strlen(var_dump.envp[i]) + 1;
+ while (i < var_dump.envc) {
+ //PrintDebug("Copying envvar#%d: %s\n", i, var_dump.envp[i]);
+ strcpy(host_cursor, var_dump.envp[i]);
tmp_envs[i] = guest_cursor;
i++;
- if (i != core->var_dump.envc) {
- host_cursor -= strlen(core->var_dump.envp[i]) + 1;
- guest_cursor -= strlen(core->var_dump.envp[i]) + 1;
+ if (i != var_dump.envc) {
+ host_cursor -= strlen(var_dump.envp[i]) + 1;
+ guest_cursor -= strlen(var_dump.envp[i]) + 1;
}
}
i = 0;
- host_cursor -= strlen(core->var_dump.argv[i]) + 1;
- guest_cursor -= strlen(core->var_dump.argv[i]) + 1;
- while (i < core->var_dump.argc) {
- //PrintDebug("Copying arg #%d: %s\n", i, core->var_dump.argv[i]);
- strcpy(host_cursor, core->var_dump.argv[i]);
+ host_cursor -= strlen(var_dump.argv[i]) + 1;
+ guest_cursor -= strlen(var_dump.argv[i]) + 1;
+ while (i < var_dump.argc) {
+ //PrintDebug("Copying arg #%d: %s\n", i, var_dump.argv[i]);
+ strcpy(host_cursor, var_dump.argv[i]);
tmp_args[i] = guest_cursor;
i++;
- if (i != core->var_dump.argc) {
- host_cursor -= strlen(core->var_dump.argv[i]) + 1;
- guest_cursor -= strlen(core->var_dump.argv[i]) + 1;
+ if (i != var_dump.argc) {
+ host_cursor -= strlen(var_dump.argv[i]) + 1;
+ guest_cursor -= strlen(var_dump.argv[i]) + 1;
}
}
host_cursor -= 8;
guest_cursor -= 8;
- for (i = 0; i < core->var_dump.envc; i++) {
+ for (i = 0; i < var_dump.envc; i++) {
*((uint64_t*)host_cursor) = tmp_envs[i];
host_cursor -= 8;
guest_cursor -= 8;
*((uint64_t*)host_cursor) = 0;
host_cursor -= 8;
guest_cursor -= 8;
- for (i = 0; i < core->var_dump.argc; i++) {
+ for (i = 0; i < var_dump.argc; i++) {
*((uint64_t*)host_cursor) = tmp_args[i];
host_cursor -= 8;
guest_cursor -= 8;
core->vm_regs.rcx = guest_cursor + 8;
- for (i = 0; i < core->var_dump.argc; i++) {
- V3_Free(core->var_dump.argv[i]);
+ for (i = 0; i < var_dump.argc; i++) {
+ V3_Free(var_dump.argv[i]);
}
- for (i = 0; i < core->var_dump.envc; i++) {
- V3_Free(core->var_dump.envp[i]);
+ for (i = 0; i < var_dump.envc; i++) {
+ V3_Free(var_dump.envp[i]);
}
- V3_Free(core->var_dump.envp);
- V3_Free(core->var_dump.argv);
+ V3_Free(var_dump.envp);
+ V3_Free(var_dump.argv);
return 0;
}
/* account for new strings */
argc += argcnt;
- core->var_dump.argv = (char**)V3_Malloc(sizeof(char*)*argc);
- core->var_dump.argc = argc;
+ var_dump.argv = (char**)V3_Malloc(sizeof(char*)*argc);
+ var_dump.argc = argc;
bytes += sizeof(char*)*argc;
cursor = (char*)argv;
char * tmpstr = (char*)V3_Malloc(strlen((char*)argvn) + 1);
/* copy the pointer */
- core->var_dump.argv[i] = tmpstr;
+ var_dump.argv[i] = tmpstr;
/* copy the string */
strncpy(tmpstr, (char*)argvn, strlen((char*)argvn) + 1);
while (j < argcnt) {
char * tmpstr = (char*)V3_Malloc(strlen(argstrs[j]) + 1);
strncpy(tmpstr, argstrs[j], strlen(argstrs[j]) + 1);
- core->var_dump.argv[i] = tmpstr;
+ var_dump.argv[i] = tmpstr;
bytes += strlen(argstrs[j]) + 1;
i++; j++;
}
}
envc += envcnt;
- core->var_dump.envp = (char**)V3_Malloc(sizeof(char*)*envc);
- core->var_dump.envc = envc;
+ var_dump.envp = (char**)V3_Malloc(sizeof(char*)*envc);
+ var_dump.envc = envc;
bytes += sizeof(uint64_t)*(envc);
char * tmpstr = (char*)V3_Malloc(strlen((char*)envpn) + 1);
/* copy the pointer */
- core->var_dump.envp[i] = tmpstr;
+ var_dump.envp[i] = tmpstr;
/* deepcopy the string */
strncpy(tmpstr, (char*)envpn, strlen((char*)envpn) + 1);
while (j < envcnt) {
char * tmpstr = (char*)V3_Malloc(strlen(envstrs[j]) + 1);
strncpy(tmpstr, envstrs[i], strlen(envstrs[j]) + 1);
- core->var_dump.envp[i] = tmpstr;
+ var_dump.envp[i] = tmpstr;
bytes += strlen(envstrs[j]) + 1;
i++; j++;
}
/* account for padding for strings
and 2 null pointers */
bytes += (bytes % 8) + 16;
- core->var_dump.bytes = bytes;
+ var_dump.bytes = bytes;
return bytes;
}
int v3_inject_strings (struct guest_info * core, const char ** argstrs, const char ** envstrs, uint_t argcnt, uint_t envcnt) {
- if (core->cpu_mode == LONG || core->cpu_mode == LONG_32_COMPAT) {
+ if (core->cpu_mode == LONG) {
if (v3_inject_strings64(core, argstrs, envstrs, argcnt, envcnt) == -1) {
PrintDebug("Error injecting strings into environment (64)\n");
return -1;
#include <palacios/vmm_extensions.h>
#include <palacios/vmm_intr.h>
-#include <interfaces/sw_intr.h>
+#include <gears/sw_intr.h>
+
+#ifdef V3_CONFIG_EXT_CODE_INJECT
+#include <gears/code_inject.h>
+#endif
#ifndef V3_CONFIG_DEBUG_EXT_SW_INTERRUPTS
#undef PrintDebug
static int init_swintr_intercept (struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
-
return 0;
}
-static int init_swintr_intercept_core (struct guest_info * core, void * priv_data) {
+static int init_swintr_core_svm (struct guest_info * core, void * priv_data) {
vmcb_t * vmcb = (vmcb_t*)core->vmm_data;
vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
ctrl_area->instrs.INTn = 1;
+ return 0;
+}
+
+
+static int init_swintr_core_vmx (struct guest_info * core, void * priv_data) {
+ PrintError("Not implemented!\n");
+ return -1;
+}
+
+static int init_swintr_intercept_core (struct guest_info * core, void * priv_data) {
+ v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU());
+
+ switch (cpu_type) {
+ case V3_SVM_CPU:
+ case V3_SVM_REV3_CPU: {
+ if (init_swintr_core_svm(core, priv_data) == -1) {
+ PrintError("Problem initializing svm software interrupt intercept\n");
+ return -1;
+ }
+ break;
+ }
+ case V3_VMX_CPU:
+ case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU: {
+ if (init_swintr_core_vmx(core, priv_data) == -1) {
+ PrintError("Problem initializing vmx software interrupt intercept\n");
+ return -1;
+ }
+ break;
+ }
+ default:
+ PrintError("software interrupt interception not supported on this architecture\n");
+ return -1;
+ }
return 0;
}
static struct v3_swintr_hook * swintr_hooks[256];
-
static inline struct v3_swintr_hook * get_swintr_hook (struct guest_info * core, uint8_t vector) {
return swintr_hooks[vector];
}
return -1;
}
+#ifdef V3_CONFIG_EXT_CODE_INJECT
+// this is for injecting page faults
+// we don't want to increment rip or inject
+// the swint if we need to fault a page in
+ if (ret == E_NEED_PF) {
+ return 0;
+ }
+#endif
/* at some point we _may_ need to prioritize swints
so that they finish in time for the next
instruction... */
swintr_hooks[vector] = hook;
+ PrintDebug("Hooked Swintr #%d\n", vector);
+
return 0;
}
* and the University of New Mexico. You can find out more at
* http://www.v3vee.org
*
- * Copyright (c) 2011, Kyle C. Hale <kh@u.northwestern.edu>
+ * Copyright (c) 2011, Kyle C. Hale <kh@u.norhtwestern.edu>
* Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
* All rights reserved.
*
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
*/
-#ifndef __SYSCALL_REF_H__
-#define __SYSCALL_REF_H__
+#include <palacios/vmm.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_intr.h>
+#include <palacios/vmm_decoder.h>
+#include <palacios/vmm_string.h>
+#include <palacios/vmm_shadow_paging.h>
+#include <palacios/vmm_extensions.h>
+#include <palacios/vmm_paging.h>
+#include <palacios/vmcb.h>
+#include <palacios/vmm_hypercall.h>
-static char * get_linux_syscall_name32 (uint_t syscall_nr) {
+#include <gears/syscall_hijack.h>
+#include <gears/sw_intr.h>
+#include <gears/syscall_ref.h>
+
+#ifdef V3_CONFIG_EXT_CODE_INJECT
+#include <gears/code_inject.h>
+#include <palacios/vmm_list.h>
+extern struct v3_code_injects code_injects;
+#endif
+
+#ifndef V3_CONFIG_DEBUG_EXT_SYSCALL_HIJACK
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+struct v3_syscall_hook {
+ int (*handler)(struct guest_info * core, uint_t syscall_nr, void * priv_data);
+ void * priv_data;
+};
+
+static struct v3_syscall_hook * syscall_hooks[512];
+
+#if defined(V3_CONFIG_EXT_SELECTIVE_SYSCALL_EXIT) || defined(V3_CONFIG_EXT_SYSCALL_INSTR)
+static struct v3_syscall_info syscall_info;
+#endif
+
+static void print_arg (struct guest_info * core, v3_reg_t reg, uint8_t argnum) {
+
+ addr_t hva;
+ int ret = 0;
+
+ PrintDebug("\t ARG%d: INT - %ld\n", argnum, (long) reg);
+
+ if (core->mem_mode == PHYSICAL_MEM) {
+ ret = v3_gpa_to_hva(core, get_addr_linear(core, reg, &(core->segments.ds)), &hva);
+ }
+ else {
+ ret = v3_gva_to_hva(core, get_addr_linear(core, reg, &(core->segments.ds)), &hva);
+ }
+
+ PrintDebug("\t STR - ");
+ if (ret == -1) {
+ PrintDebug("\n");
+ return;
+ }
+
+ uint32_t c = max(MAX_CHARS, 4096 - (hva % 4096));
+ int i = 0;
+ for (; i < c && *((char*)(hva + i)) != 0; i++) {
+ PrintDebug("%c", *((char*)(hva + i)));
+ }
+ PrintDebug("\n");
+}
+
+
+static void print_syscall (uint8_t is64, struct guest_info * core) {
+
+ if (is64) {
+ PrintDebug("Syscall #%ld: \"%s\"\n", (long)core->vm_regs.rax, get_linux_syscall_name64(core->vm_regs.rax));
+ } else {
+ PrintDebug("Syscall #%ld: \"%s\"\n", (long)core->vm_regs.rax, get_linux_syscall_name32(core->vm_regs.rax));
+ }
+
+ print_arg(core, core->vm_regs.rbx, 1);
+ print_arg(core, core->vm_regs.rcx, 2);
+ print_arg(core, core->vm_regs.rdx, 3);
+}
+
+
+int v3_syscall_handler (struct guest_info * core, uint8_t vector, void * priv_data) {
+
+ uint_t syscall_nr = (uint_t) core->vm_regs.rax;
+ int err = 0, ret = 0;
+
+ struct v3_syscall_hook * hook = syscall_hooks[syscall_nr];
+
+#ifdef V3_CONFIG_EXT_SYSCALL_INSTR
+ // address originally written to LSTAR
+ if (!vector)
+ core->rip = syscall_info.target_addr;
+#endif
+
+#ifdef V3_CONFIG_EXT_SELECTIVE_SYSCALL_EXIT
+ PrintDebug("In v3_syscall_handler: syscall_nr - %d\n", syscall_nr);
+#endif
+
+
+ if (hook == NULL) {
+#ifdef V3_CONFIG_EXT_SYSCALL_PASSTHROUGH
+ if (v3_hook_passthrough_syscall(core, syscall_nr) == -1) {
+ PrintDebug("Error hooking passthrough syscall\n");
+ return -1;
+ }
+ hook = syscall_hooks[syscall_nr];
+#endif
+
+/*
+ * if this syscall isn't hooked, pop off a pending inject
+ * and run it
+ */
+#ifdef V3_CONFIG_EXT_CODE_INJECT
+ struct v3_code_injects * injects = &code_injects;
+ struct v3_code_inject_info * inject = NULL;
+
+ if (list_empty(&(injects->code_inject_list))) {
+ return 0;
+ } else {
+
+ inject = (struct v3_code_inject_info*) list_first_entry(
+ &(injects->code_inject_list),
+ struct v3_code_inject_info,
+ inject_node);
+
+ if (inject == NULL) {
+ PrintError("Problem getting inject from inject list\n");
+ return -1;
+ }
+
+ if (inject->in_progress)
+ return 0;
+
+ // do the inject and don't fall over if there's an inject already in
+ // progress
+ if ((ret = v3_handle_guest_inject(core, (void*)inject)) == -1) {
+ PrintError("Could not run code injection: v3_syscall_handler\n");
+ return 0;
+ } else {
+ return ret;
+ }
+ }
+#else
+ return 0;
+#endif
+ }
+
+ err = hook->handler(core, syscall_nr, hook->priv_data);
+ if (err == -1) {
+ PrintDebug("V3 Syscall Handler: Error in syscall hook\n");
+ return -1;
+ }
+
+#ifdef V3_CONFIG_EXT_CODE_INJECT
+ if (err == E_NEED_PF)
+ return E_NEED_PF;
+#endif
+ return 0;
+}
+
+
+#ifdef V3_CONFIG_EXT_SELECTIVE_SYSCALL_EXIT
+static int v3_handle_lstar_write (struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
+ syscall_info.target_addr = (uint64_t) ((((uint64_t)src.hi) << 32) | src.lo);
+
+ PrintDebug("LSTAR Write: %p\n", (void*)syscall_info.target_addr);
+ core->msrs.lstar = syscall_info.target_addr;
+ return 0;
+}
+
+
+// virtualize the lstar
+static int v3_handle_lstar_read (struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data) {
+ PrintDebug("LSTAR Read\n");
+ dst->value = syscall_info.target_addr;
+ return 0;
+}
+
+
+static int syscall_setup (struct guest_info * core, unsigned int hcall_id, void * priv_data) {
+ addr_t syscall_stub, syscall_map, ssa;
+
+ syscall_stub = (addr_t)core->vm_regs.rbx;
+ syscall_map = (addr_t)core->vm_regs.rcx;
+ ssa = (addr_t)core->vm_regs.rdx;
+
+ PrintDebug("made it to syscall setup hypercall\n");
+ PrintDebug("\t&syscall_stub (rbx): %p\n\t&syscall_map (rcx): %p\n", (void*)syscall_stub, (void*)syscall_map);
+ PrintDebug("\t&ssa (rdx): %p\n", (void*)ssa);
+
+ syscall_info.syscall_stub = syscall_stub;
+ syscall_info.syscall_map = (uint8_t*)syscall_map;
+ syscall_info.ssa = ssa;
+
+ /* return the original syscall entry point */
+ core->vm_regs.rax = syscall_info.target_addr;
+
+ /* redirect syscalls henceforth */
+ core->msrs.lstar = syscall_stub;
+ return 0;
+}
+
+
+static int syscall_cleanup (struct guest_info * core, unsigned int hcall_id, void * priv_data) {
+
+ core->msrs.lstar = syscall_info.target_addr;
+ PrintDebug("original syscall entry point restored\n");
+ return 0;
+}
+
+
+static int sel_syscall_handle (struct guest_info * core, unsigned int hcall_id, void * priv_data) {
+ int ret;
+ addr_t hva;
+ struct v3_gprs regs;
+
+ PrintDebug("caught a selectively exited syscall!\n");
+ ret = v3_gva_to_hva(core, get_addr_linear(core, syscall_info.ssa, &(core->segments.ds)), &hva);
+ if (ret == -1) {
+ PrintError("Problem translating state save area address in sel_syscall_handle\n");
+ return -1;
+ }
+
+ /* setup registers for handler routines. They should be in the same state
+ * as when the system call was originally invoked */
+ memcpy((void*)®s, (void*)&core->vm_regs, sizeof(struct v3_gprs));
+ memcpy((void*)&core->vm_regs, (void*)hva, sizeof(struct v3_gprs));
+
+ v3_print_guest_state(core);
+
+ // TODO: call syscall-independent handler
+
+ memcpy((void*)hva, (void*)&core->vm_regs, sizeof(struct v3_gprs));
+ memcpy((void*)&core->vm_regs, (void*)®s,sizeof(struct v3_gprs));
+ return 0;
+}
+
+
+#endif
+
+static int init_syscall_hijack (struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
+#ifdef V3_CONFIG_EXT_SELECTIVE_SYSCALL_EXIT
+ v3_register_hypercall(vm, 0x5CA11, sel_syscall_handle, NULL);
+ v3_register_hypercall(vm, 0x5CA12, syscall_setup, NULL);
+ v3_register_hypercall(vm, 0x5CA13, syscall_cleanup, NULL);
+#endif
+ return 0;
+}
+
+
+
+#ifdef V3_CONFIG_EXT_SYSCALL_INSTR
+static int v3_handle_lstar_write (struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) {
+ PrintDebug("KCH: LSTAR Write\n");
+ //PrintDebug("\tvalue: 0x%x%x\n", src.hi, src.lo);
+ syscall_info.target_addr = (uint64_t) ((((uint64_t)src.hi) << 32) | src.lo);
+
+ // Set LSTAR value seen by hardware while the guest is running
+ PrintDebug("replacing with %lx\n", SYSCALL_MAGIC_ADDR);
+ core->msrs.lstar = SYSCALL_MAGIC_ADDR;
+ return 0;
+}
+
+static int v3_handle_lstar_read (struct guest_info * core, uint_t msr, struct v3_msr * dst, void * priv_data) {
+ PrintDebug("KCH: LSTAR Read\n");
+ dst->value = syscall_info.target_addr;
+ return 0;
+}
+#endif
+
+
+static int init_syscall_hijack_core (struct guest_info * core, void * priv_data) {
+
+#ifdef V3_CONFIG_EXT_SW_INTERRUPTS
+ v3_hook_swintr(core, SYSCALL_INT_VECTOR, v3_syscall_handler, NULL);
+#endif
+
+#if defined(V3_CONFIG_EXT_SYSCALL_INSTR) || defined(V3_CONFIG_EXT_SELECTIVE_SYSCALL_EXIT)
+ v3_hook_msr(core->vm_info, LSTAR_MSR,
+ &v3_handle_lstar_read,
+ &v3_handle_lstar_write,
+ core);
+#endif
+
+ return 0;
+}
+
+static int deinit_syscall_hijack (struct v3_vm_info * vm, void * priv_data) {
+ return 0;
+}
+
+
+static struct v3_extension_impl syscall_impl = {
+ .name = "syscall_intercept",
+ .init = init_syscall_hijack,
+ .deinit = deinit_syscall_hijack,
+ .core_init = init_syscall_hijack_core,
+ .core_deinit = NULL,
+ .on_entry = NULL,
+ .on_exit = NULL
+};
+
+register_extension(&syscall_impl);
+
+
+static inline struct v3_syscall_hook * get_syscall_hook (struct guest_info * core, uint_t syscall_nr) {
+ return syscall_hooks[syscall_nr];
+}
+
+
+int v3_hook_syscall (struct guest_info * core,
+ uint_t syscall_nr,
+ int (*handler)(struct guest_info * core, uint_t syscall_nr, void * priv_data),
+ void * priv_data)
+{
+ struct v3_syscall_hook * hook = (struct v3_syscall_hook *)V3_Malloc(sizeof(struct v3_syscall_hook));
+
+
+ if (hook == NULL) {
+ return -1;
+ }
+
+ if (get_syscall_hook(core, syscall_nr) != NULL) {
+ PrintError("System Call #%d already hooked\n", syscall_nr);
+ return -1;
+ }
+
+ hook->handler = handler;
+ hook->priv_data = priv_data;
+
+ syscall_hooks[syscall_nr] = hook;
+
+ PrintDebug("Hooked Syscall #%d\n", syscall_nr);
+
+ return 0;
+}
+
+
+static int passthrough_syscall_handler (struct guest_info * core, uint_t syscall_nr, void * priv_data) {
+ print_syscall(core->cpu_mode == LONG, core);
+ return 0;
+}
+
+
+int v3_hook_passthrough_syscall (struct guest_info * core, uint_t syscall_nr) {
+
+ int rc = v3_hook_syscall(core, syscall_nr, passthrough_syscall_handler, NULL);
+
+ if (rc) {
+ PrintError("failed to hook syscall 0x%x for passthrough (guest=0x%p)\n", syscall_nr, (void *)core);
+ return -1;
+ } else {
+ PrintDebug("hooked syscall 0x%x for passthrough (guest=0x%p)\n", syscall_nr, (void *)core);
+ return 0;
+ }
+
+ /* shouldn't get here */
+ return 0;
+}
+
+
+
+char * get_linux_syscall_name32 (uint_t syscall_nr) {
switch (syscall_nr) {
}
-static char * get_linux_syscall_name64 (uint_t syscall_nr) {
+char * get_linux_syscall_name64 (uint_t syscall_nr) {
switch (syscall_nr) {
case 242: return "mq_timedsend"; break;
case 243: return "mq_timedreceive"; break;
case 244: return "mq_notify"; break;
- case 245: return "mq_getsetattr"; break;
case 246: return "kexec_load"; break;
case 247: return "waitid"; break;
case 248: return "add_key"; break;
default: return "UNKNOWN"; break;
}
}
-
-#endif
--- /dev/null
+/** \file
+ * Do nothing module.
+ *
+ * This file only exists to appease the kbuild gods.
+ */
+
This makes it possible for virtual devices such as the generic device and the pci_front
device to make host-based device implementations appear within the guest
+config HOST_HYPERCALL
+ bool "Host hypercall support"
+ default n
+ help
+ Select this if you would like to make it possible
+ to register host-based implementations of hypercalls,
+ for example, implemented in Linux kernel modules
endmenu
obj-$(V3_CONFIG_GRAPHICS_CONSOLE) += vmm_graphics_console.o
obj-$(V3_CONFIG_KEYED_STREAMS) += vmm_keyed_stream.o
obj-$(V3_CONFIG_HOST_DEVICE) += vmm_host_dev.o
+obj-$(V3_CONFIG_HOST_HYPERCALL) += vmm_host_hypercall.o
obj-y += null.o
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2012, Kyle C. Hale <kh@u.northwestern.edu>
+ * Copyright (c) 2012, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Authors: Kyle C. Hale <kh@u.northwestern.edu>
+ * Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmm_hypercall.h>
+#include <palacios/vmm_types.h>
+
+#include <interfaces/vmm_host_hypercall.h>
+
+
+#define GET_SET_GPR_IMPL(R) \
+ static uint64_t get_##R(palacios_core_t core) { return ((struct guest_info *)core)->vm_regs.R;} \
+ static void set_##R(palacios_core_t core, uint64_t val) { ((struct guest_info *)core)->vm_regs.R = val; }
+
+#define GET_SET_CR_IMPL(R) \
+ static uint64_t get_##R(palacios_core_t core) { return ((struct guest_info *)core)->ctrl_regs.R;} \
+ static void set_##R(palacios_core_t core, uint64_t val) { ((struct guest_info *)core)->ctrl_regs.R = val; }
+
+#define DECL_IT(R) .get_##R = get_##R, .set_##R = set_##R,
+
+GET_SET_GPR_IMPL(rax)
+GET_SET_GPR_IMPL(rbx)
+GET_SET_GPR_IMPL(rcx)
+GET_SET_GPR_IMPL(rdx)
+GET_SET_GPR_IMPL(rsi)
+GET_SET_GPR_IMPL(rdi)
+GET_SET_GPR_IMPL(rbp)
+GET_SET_GPR_IMPL(rsp)
+GET_SET_GPR_IMPL(r8)
+GET_SET_GPR_IMPL(r9)
+GET_SET_GPR_IMPL(r10)
+GET_SET_GPR_IMPL(r11)
+GET_SET_GPR_IMPL(r12)
+GET_SET_GPR_IMPL(r13)
+GET_SET_GPR_IMPL(r14)
+GET_SET_GPR_IMPL(r15)
+
+static uint64_t get_rip(palacios_core_t core) { return ((struct guest_info *)core)->rip;}
+
+static void set_rip(palacios_core_t core, uint64_t val) { ((struct guest_info *)core)->rip = val; }
+
+
+GET_SET_CR_IMPL(cr0)
+GET_SET_CR_IMPL(cr2)
+GET_SET_CR_IMPL(cr3)
+GET_SET_CR_IMPL(cr4)
+GET_SET_CR_IMPL(cr8)
+GET_SET_CR_IMPL(efer)
+GET_SET_CR_IMPL(rflags)
+
+
+
+static struct guest_accessors guest_acc = {
+DECL_IT(rax)
+DECL_IT(rbx)
+DECL_IT(rcx)
+DECL_IT(rdx)
+DECL_IT(rsi)
+DECL_IT(rdi)
+DECL_IT(rbp)
+DECL_IT(rsp)
+DECL_IT(r8)
+DECL_IT(r9)
+DECL_IT(r10)
+DECL_IT(r11)
+DECL_IT(r12)
+DECL_IT(r13)
+DECL_IT(r14)
+DECL_IT(r15)
+
+DECL_IT(rip)
+DECL_IT(cr0)
+DECL_IT(cr2)
+DECL_IT(cr3)
+DECL_IT(cr4)
+DECL_IT(cr8)
+DECL_IT(efer)
+DECL_IT(rflags)
+
+.gva_to_hva = (int (*)(palacios_core_t, uint64_t, uint64_t *)) v3_gva_to_hva,
+.gpa_to_hva = (int (*)(palacios_core_t, uint64_t, uint64_t *)) v3_gpa_to_hva,
+.gva_to_gpa = (int (*)(palacios_core_t, uint64_t, uint64_t *)) v3_gva_to_gpa,
+.read_gva = (int (*)(palacios_core_t, uint64_t, int, void *)) v3_read_gva_memory,
+.read_gpa = (int (*)(palacios_core_t, uint64_t, int, void *)) v3_read_gpa_memory,
+.write_gva = (int (*)(palacios_core_t, uint64_t, int, void *)) v3_write_gva_memory,
+.write_gpa = (int (*)(palacios_core_t, uint64_t, int, void *)) v3_write_gpa_memory,
+
+ } ;
+
+
+
+
+
+
+struct bounce_data {
+ int (*hypercall)(palacios_core_t core,
+ unsigned int hcall_id,
+ struct guest_accessors *accessors,
+ void *priv_data);
+ void *priv_data;
+};
+
+static int bounce(struct guest_info *core,
+ unsigned int hcall_id,
+ void *priv_data)
+{
+ struct bounce_data *b = (struct bounce_data *) priv_data;
+
+ return b->hypercall(core,hcall_id,&guest_acc,b->priv_data);
+}
+
+
+
+int v3_register_host_hypercall(host_vm_info_t * vm,
+ unsigned int hypercall_id,
+ int (*hypercall)(palacios_core_t core,
+ uint_t hcall_id,
+ struct guest_accessors *acc,
+ void * priv_data),
+ void * priv_data) {
+
+ struct bounce_data *b = V3_Malloc(sizeof(struct bounce_data));
+
+ if (!b) {
+ return -1;
+ }
+
+ b->hypercall=hypercall;
+ b->priv_data=priv_data;
+
+ if (v3_register_hypercall((struct v3_vm_info*) vm,
+ hypercall_id,
+ bounce,
+ b) < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int v3_unregister_host_hypercall(host_vm_info_t * vm,
+ unsigned int hypercall_id)
+{
+ return v3_remove_hypercall((struct v3_vm_info*)vm, hypercall_id);
+}
+
guest_state->rflags = info->ctrl_regs.rflags;
guest_state->efer = info->ctrl_regs.efer;
+ /* Synchronize MSRs */
+ guest_state->star = info->msrs.star;
+ guest_state->lstar = info->msrs.lstar;
+ guest_state->sfmask = info->msrs.sfmask;
+ guest_state->KernelGsBase = info->msrs.kern_gs_base;
+
guest_state->cpl = info->cpl;
v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
info->ctrl_regs.rflags = guest_state->rflags;
info->ctrl_regs.efer = guest_state->efer;
+ /* Synchronize MSRs */
+ info->msrs.star = guest_state->star;
+ info->msrs.lstar = guest_state->lstar;
+ info->msrs.sfmask = guest_state->sfmask;
+ info->msrs.kern_gs_base = guest_state->KernelGsBase;
+
v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
info->cpu_mode = v3_get_vm_cpu_mode(info);
info->mem_mode = v3_get_vm_mem_mode(info);
#endif
#ifdef V3_CONFIG_EXT_SW_INTERRUPTS
-#include <interfaces/sw_intr.h>
+#include <gears/sw_intr.h>
#endif
int v3_handle_svm_exit(struct guest_info * info, addr_t exit_code, addr_t exit_info1, addr_t exit_info2) {
}
+/* This clones v3_read_gva_memory
+ * We write only as far as page translations are available
+ */
+int v3_write_gva_memory(struct guest_info * guest_info, addr_t gva, int count, uchar_t * src) {
+ addr_t cursor = gva;
+ int bytes_written = 0;
+
+
+
+ while (count > 0) {
+ int dist_to_pg_edge = (PAGE_ADDR(cursor) + PAGE_SIZE) - cursor;
+ int bytes_to_copy = (dist_to_pg_edge > count) ? count : dist_to_pg_edge;
+ addr_t host_addr = 0;
+
+
+ if (v3_gva_to_hva(guest_info, cursor, &host_addr) != 0) {
+ PrintDebug("Invalid GVA(%p)->HVA lookup\n", (void *)cursor);
+ return bytes_written;
+ }
+
+
+
+ memcpy((void*)host_addr,
+ src + bytes_written,
+ bytes_to_copy);
+
+ bytes_written += bytes_to_copy;
+ count -= bytes_to_copy;
+ cursor += bytes_to_copy;
+ }
+
+ return bytes_written;
+}
+
+
/* This is a straight address conversion + copy,
error = v3_read_vmcs_segments(&(info->segments));
+ /* Save MSRs from MSR SAVE Area (whereever that is...)*/
+
+ info->msrs.star = vmx_info->msr_area->guest_star.hi;
+ info->msrs.star <<= 32;
+ info->msrs.star |= vmx_info->msr_area->guest_star.lo;
+
+ info->msrs.lstar = vmx_info->msr_area->guest_lstar.hi;
+ info->msrs.lstar <<= 32;
+ info->msrs.lstar |= vmx_info->msr_area->guest_lstar.lo;
+
+ info->msrs.sfmask = vmx_info->msr_area->guest_fmask.hi;
+ info->msrs.sfmask <<= 32;
+ info->msrs.sfmask |= vmx_info->msr_area->guest_fmask.lo;
+
+ info->msrs.kern_gs_base = vmx_info->msr_area->guest_kern_gs.hi;
+ info->msrs.kern_gs_base <<= 32;
+ info->msrs.kern_gs_base |= vmx_info->msr_area->guest_kern_gs.lo;
+
+
return error;
}
error = v3_write_vmcs_segments(&(info->segments));
+ /* Restore MSRs from MSR SAVE Area (whereever that is...)*/
+
+ vmx_info->msr_area->guest_star.hi = (info->msrs.star >> 32);
+ vmx_info->msr_area->guest_star.lo = (info->msrs.star & 0xffffffff);
+
+ vmx_info->msr_area->guest_lstar.hi = (info->msrs.lstar >> 32);
+ vmx_info->msr_area->guest_lstar.lo = (info->msrs.lstar & 0xffffffff);
+
+ vmx_info->msr_area->guest_fmask.hi = (info->msrs.sfmask >> 32);
+ vmx_info->msr_area->guest_fmask.lo = (info->msrs.sfmask & 0xffffffff);
+
+ vmx_info->msr_area->guest_kern_gs.hi = (info->msrs.kern_gs_base >> 32);
+ vmx_info->msr_area->guest_kern_gs.lo = (info->msrs.kern_gs_base & 0xffffffff);
+
return error;
}
}
-
-void Init_V3(struct v3_os_hooks * hooks, int num_cpus) {
- int i;
+void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) {
+ int i = 0;
+ int minor = 0;
+ int major = 0;
V3_Print("V3 Print statement to fix a Kitten page fault bug\n");
V3_init_checkpoint();
#endif
-
-
-
if ((hooks) && (hooks->call_on_cpu)) {
- for (i = 0; i < num_cpus; i++) {
- V3_Print("Initializing VMM extensions on cpu %d\n", i);
- hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
+ for (i = 0; i < num_cpus; i++) {
+ major = i / 8;
+ minor = i % 8;
- if (v3_mach_type == V3_INVALID_CPU) {
- v3_mach_type = v3_cpu_types[i];
- }
+ if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
+ V3_Print("Initializing VMM extensions on cpu %d\n", i);
+ hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
- }
+ if (v3_mach_type == V3_INVALID_CPU) {
+ v3_mach_type = v3_cpu_types[i];
+ }
+ }
+ }
}
-
-
}
+
void Shutdown_V3() {
int i;
return -1;
}
- if (hcall->hcall_fn(info, hypercall_id, hcall->priv_data) == 0) {
- info->vm_regs.rax = 0;
- } else {
- info->vm_regs.rax = -1;
+ if (hcall->hcall_fn(info, hypercall_id, hcall->priv_data) != 0) {
+ PrintError("Error handling hypercall\n");
+ return -1;
}
return 0;