Linux kernel compatibility enhancements (through 3.19)

[palacios.git] / linux_module / palacios-stubs.c
diff --git a/linux_module/palacios-stubs.c b/linux_module/palacios-stubs.c

index 21aa4fd..039c170 100644 (file)
--- a/linux_module/palacios-stubs.c
+++ b/linux_module/palacios-stubs.c
@@ -8,59 +8,224 @@
 #include <linux/uaccess.h>
 #include <asm/irq_vectors.h>
 #include <asm/io.h>
+#include <asm/thread_info.h>
+#include <asm/i387.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
+#include <asm/fpu-internal.h>
+#endif
 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <asm/uaccess.h>
 #include <linux/smp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/i387.h>
 
 #include <palacios/vmm.h>
 #include <palacios/vmm_host_events.h>
-#include "palacios.h"
 
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+#include <interfaces/vmm_lazy_fpu.h>
+#endif
 
+#include "palacios.h"
 
+#include "util-hashtable.h"
 
 #include "mm.h"
 
+#include "memcheck.h"
+#include "lockcheck.h"
+
+
+
+// The following can be used to track memory bugs
+// zero memory after allocation (now applies to valloc and page alloc as well)
+#define ALLOC_ZERO_MEM 1
+// pad allocations by this many bytes on both ends of block (heap only)
+#define ALLOC_PAD      0
+
 
 u32 pg_allocs = 0;
 u32 pg_frees = 0;
 u32 mallocs = 0;
 u32 frees = 0;
-
+u32 vmallocs = 0;
+u32 vfrees = 0;
 
 static struct v3_vm_info * irq_to_guest_map[256];
 
 
 extern unsigned int cpu_khz;
 
+extern int cpu_list[NR_CPUS];
+extern int cpu_list_len;
+
+
+extern struct hashtable *v3_thread_resource_map;
+
+
+static char *print_buffer[NR_CPUS];
+
+static void deinit_print_buffers(void)
+{
+    int i;
+
+    for (i=0;i<NR_CPUS;i++) {
+       if (print_buffer[i]) { 
+           palacios_free(print_buffer[i]);
+           print_buffer[i]=0;
+       }
+    }
+}
+
+static int init_print_buffers(void)
+{
+    int i;
+    
+    memset(print_buffer,0,sizeof(char*)*NR_CPUS);
+
+#if !V3_PRINTK_OLD_STYLE_OUTPUT
+
+    for (i=0;i<NR_CPUS;i++) { 
+       print_buffer[i] = palacios_alloc(V3_PRINTK_BUF_SIZE);
+       if (!print_buffer[i]) { 
+           ERROR("Cannot allocate print buffer for cpu %d\n",i);
+           deinit_print_buffers();
+    return -1;
+       }
+       memset(print_buffer[i],0,V3_PRINTK_BUF_SIZE);
+    }
+
+#endif
+    
+    return 0;
 
+}
+ 
 /**
  * Prints a message to the console.
  */
-static void palacios_print(const char *        fmt, ...) {
+void palacios_print_scoped(void * vm, int vcore, const char *fmt, ...) {
+
+#if V3_PRINTK_OLD_STYLE_OUTPUT
+
   va_list ap;
+
   va_start(ap, fmt);
   vprintk(fmt, ap);
   va_end(ap);
-  
+
+  return
+
+#else 
+
+  va_list ap;
+  char *buf;
+  unsigned int cpu = palacios_get_cpu();
+  struct v3_guest *guest = (struct v3_guest *)vm;
+
+  buf = print_buffer[cpu];
+
+  if (!buf) { 
+      printk(KERN_INFO "palacios (pcore %u): output skipped - no allocated buffer\n",cpu);
+      return;
+  } 
+
+  va_start(ap, fmt);
+  vsnprintf(buf,V3_PRINTK_BUF_SIZE, fmt, ap);
+  va_end(ap);
+
+#if V3_PRINTK_CHECK_7BIT
+  {
+      char c=0;
+      int i;
+      for (i=0;i<strlen(buf);i++) { 
+         if (buf[i] < 0) {
+             c=buf[i];
+             break;
+         }
+      }
+      if (c!=0) { 
+         printk(KERN_INFO "palacios (pcore %u): ALERT ALERT 8 BIT CHAR (c=%d) DETECTED\n", cpu,c);
+      }
+  }
+#endif
+
+  if (guest) {
+    if (vcore>=0) { 
+      printk(KERN_INFO "palacios (pcore %u vm %s vcore %u): %s",
+            cpu,
+            guest->name,
+            vcore,
+            buf);
+    } else {
+       printk(KERN_INFO "palacios (pcore %u vm %s): %s",
+            cpu,
+            guest->name,
+            buf);
+    }
+  } else {
+    printk(KERN_INFO "palacios (pcore %u): %s",
+          cpu,
+          buf);
+  }
+    
   return;
-}
 
+#endif
+
+}
 
 
 /*
  * Allocates a contiguous region of pages of the requested size.
  * Returns the physical address of the first page in the region.
  */
-static void * palacios_allocate_pages(int num_pages, unsigned int alignment) {
+void *palacios_allocate_pages(int num_pages, unsigned int alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state) {
     void * pg_addr = NULL;
+    v3_resource_control_t *r;
+
+    if (num_pages<=0) { 
+       ERROR("ALERT ALERT Attempt to allocate zero or fewer pages (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state);
+      return NULL;
+    }
+
+    if ((r=(v3_resource_control_t *)palacios_htable_search(v3_thread_resource_map,(addr_t)current))) { 
+       // thread has a registered resource control structure
+       // these override any default values
+       //      INFO("Overridden page search: (pre) alignment=%x, node_id=%x, filter_func=%p, filter_state=%p\n",alignment,node_id,filter_func,filter_state);
+       if (alignment==4096) { 
+           alignment = r->pg_alignment;
+       }
+       if (node_id==-1) { 
+           node_id = r->pg_node_id;
+       }
+       if (!filter_func) {
+           filter_func = r->pg_filter_func;
+           filter_state = r->pg_filter_state;
+       }
+       //INFO("Overridden page search: (post) alignment=%x, node_id=%x, filter_func=%p, filter_state=%p\n",alignment,node_id,filter_func,filter_state);
+    }
+    
+    pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment, node_id, filter_func, filter_state);
+
+    if (!pg_addr) { 
+       ERROR("ALERT ALERT  Page allocation has FAILED Warning (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state);
+       return NULL;
+    }
 
-    pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment);
     pg_allocs += num_pages;
 
+#if ALLOC_ZERO_MEM
+    memset(__va(pg_addr),0,num_pages*4096);
+#endif
+
+    MEMCHECK_ALLOC_PAGES(pg_addr,num_pages*4096);
+
     return pg_addr;
 }
 
@@ -71,48 +236,134 @@ static void * palacios_allocate_pages(int num_pages, unsigned int alignment) {
  * a single call while palacios_free_page() only frees a single page.
  */
 
-static void palacios_free_pages(void * page_paddr, int num_pages) {
+void palacios_free_pages(void * page_paddr, int num_pages) {
+    if (!page_paddr) { 
+       ERROR("Ignoring free pages: 0x%p (0x%lx)for %d pages\n", page_paddr, (uintptr_t)page_paddr, num_pages);
+       dump_stack();
+       return;
+    }
     pg_frees += num_pages;
     free_palacios_pgs((uintptr_t)page_paddr, num_pages);
+    MEMCHECK_FREE_PAGES(page_paddr,num_pages*4096);
+
 }
 
 
+void *
+palacios_alloc_extended(unsigned int size, unsigned int flags, int node) {
+    void * addr = NULL;
+
+    if (size==0) { 
+      // note that modern kernels will respond to a zero byte
+      // kmalloc and return the address 0x10...  In Palacios, 
+      // we will simply not allow 0 byte allocs at all, of any kind
+      ERROR("ALERT ALERT attempt to kmalloc zero bytes rejected\n");
+      return NULL;
+    }
+
+    if (node==-1) { 
+       addr = kmalloc(size+2*ALLOC_PAD, flags);
+    } else {
+       addr = kmalloc_node(size+2*ALLOC_PAD, flags, node);
+    }
+
+    if (!addr || IS_ERR(addr)) { 
+       ERROR("ALERT ALERT  kmalloc has FAILED FAILED FAILED\n");
+       return NULL;
+    }  
+
+    mallocs++;
+
+#if ALLOC_ZERO_MEM
+    memset(addr,0,size+2*ALLOC_PAD);
+#endif
+
+    MEMCHECK_KMALLOC(addr,size+2*ALLOC_PAD);
+
+    return addr+ALLOC_PAD;
+}
+
+void *
+palacios_valloc(unsigned int size)
+{
+    void * addr = NULL;
+
+    if (size==0) { 
+      ERROR("ALERT ALERT attempt to vmalloc zero bytes rejected\n");
+      return NULL;
+    }
+
+    addr = vmalloc(size);
+
+    if (!addr || IS_ERR(addr)) { 
+       ERROR("ALERT ALERT  vmalloc has FAILED FAILED FAILED\n");
+       return NULL;
+    }  
+
+    vmallocs++;
+
+#if ALLOC_ZERO_MEM
+    memset(addr,0,size);
+#endif
+
+    MEMCHECK_VMALLOC(addr,size);
+
+    return addr;
+}
+
+void palacios_vfree(void *p)
+{
+  if (!p) { 
+      ERROR("Ignoring vfree: 0x%p\n",p);
+      dump_stack();
+      return;
+  }
+  vfree(p);
+  vfrees++;
+  MEMCHECK_VFREE(p);
+}
+
 /**
  * Allocates 'size' bytes of kernel memory.
  * Returns the kernel virtual address of the memory allocated.
  */
-static void *
+void *
 palacios_alloc(unsigned int size) {
-    void * addr = NULL;
 
-    if (irqs_disabled()) {
-       addr = kmalloc(size, GFP_ATOMIC);
+    // It is very important that this test remains, since
+    // this function is used extensively throughout palacios and the linux
+    // module, both in places where interrupts are off and where they are on.
+    // A GFP_KERNEL call made with interrupts off can lead to DEADLOCK
+    if (irqs_disabled() || in_atomic()) {
+       return palacios_alloc_extended(size,GFP_ATOMIC,-1);
     } else {
-       addr = kmalloc(size, GFP_KERNEL);
+       return palacios_alloc_extended(size,GFP_KERNEL,-1);
     }
-    mallocs++;
 
- 
-    return addr;
 }
 
 /**
  * Frees memory that was previously allocated by palacios_alloc().
  */
-static void
+void
 palacios_free(
        void *                  addr
 )
 {
+    if (!addr) {
+       ERROR("Ignoring free : 0x%p\n", addr);
+       dump_stack();
+       return;
+    }
     frees++;
-    kfree(addr);
-    return;
+    kfree(addr-ALLOC_PAD);
+    MEMCHECK_KFREE(addr-ALLOC_PAD);
 }
 
 /**
  * Converts a kernel virtual address to the corresponding physical address.
  */
-static void *
+void *
 palacios_vaddr_to_paddr(
        void *                  vaddr
 )
@@ -124,7 +375,7 @@ palacios_vaddr_to_paddr(
 /**
  * Converts a physical address to the corresponding kernel virtual address.
  */
-static void *
+void *
 palacios_paddr_to_vaddr(
        void *                  paddr
 )
@@ -135,9 +386,7 @@ palacios_paddr_to_vaddr(
 /**
  * Runs a function on the specified CPU.
  */
-
-// For now, do call only on local CPU 
-static void 
+void 
 palacios_xcall(
        int                     cpu_id, 
        void                    (*fn)(void *arg),
@@ -152,90 +401,161 @@ palacios_xcall(
     return;
 }
 
+
+#define MAX_THREAD_NAME 32
+
 struct lnx_thread_arg {
     int (*fn)(void * arg);
     void * arg;
+    v3_resource_control_t *resource_control;
+    char name[MAX_THREAD_NAME];
 };
 
 static int lnx_thread_target(void * arg) {
     struct lnx_thread_arg * thread_info = (struct lnx_thread_arg *)arg;
     int ret = 0;
     /*
-      printk("Daemonizing new Palacios thread (name=%s)\n", thread_info->name);
+      INFO("Daemonizing new Palacios thread (name=%s)\n", thread_info->name);
 
       daemonize(thread_info->name);
       allow_signal(SIGKILL);
     */
 
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+    // We are a kernel thread that needs FPU save/restore state
+    // vcores definitely need this, all the other threads get it too, 
+    // but they just won't use it
+
+    fpu_alloc(&(current->thread.fpu));
+#endif
+
+    palacios_htable_insert(v3_thread_resource_map,(addr_t)current,(addr_t)thread_info->resource_control);
 
     ret = thread_info->fn(thread_info->arg);
 
-    kfree(thread_info);
+    INFO("Palacios Thread (%s) EXITING\n", thread_info->name);
+
+    palacios_htable_remove(v3_thread_resource_map,(addr_t)current,0);
+
+    palacios_free(thread_info);
     // handle cleanup 
 
+    // We rely on do_exit to free the fpu data
+    // since we could get switched at any point until the thread is done... 
+
     do_exit(ret);
-    
+
     return 0; // should not get here.
 }
 
 /**
  * Creates a kernel thread.
  */
-static void *
-palacios_start_kernel_thread(
+void *
+palacios_create_and_start_kernel_thread(
        int (*fn)               (void * arg),
        void *                  arg,
-       char *                  thread_name) {
+       char *                  thread_name,
+       v3_resource_control_t   *resource_control) {
+
+    struct lnx_thread_arg * thread_info = palacios_alloc(sizeof(struct lnx_thread_arg));
 
-    struct lnx_thread_arg * thread_info = kmalloc(sizeof(struct lnx_thread_arg), GFP_KERNEL);
+    if (!thread_info) { 
+       ERROR("ALERT ALERT Unable to allocate thread\n");
+       return NULL;
+    }
 
     thread_info->fn = fn;
     thread_info->arg = arg;
+    strncpy(thread_info->name,thread_name,MAX_THREAD_NAME);
+    thread_info->name[MAX_THREAD_NAME-1] =0;
+    thread_info->resource_control = resource_control;
 
-    return kthread_run( lnx_thread_target, thread_info, thread_name );
+    return kthread_run( lnx_thread_target, thread_info, thread_info->name );
 }
 
 
 /**
  * Starts a kernel thread on the specified CPU.
  */
-static void * 
-palacios_start_thread_on_cpu(int cpu_id, 
-                            int (*fn)(void * arg), 
-                            void * arg, 
-                            char * thread_name ) {
+void * 
+palacios_create_thread_on_cpu(int cpu_id,
+                             int (*fn)(void * arg), 
+                             void * arg, 
+                             char * thread_name,
+                             v3_resource_control_t *resource_control) {
     struct task_struct * thread = NULL;
-    struct lnx_thread_arg * thread_info = kmalloc(sizeof(struct lnx_thread_arg), GFP_KERNEL);
+    struct lnx_thread_arg * thread_info = palacios_alloc(sizeof(struct lnx_thread_arg));
+
+    if (!thread_info) { 
+       ERROR("ALERT ALERT Unable to allocate thread to start on cpu\n");
+       return NULL;
+    }
 
     thread_info->fn = fn;
     thread_info->arg = arg;
+    strncpy(thread_info->name,thread_name,MAX_THREAD_NAME);
+    thread_info->name[MAX_THREAD_NAME-1] =0;
+    thread_info->resource_control=resource_control;
 
+    thread = kthread_create( lnx_thread_target, thread_info, thread_info->name );
 
-    thread = kthread_create( lnx_thread_target, thread_info, thread_name );
-
-    if (IS_ERR(thread)) {
-       printk("Palacios error creating thread: %s\n", thread_name);
+    if (!thread || IS_ERR(thread)) {
+       WARNING("Palacios error creating thread: %s\n", thread_info->name);
+       palacios_free(thread_info);
        return NULL;
     }
 
-    set_cpus_allowed_ptr(thread, cpumask_of(cpu_id));
-    wake_up_process(thread);
+    if (set_cpus_allowed_ptr(thread, cpumask_of(cpu_id)) != 0) {
+       WARNING("Attempt to start thread on disallowed CPU\n");
+       kthread_stop(thread);
+       palacios_free(thread_info);
+       return NULL;
+    }
 
     return thread;
 }
 
+void
+palacios_start_thread(void * th){
+
+       struct task_struct * thread = (struct task_struct *)th;
+       wake_up_process(thread);
+
+}
+
+/*
+  Convenience wrapper
+*/
+void * 
+palacios_create_and_start_thread_on_cpu(int cpu_id,
+                                       int (*fn)(void * arg), 
+                                       void * arg, 
+                                       char * thread_name, 
+                                       v3_resource_control_t *resource_control) {
+
+    void *t = palacios_create_thread_on_cpu(cpu_id, fn, arg, thread_name, resource_control);
+
+    if (t) { 
+       palacios_start_thread(t);
+    } 
+    
+    return t;
+}
+
+
 
 /**
  * Rebind a kernel thread to the specified CPU
  * The thread will be running on target CPU on return
  * non-zero return means failure
  */
-static int
+int
 palacios_move_thread_to_cpu(int new_cpu_id, 
                            void * thread_ptr) {
     struct task_struct * thread = (struct task_struct *)thread_ptr;
 
-    printk("Moving thread (%p) to cpu %d\n", thread, new_cpu_id);
+    INFO("Moving thread (%p) to cpu %d\n", thread, new_cpu_id);
 
     if (thread == NULL) {
        thread = current;
@@ -252,7 +572,7 @@ palacios_move_thread_to_cpu(int new_cpu_id,
 /**
  * Returns the CPU ID that the caller is running on.
  */
-static unsigned int 
+unsigned int 
 palacios_get_cpu(void) 
 {
 
@@ -318,16 +638,16 @@ palacios_dispatch_interrupt( int vector, void * dev, struct pt_regs * regs ) {
 static int
 palacios_hook_interrupt(struct v3_vm_info *    vm,
                        unsigned int            vector ) {
-    printk("hooking vector %d\n", vector);     
+    INFO("hooking vector %d\n", vector);       
 
     if (irq_to_guest_map[vector]) {
-       printk(KERN_WARNING
+       WARNING(
               "%s: Interrupt vector %u is already hooked.\n",
               __func__, vector);
        return -1;
     }
 
-    printk(KERN_DEBUG
+    DEBUG(
           "%s: Hooking interrupt vector %u to vm %p.\n",
           __func__, vector, vm);
 
@@ -343,14 +663,15 @@ palacios_hook_interrupt(struct v3_vm_info *       vm,
     
     //set_idtvec_handler(vector, palacios_dispatch_interrupt);
     if (vector < 32) {
-       panic("unexpected vector for hooking\n");
+       ERROR("unexpected vector for hooking\n");
+       return -1;
     } else {
        int device_id = 0;              
        
        int flag = 0;
        int error;
                
-       printk("hooking vector: %d\n", vector);         
+       DEBUG("hooking vector: %d\n", vector);          
 
        if (vector == 32) {
            flag = IRQF_TIMER;
@@ -365,8 +686,9 @@ palacios_hook_interrupt(struct v3_vm_info * vm,
                            &device_id);
        
        if (error) {
-           printk("error code for request_irq is %d\n", error);
-           panic("request vector %d failed",vector);
+           ERROR("error code for request_irq is %d\n", error);
+           ERROR("request vector %d failed", vector);
+           return -1;
        }
     }
        
@@ -384,20 +706,20 @@ palacios_ack_interrupt(
 ) 
 {
   ack_APIC_irq(); 
-  printk("Pretending to ack interrupt, vector=%d\n",vector);
+  DEBUG("Pretending to ack interrupt, vector=%d\n", vector);
   return 0;
 }
   
 /**
  * Returns the CPU frequency in kilohertz.
  */
-static unsigned int
+unsigned int
 palacios_get_cpu_khz(void) 
 {
-    printk("cpu_khz is %u\n",cpu_khz);
+    INFO("cpu_khz is %u\n", cpu_khz);
 
     if (cpu_khz == 0) { 
-       printk("faking cpu_khz to 1000000\n");
+       INFO("faking cpu_khz to 1000000\n");
        return 1000000;
     } else {
        return cpu_khz;
@@ -407,66 +729,186 @@ palacios_get_cpu_khz(void)
 
 /**
  * Yield the CPU so other host OS tasks can run.
+ * This will return immediately if there is no other thread that is runnable
+ * And there is no real bound on how long it will yield
  */
-static void
+void
 palacios_yield_cpu(void)
 {
     schedule();
     return;
 }
 
+/**
+ * Sleep the calling thread so other host OS tasks can run.
+ * If us is nonzero, the sleep times out after us microseconds (rounded up
+ * to whole jiffies); if us is zero, there is no bound on how long it sleeps
+ */
+void palacios_sleep_cpu(unsigned int us)
+{
+
+    set_current_state(TASK_INTERRUPTIBLE);
+    if (us) {
+        unsigned int uspj = 1000000U/HZ;
+        unsigned int jiffies = us/uspj + ((us%uspj) !=0);  // ceiling 
+        schedule_timeout(jiffies);
+    } else {
+        schedule();
+    }
+    return;
+}
 
+void palacios_wakeup_cpu(void *thread)
+{
+    wake_up_process(thread);
+    return;
+}
 
 /**
  * Allocates a mutex.
  * Returns NULL on failure.
  */
-static void *
+void *
 palacios_mutex_alloc(void)
 {
-    spinlock_t *lock = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+    spinlock_t *lock = palacios_alloc(sizeof(spinlock_t));
 
     if (lock) {
        spin_lock_init(lock);
+       LOCKCHECK_ALLOC(lock);
+    } else {
+       ERROR("ALERT ALERT Unable to allocate lock\n");
+       return NULL;
     }
     
     return lock;
 }
 
+void palacios_mutex_init(void *mutex)
+{
+  spinlock_t *lock = (spinlock_t*)mutex;
+  
+  if (lock) {
+    spin_lock_init(lock);
+    LOCKCHECK_ALLOC(lock);
+  }
+}
+
+void palacios_mutex_deinit(void *mutex)
+{
+  spinlock_t *lock = (spinlock_t*)mutex;
+  
+  if (lock) {
+    // no actual spin_lock_deinit on linux
+    // our purpose here is to drive the lock checker
+    LOCKCHECK_FREE(lock);
+  }
+}
+
+
 /**
  * Frees a mutex.
  */
-static void
+void
 palacios_mutex_free(void * mutex) {
-    kfree(mutex);
+    palacios_free(mutex);
+    LOCKCHECK_FREE(mutex);
 }
 
 /**
  * Locks a mutex.
  */
-static void 
+void 
 palacios_mutex_lock(void * mutex, int must_spin) {
+
+    LOCKCHECK_LOCK_PRE(mutex);
     spin_lock((spinlock_t *)mutex);
+    LOCKCHECK_LOCK_POST(mutex);
+}
+
+
+/**
+ * Locks a mutex, disabling interrupts on this core
+ */
+void *
+palacios_mutex_lock_irqsave(void * mutex, int must_spin) {
+    
+    unsigned long flags; 
+    
+    LOCKCHECK_LOCK_IRQSAVE_PRE(mutex,flags);
+    spin_lock_irqsave((spinlock_t *)mutex,flags);
+    LOCKCHECK_LOCK_IRQSAVE_POST(mutex,flags);
+
+    return (void *)flags;
 }
 
+
 /**
  * Unlocks a mutex.
  */
-static void 
+void 
 palacios_mutex_unlock(
        void *                  mutex
 ) 
 {
+    LOCKCHECK_UNLOCK_PRE(mutex);
     spin_unlock((spinlock_t *)mutex);
+    LOCKCHECK_UNLOCK_POST(mutex);
 }
 
+
+/**
+ * Unlocks a mutex and restores previous interrupt state on this core
+ */
+void 
+palacios_mutex_unlock_irqrestore(void *mutex, void *flags)
+{
+    LOCKCHECK_UNLOCK_IRQRESTORE_PRE(mutex,(unsigned long)flags);
+    // This is correct, flags is opaque
+    spin_unlock_irqrestore((spinlock_t *)mutex,(unsigned long)flags);
+    LOCKCHECK_UNLOCK_IRQRESTORE_POST(mutex,(unsigned long)flags);
+}
+
+void palacios_used_fpu(void)
+{
+   // We assume we are not preemptible here...
+#ifndef TS_USEDFPU
+   struct task_struct *tsk = current;
+   tsk->thread.fpu.has_fpu = 1;
+#else
+   struct thread_info *cur = current_thread_info();
+   cur->status |= TS_USEDFPU; 
+#endif
+   clts(); 
+   // After this, FP Save should be handled by Linux if it
+   // switches to a different task and that task uses FPU
+}
+
+inline int ists(void)
+{
+   return read_cr0() & X86_CR0_TS;
+
+}
+void palacios_need_fpu(void)
+{
+    // We assume we are not preemptible here... 
+    if (ists()) { 
+      // we have been switched back to from somewhere else...
+      // Do a restore now - this will also do a clts()
+      math_state_restore();
+    }
+}
+
+
 /**
  * Structure used by the Palacios hypervisor to interface with the host kernel.
  */
 static struct v3_os_hooks palacios_os_hooks = {
-       .print                  = palacios_print,
+       .print                  = palacios_print_scoped,
        .allocate_pages         = palacios_allocate_pages,
        .free_pages             = palacios_free_pages,
+       .vmalloc                = palacios_valloc,
+       .vfree                  = palacios_vfree,
        .malloc                 = palacios_alloc,
        .free                   = palacios_free,
        .vaddr_to_paddr         = palacios_vaddr_to_paddr,
@@ -474,30 +916,83 @@ static struct v3_os_hooks palacios_os_hooks = {
        .hook_interrupt         = palacios_hook_interrupt,
        .ack_irq                = palacios_ack_interrupt,
        .get_cpu_khz            = palacios_get_cpu_khz,
-       .start_kernel_thread    = palacios_start_kernel_thread,
+       .start_kernel_thread    = palacios_create_and_start_kernel_thread,
        .yield_cpu              = palacios_yield_cpu,
+       .sleep_cpu              = palacios_sleep_cpu,
+       .wakeup_cpu             = palacios_wakeup_cpu,
        .mutex_alloc            = palacios_mutex_alloc,
        .mutex_free             = palacios_mutex_free,
        .mutex_lock             = palacios_mutex_lock, 
        .mutex_unlock           = palacios_mutex_unlock,
+       .mutex_lock_irqsave     = palacios_mutex_lock_irqsave, 
+       .mutex_unlock_irqrestore= palacios_mutex_unlock_irqrestore,
        .get_cpu                = palacios_get_cpu,
        .interrupt_cpu          = palacios_interrupt_cpu,
        .call_on_cpu            = palacios_xcall,
-       .start_thread_on_cpu    = palacios_start_thread_on_cpu,
-       .move_thread_to_cpu = palacios_move_thread_to_cpu,
+       .create_thread_on_cpu   = palacios_create_thread_on_cpu,
+       .start_thread           = palacios_start_thread,
+       .move_thread_to_cpu     = palacios_move_thread_to_cpu,
 };
 
 
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+// Note that this host interface is defined here since it's
+// intertwined with thread creation... 
+static struct v3_lazy_fpu_iface palacios_fpu_hooks = {
+        .used_fpu               = palacios_used_fpu,
+        .need_fpu               = palacios_need_fpu
+};
+
+#endif
 
 
-int palacios_vmm_init( void )
+int palacios_vmm_init( char *options )
 {
+    int num_cpus = num_online_cpus();
+    char * cpu_mask = NULL;
+
+    if (cpu_list_len > 0) {
+       int major = 0;
+       int minor = 0;
+       int i = 0;
+
+        cpu_mask = palacios_alloc((num_cpus / 8) + 1);
+
+       if (!cpu_mask) { 
+           ERROR("Cannot allocate cpu mask\n");
+           return -1;
+       }
+
+       memset(cpu_mask, 0, (num_cpus / 8) + 1);
+        
+        for (i = 0; i < cpu_list_len; i++) {
+           if (cpu_list[i] >= num_cpus) {
+               WARNING("CPU (%d) exceeds number of available CPUs. Ignoring...\n", cpu_list[i]);
+               continue;
+           }
+
+            major = cpu_list[i] / 8;
+            minor = cpu_list[i] % 8;
     
+            *(cpu_mask + major) |= (0x1 << minor);
+        }
+    }
+
     memset(irq_to_guest_map, 0, sizeof(struct v3_vm_info *) * 256);
-    
-    printk("palacios_init starting - calling init_v3\n");
-    
-    Init_V3(&palacios_os_hooks, num_online_cpus());
+
+    if (init_print_buffers()) {
+       ERROR("Cannot initialize print buffers\n");
+       palacios_free(cpu_mask);
+       return -1;
+    }
+
+    INFO("palacios_init starting - calling init_v3\n");
+
+    Init_V3(&palacios_os_hooks, cpu_mask, num_cpus, options);
+
+#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH
+    V3_Init_Lazy_FPU(&palacios_fpu_hooks);
+#endif
 
     return 0;
 
@@ -508,5 +1003,9 @@ int palacios_vmm_exit( void ) {
 
     Shutdown_V3();
 
+    INFO("palacios shutdown complete\n");
+
+    deinit_print_buffers();
+
     return 0;
 }