X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=linux_module%2Fpalacios-stubs.c;h=039c1708ee0b84c21bf0f39eb3494d7d1e253a2f;hb=c8b23e99efde3aa5a2c26d1b8e9bc7dc914e6113;hp=a8f29238927ee8f061eb8bac2835a410673063a2;hpb=791ea2f3e21cfbc9c47341efbb98995c33d86fcb;p=palacios.git diff --git a/linux_module/palacios-stubs.c b/linux_module/palacios-stubs.c index a8f2923..039c170 100644 --- a/linux_module/palacios-stubs.c +++ b/linux_module/palacios-stubs.c @@ -8,28 +8,53 @@ #include #include #include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0) +#include +#endif #include #include #include #include #include +#include + +#include #include #include -#include "palacios.h" +#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH +#include +#endif +#include "palacios.h" +#include "util-hashtable.h" #include "mm.h" +#include "memcheck.h" +#include "lockcheck.h" + + + +// The following can be used to track memory bugs +// zero memory after allocation (now applies to valloc and page alloc as well) +#define ALLOC_ZERO_MEM 1 +// pad allocations by this many bytes on both ends of block (heap only) +#define ALLOC_PAD 0 + u32 pg_allocs = 0; u32 pg_frees = 0; u32 mallocs = 0; u32 frees = 0; - +u32 vmallocs = 0; +u32 vfrees = 0; static struct v3_vm_info * irq_to_guest_map[256]; @@ -40,30 +65,167 @@ extern int cpu_list[NR_CPUS]; extern int cpu_list_len; +extern struct hashtable *v3_thread_resource_map; + + +static char *print_buffer[NR_CPUS]; + +static void deinit_print_buffers(void) +{ + int i; + + for (i=0;i=0) { + printk(KERN_INFO "palacios (pcore %u vm %s vcore %u): %s", + cpu, + guest->name, + vcore, + buf); + } else { + printk(KERN_INFO "palacios (pcore %u vm %s): %s", + cpu, + guest->name, + buf); + } + } else { + printk(KERN_INFO "palacios (pcore %u): %s", + cpu, + buf); + } + return; -} +#endif + +} /* * Allocates a contiguous region of pages of the requested size. * Returns the physical address of the first page in the region. */ -static void * palacios_allocate_pages(int num_pages, unsigned int alignment) { +void *palacios_allocate_pages(int num_pages, unsigned int alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state) { void * pg_addr = NULL; + v3_resource_control_t *r; + + if (num_pages<=0) { + ERROR("ALERT ALERT Attempt to allocate zero or fewer pages (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state); + return NULL; + } + + if ((r=(v3_resource_control_t *)palacios_htable_search(v3_thread_resource_map,(addr_t)current))) { + // thread has a registered resource control structure + // these override any default values + // INFO("Overridden page search: (pre) alignment=%x, node_id=%x, filter_func=%p, filter_state=%p\n",alignment,node_id,filter_func,filter_state); + if (alignment==4096) { + alignment = r->pg_alignment; + } + if (node_id==-1) { + node_id = r->pg_node_id; + } + if (!filter_func) { + filter_func = r->pg_filter_func; + filter_state = r->pg_filter_state; + } + //INFO("Overridden page search: (post) alignment=%x, node_id=%x, filter_func=%p, filter_state=%p\n",alignment,node_id,filter_func,filter_state); + } + + pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment, node_id, filter_func, filter_state); + + if (!pg_addr) { + ERROR("ALERT ALERT Page allocation has FAILED Warning (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state); + return NULL; + } - pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment); pg_allocs += num_pages; +#if ALLOC_ZERO_MEM + memset(__va(pg_addr),0,num_pages*4096); +#endif + + MEMCHECK_ALLOC_PAGES(pg_addr,num_pages*4096); + return pg_addr; } @@ -74,48 +236,134 @@ static void * palacios_allocate_pages(int num_pages, unsigned int alignment) { * a single call while palacios_free_page() only frees a single page. */ -static void palacios_free_pages(void * page_paddr, int num_pages) { +void palacios_free_pages(void * page_paddr, int num_pages) { + if (!page_paddr) { + ERROR("Ignoring free pages: 0x%p (0x%lx)for %d pages\n", page_paddr, (uintptr_t)page_paddr, num_pages); + dump_stack(); + return; + } pg_frees += num_pages; free_palacios_pgs((uintptr_t)page_paddr, num_pages); + MEMCHECK_FREE_PAGES(page_paddr,num_pages*4096); + } +void * +palacios_alloc_extended(unsigned int size, unsigned int flags, int node) { + void * addr = NULL; + + if (size==0) { + // note that modern kernels will respond to a zero byte + // kmalloc and return the address 0x10... In Palacios, + // we will simply not allow 0 byte allocs at all, of any kind + ERROR("ALERT ALERT attempt to kmalloc zero bytes rejected\n"); + return NULL; + } + + if (node==-1) { + addr = kmalloc(size+2*ALLOC_PAD, flags); + } else { + addr = kmalloc_node(size+2*ALLOC_PAD, flags, node); + } + + if (!addr || IS_ERR(addr)) { + ERROR("ALERT ALERT kmalloc has FAILED FAILED FAILED\n"); + return NULL; + } + + mallocs++; + +#if ALLOC_ZERO_MEM + memset(addr,0,size+2*ALLOC_PAD); +#endif + + MEMCHECK_KMALLOC(addr,size+2*ALLOC_PAD); + + return addr+ALLOC_PAD; +} + +void * +palacios_valloc(unsigned int size) +{ + void * addr = NULL; + + if (size==0) { + ERROR("ALERT ALERT attempt to vmalloc zero bytes rejected\n"); + return NULL; + } + + addr = vmalloc(size); + + if (!addr || IS_ERR(addr)) { + ERROR("ALERT ALERT vmalloc has FAILED FAILED FAILED\n"); + return NULL; + } + + vmallocs++; + +#if ALLOC_ZERO_MEM + memset(addr,0,size); +#endif + + MEMCHECK_VMALLOC(addr,size); + + return addr; +} + +void palacios_vfree(void *p) +{ + if (!p) { + ERROR("Ignoring vfree: 0x%p\n",p); + dump_stack(); + return; + } + vfree(p); + vfrees++; + MEMCHECK_VFREE(p); +} + /** * Allocates 'size' bytes of kernel memory. * Returns the kernel virtual address of the memory allocated. */ -static void * +void * palacios_alloc(unsigned int size) { - void * addr = NULL; - if (irqs_disabled()) { - addr = kmalloc(size, GFP_ATOMIC); + // It is very important that this test remains since + // this function is used extensively throughout palacios and the linux + // module, both in places where interrupts are off and where they are on + // a GFP_KERNEL call, when done with interrupts off can lead to DEADLOCK + if (irqs_disabled() || in_atomic()) { + return palacios_alloc_extended(size,GFP_ATOMIC,-1); } else { - addr = kmalloc(size, GFP_KERNEL); + return palacios_alloc_extended(size,GFP_KERNEL,-1); } - mallocs++; - - return addr; } /** * Frees memory that was previously allocated by palacios_alloc(). */ -static void +void palacios_free( void * addr ) { + if (!addr) { + ERROR("Ignoring free : 0x%p\n", addr); + dump_stack(); + return; + } frees++; - kfree(addr); - return; + kfree(addr-ALLOC_PAD); + MEMCHECK_KFREE(addr-ALLOC_PAD); } /** * Converts a kernel virtual address to the corresponding physical address. */ -static void * +void * palacios_vaddr_to_paddr( void * vaddr ) @@ -127,7 +375,7 @@ palacios_vaddr_to_paddr( /** * Converts a physical address to the corresponding kernel virtual address. */ -static void * +void * palacios_paddr_to_vaddr( void * paddr ) @@ -138,9 +386,7 @@ palacios_paddr_to_vaddr( /** * Runs a function on the specified CPU. */ - -// For now, do call only on local CPU -static void +void palacios_xcall( int cpu_id, void (*fn)(void *arg), @@ -155,10 +401,14 @@ palacios_xcall( return; } + +#define MAX_THREAD_NAME 32 + struct lnx_thread_arg { int (*fn)(void * arg); void * arg; - char * name; + v3_resource_control_t *resource_control; + char name[MAX_THREAD_NAME]; }; static int lnx_thread_target(void * arg) { @@ -171,79 +421,136 @@ static int lnx_thread_target(void * arg) { allow_signal(SIGKILL); */ +#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH + // We are a kernel thread that needs FPU save/restore state + // vcores definitely need this, all the other threads get it too, + // but they just won't use it + + fpu_alloc(&(current->thread.fpu)); +#endif + + palacios_htable_insert(v3_thread_resource_map,(addr_t)current,(addr_t)thread_info->resource_control); ret = thread_info->fn(thread_info->arg); - kfree(thread_info); + INFO("Palacios Thread (%s) EXITING\n", thread_info->name); + + palacios_htable_remove(v3_thread_resource_map,(addr_t)current,0); + + palacios_free(thread_info); // handle cleanup - - printk("Palacios Thread (%s) EXITTING\n", thread_info->name); + // We rely on do_exit to free the fpu data + // since we could get switched at any point until the thread is done... do_exit(ret); - + return 0; // should not get here. } /** * Creates a kernel thread. */ -static void * -palacios_start_kernel_thread( +void * +palacios_create_and_start_kernel_thread( int (*fn) (void * arg), void * arg, - char * thread_name) { + char * thread_name, + v3_resource_control_t *resource_control) { - struct lnx_thread_arg * thread_info = kmalloc(sizeof(struct lnx_thread_arg), GFP_KERNEL); + struct lnx_thread_arg * thread_info = palacios_alloc(sizeof(struct lnx_thread_arg)); + + if (!thread_info) { + ERROR("ALERT ALERT Unable to allocate thread\n"); + return NULL; + } thread_info->fn = fn; thread_info->arg = arg; - thread_info->name = thread_name; + strncpy(thread_info->name,thread_name,MAX_THREAD_NAME); + thread_info->name[MAX_THREAD_NAME-1] =0; + thread_info->resource_control = resource_control; - return kthread_run( lnx_thread_target, thread_info, thread_name ); + return kthread_run( lnx_thread_target, thread_info, thread_info->name ); } /** * Starts a kernel thread on the specified CPU. */ -static void * -palacios_start_thread_on_cpu(int cpu_id, - int (*fn)(void * arg), - void * arg, - char * thread_name ) { +void * +palacios_create_thread_on_cpu(int cpu_id, + int (*fn)(void * arg), + void * arg, + char * thread_name, + v3_resource_control_t *resource_control) { struct task_struct * thread = NULL; - struct lnx_thread_arg * thread_info = kmalloc(sizeof(struct lnx_thread_arg), GFP_KERNEL); + struct lnx_thread_arg * thread_info = palacios_alloc(sizeof(struct lnx_thread_arg)); + + if (!thread_info) { + ERROR("ALERT ALERT Unable to allocate thread to start on cpu\n"); + return NULL; + } thread_info->fn = fn; thread_info->arg = arg; - thread_info->name = thread_name; - + strncpy(thread_info->name,thread_name,MAX_THREAD_NAME); + thread_info->name[MAX_THREAD_NAME-1] =0; + thread_info->resource_control=resource_control; - thread = kthread_create( lnx_thread_target, thread_info, thread_name ); + thread = kthread_create( lnx_thread_target, thread_info, thread_info->name ); - if (IS_ERR(thread)) { - WARNING("Palacios error creating thread: %s\n", thread_name); + if (!thread || IS_ERR(thread)) { + WARNING("Palacios error creating thread: %s\n", thread_info->name); + palacios_free(thread_info); return NULL; } if (set_cpus_allowed_ptr(thread, cpumask_of(cpu_id)) != 0) { + WARNING("Attempt to start thread on disallowed CPU\n"); kthread_stop(thread); + palacios_free(thread_info); return NULL; } - wake_up_process(thread); - return thread; } +void +palacios_start_thread(void * th){ + + struct task_struct * thread = (struct task_struct *)th; + wake_up_process(thread); + +} + +/* + Convenience wrapper +*/ +void * +palacios_create_and_start_thread_on_cpu(int cpu_id, + int (*fn)(void * arg), + void * arg, + char * thread_name, + v3_resource_control_t *resource_control) { + + void *t = palacios_create_thread_on_cpu(cpu_id, fn, arg, thread_name, resource_control); + + if (t) { + palacios_start_thread(t); + } + + return t; +} + + /** * Rebind a kernel thread to the specified CPU * The thread will be running on target CPU on return * non-zero return means failure */ -static int +int palacios_move_thread_to_cpu(int new_cpu_id, void * thread_ptr) { struct task_struct * thread = (struct task_struct *)thread_ptr; @@ -265,7 +572,7 @@ palacios_move_thread_to_cpu(int new_cpu_id, /** * Returns the CPU ID that the caller is running on. */ -static unsigned int +unsigned int palacios_get_cpu(void) { @@ -356,7 +663,8 @@ palacios_hook_interrupt(struct v3_vm_info * vm, //set_idtvec_handler(vector, palacios_dispatch_interrupt); if (vector < 32) { - panic("unexpected vector for hooking\n"); + ERROR("unexpected vector for hooking\n"); + return -1; } else { int device_id = 0; @@ -379,7 +687,8 @@ palacios_hook_interrupt(struct v3_vm_info * vm, if (error) { ERROR("error code for request_irq is %d\n", error); - panic("request vector %d failed",vector); + ERROR("request vector %d failed", vector); + return -1; } } @@ -397,17 +706,17 @@ palacios_ack_interrupt( ) { ack_APIC_irq(); - DEBUG("Pretending to ack interrupt, vector=%d\n",vector); + DEBUG("Pretending to ack interrupt, vector=%d\n", vector); return 0; } /** * Returns the CPU frequency in kilohertz. */ -static unsigned int +unsigned int palacios_get_cpu_khz(void) { - INFO("cpu_khz is %u\n",cpu_khz); + INFO("cpu_khz is %u\n", cpu_khz); if (cpu_khz == 0) { INFO("faking cpu_khz to 1000000\n"); @@ -420,66 +729,186 @@ palacios_get_cpu_khz(void) /** * Yield the CPU so other host OS tasks can run. + * This will return immediately if there is no other thread that is runnable + * And there is no real bound on how long it will yield */ -static void +void palacios_yield_cpu(void) { schedule(); return; } +/** + * Yield the CPU so other host OS tasks can run. + * Given now immediately if there is no other thread that is runnable + * And there is no real bound on how long it will yield + */ +void palacios_sleep_cpu(unsigned int us) +{ + + set_current_state(TASK_INTERRUPTIBLE); + if (us) { + unsigned int uspj = 1000000U/HZ; + unsigned int jiffies = us/uspj + ((us%uspj) !=0); // ceiling + schedule_timeout(jiffies); + } else { + schedule(); + } + return; +} +void palacios_wakeup_cpu(void *thread) +{ + wake_up_process(thread); + return; +} /** * Allocates a mutex. * Returns NULL on failure. */ -static void * +void * palacios_mutex_alloc(void) { - spinlock_t *lock = kmalloc(sizeof(spinlock_t), GFP_KERNEL); + spinlock_t *lock = palacios_alloc(sizeof(spinlock_t)); if (lock) { spin_lock_init(lock); + LOCKCHECK_ALLOC(lock); + } else { + ERROR("ALERT ALERT Unable to allocate lock\n"); + return NULL; } return lock; } +void palacios_mutex_init(void *mutex) +{ + spinlock_t *lock = (spinlock_t*)mutex; + + if (lock) { + spin_lock_init(lock); + LOCKCHECK_ALLOC(lock); + } +} + +void palacios_mutex_deinit(void *mutex) +{ + spinlock_t *lock = (spinlock_t*)mutex; + + if (lock) { + // no actual spin_lock_deinit on linux + // our purpose here is to drive the lock checker + LOCKCHECK_FREE(lock); + } +} + + /** * Frees a mutex. */ -static void +void palacios_mutex_free(void * mutex) { - kfree(mutex); + palacios_free(mutex); + LOCKCHECK_FREE(mutex); } /** * Locks a mutex. */ -static void +void palacios_mutex_lock(void * mutex, int must_spin) { + + LOCKCHECK_LOCK_PRE(mutex); spin_lock((spinlock_t *)mutex); + LOCKCHECK_LOCK_POST(mutex); } + +/** + * Locks a mutex, disabling interrupts on this core + */ +void * +palacios_mutex_lock_irqsave(void * mutex, int must_spin) { + + unsigned long flags; + + LOCKCHECK_LOCK_IRQSAVE_PRE(mutex,flags); + spin_lock_irqsave((spinlock_t *)mutex,flags); + LOCKCHECK_LOCK_IRQSAVE_POST(mutex,flags); + + return (void *)flags; +} + + /** * Unlocks a mutex. */ -static void +void palacios_mutex_unlock( void * mutex ) { + LOCKCHECK_UNLOCK_PRE(mutex); spin_unlock((spinlock_t *)mutex); + LOCKCHECK_UNLOCK_POST(mutex); } + +/** + * Unlocks a mutex and restores previous interrupt state on this core + */ +void +palacios_mutex_unlock_irqrestore(void *mutex, void *flags) +{ + LOCKCHECK_UNLOCK_IRQRESTORE_PRE(mutex,(unsigned long)flags); + // This is correct, flags is opaque + spin_unlock_irqrestore((spinlock_t *)mutex,(unsigned long)flags); + LOCKCHECK_UNLOCK_IRQRESTORE_POST(mutex,(unsigned long)flags); +} + +void palacios_used_fpu(void) +{ + // We assume we are not preemptible here... +#ifndef TS_USEDFPU + struct task_struct *tsk = current; + tsk->thread.fpu.has_fpu = 1; +#else + struct thread_info *cur = current_thread_info(); + cur->status |= TS_USEDFPU; +#endif + clts(); + // After this, FP Save should be handled by Linux if it + // switches to a different task and that task uses FPU +} + +inline int ists(void) +{ + return read_cr0() & X86_CR0_TS; + +} +void palacios_need_fpu(void) +{ + // We assume we are not preemptible here... + if (ists()) { + // we have been switched back to from somewhere else... + // Do a restore now - this will also do a clts() + math_state_restore(); + } +} + + /** * Structure used by the Palacios hypervisor to interface with the host kernel. */ static struct v3_os_hooks palacios_os_hooks = { - .print = palacios_print, + .print = palacios_print_scoped, .allocate_pages = palacios_allocate_pages, .free_pages = palacios_free_pages, + .vmalloc = palacios_valloc, + .vfree = palacios_vfree, .malloc = palacios_alloc, .free = palacios_free, .vaddr_to_paddr = palacios_vaddr_to_paddr, @@ -487,23 +916,37 @@ static struct v3_os_hooks palacios_os_hooks = { .hook_interrupt = palacios_hook_interrupt, .ack_irq = palacios_ack_interrupt, .get_cpu_khz = palacios_get_cpu_khz, - .start_kernel_thread = palacios_start_kernel_thread, + .start_kernel_thread = palacios_create_and_start_kernel_thread, .yield_cpu = palacios_yield_cpu, + .sleep_cpu = palacios_sleep_cpu, + .wakeup_cpu = palacios_wakeup_cpu, .mutex_alloc = palacios_mutex_alloc, .mutex_free = palacios_mutex_free, .mutex_lock = palacios_mutex_lock, .mutex_unlock = palacios_mutex_unlock, + .mutex_lock_irqsave = palacios_mutex_lock_irqsave, + .mutex_unlock_irqrestore= palacios_mutex_unlock_irqrestore, .get_cpu = palacios_get_cpu, .interrupt_cpu = palacios_interrupt_cpu, .call_on_cpu = palacios_xcall, - .start_thread_on_cpu = palacios_start_thread_on_cpu, - .move_thread_to_cpu = palacios_move_thread_to_cpu, + .create_thread_on_cpu = palacios_create_thread_on_cpu, + .start_thread = palacios_start_thread, + .move_thread_to_cpu = palacios_move_thread_to_cpu, }; +#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH +// Note that this host interface is defined here since it's +// intertwined with thread creation... +static struct v3_lazy_fpu_iface palacios_fpu_hooks = { + .used_fpu = palacios_used_fpu, + .need_fpu = palacios_need_fpu +}; + +#endif -int palacios_vmm_init( void ) +int palacios_vmm_init( char *options ) { int num_cpus = num_online_cpus(); char * cpu_mask = NULL; @@ -513,7 +956,13 @@ int palacios_vmm_init( void ) int minor = 0; int i = 0; - cpu_mask = kmalloc((num_cpus / 8) + 1, GFP_KERNEL); + cpu_mask = palacios_alloc((num_cpus / 8) + 1); + + if (!cpu_mask) { + ERROR("Cannot allocate cpu mask\n"); + return -1; + } + memset(cpu_mask, 0, (num_cpus / 8) + 1); for (i = 0; i < cpu_list_len; i++) { @@ -531,9 +980,19 @@ int palacios_vmm_init( void ) memset(irq_to_guest_map, 0, sizeof(struct v3_vm_info *) * 256); + if (init_print_buffers()) { + ERROR("Cannot initialize print buffers\n"); + palacios_free(cpu_mask); + return -1; + } + INFO("palacios_init starting - calling init_v3\n"); - - Init_V3(&palacios_os_hooks, cpu_mask, num_cpus); + + Init_V3(&palacios_os_hooks, cpu_mask, num_cpus, options); + +#ifdef V3_CONFIG_HOST_LAZY_FPU_SWITCH + V3_Init_Lazy_FPU(&palacios_fpu_hooks); +#endif return 0; @@ -544,5 +1003,9 @@ int palacios_vmm_exit( void ) { Shutdown_V3(); + INFO("palacios shutdown complete\n"); + + deinit_print_buffers(); + return 0; }