X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=linux_module%2Flockcheck.c;h=a921abd2bff39ffe71eb885eb320c2c03feb6cbb;hb=da0b4bb80de755529f47b9ca57ccb0c2cefae15b;hp=bfa05660f355e5fe316b9755f7649b340e91d5d0;hpb=49852ca1e8280641ffb7ef2986c03bd443bd02b8;p=palacios.git

diff --git a/linux_module/lockcheck.c b/linux_module/lockcheck.c
index bfa0566..a921abd 100644
--- a/linux_module/lockcheck.c
+++ b/linux_module/lockcheck.c
@@ -1,11 +1,36 @@
+/*
+   Palacios lock-checking framework
+   (c) Peter Dinda, 2013
+*/
+
 #include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <linux/sort.h>
 
 #include "palacios.h"
 
 #include "lockcheck.h"
 
+
+// Number of outputs possible before output stops;
+// set to zero to remove the limit
+#define OUTPUT_LIMIT      0
+//
+// Number of outputs to skip before
+// printing and counting
+#define OUTPUT_SKIP       0
+
+//
+// Whether or not to print these events
+//
+#define PRINT_LOCK_ALLOC  0
+#define PRINT_LOCK_FREE   0
+#define PRINT_LOCK_LOCK   0
+#define PRINT_LOCK_UNLOCK 0
+
 // How far up the stack to track the caller
 // 0 => palacios_...
 // 1 => v3_lock...
@@ -19,9 +44,39 @@
 // This is the minimum number
 #define WARN_MULTIPLE_THRESHOLD 3
 
+// Detect when the last lock a processor holds is released
+// but interrupts remain off; similarly for the first acquire
+#define CHECK_IRQ_LAST_RELEASE  0
+#define CHECK_IRQ_FIRST_ACQUIRE 1
+
+// Show the hottest locks every this many lock or lock_irqsave operations;
+// 0 indicates this should not be shown
+#define HOT_LOCK_INTERVAL 100000
+
+// How often the long-lock timer checker should run,
+// and how long a lock needs to be held before it complains;
+// set to zero to deactivate
+#define LONG_LOCK_INTERVAL_MS 10
+#define LONG_LOCK_HELD_MS     20
+
+//
+// Whether lockcheck should lock its own data structures during an
+// event (alloc, dealloc, lock, unlock, etc.) and the subsequent
+// checking.  If this is off, it will only lock on a lock allocation,
+// in order to assure that each lock has a distinct slot.  If this is
+// off, lockcheck may miss problems (or see additional problems) due
+// to internal race conditions.  However, if it is on, lockcheck is
+// essentially adding a global lock acquire to each lock operation in
+// Palacios, which will perturb Palacios.
+//
+#define LOCK_SELF 1
+
+
 typedef struct {
   int  inuse;                         // nonzero if this is in use
   void *lock;                         // the lock
+  void *holder;                       // current thread holding it
+  u32  holdingcpu;                    // on which cpu it acquired it
   void *allocator[STEP_BACK_DEPTH];   // who allocated this
   int  lockcount;                     // how many times it's been locked/unlocked (lock=+1, unlock=-1)
@@ -33,24 +88,74 @@ typedef struct {
   void *lastirqlocker[STEP_BACK_DEPTH];   // who last locked
   unsigned long lastlockflags;            // their flags
+  unsigned long locktime;                 // when it was locked
   void *lastirqunlocker[STEP_BACK_DEPTH]; // who last unlocked
   unsigned long lastunlockflags;          // their flags
+  int hotlockcount;                       // how many times it's been locked
 } lockcheck_state_t;
 
 
 // This lock is currently used only to control
 // allocation of entries in the global state
-static spinlock_t lock;
+static spinlock_t mylock;
 static lockcheck_state_t state[NUM_LOCKS];
 
+static lockcheck_state_t *sorted_state[NUM_LOCKS];
+
+static int numout=0;
+
+static int globallockcount=0;
+
+static struct timer_list long_lock_timer;
+
+
+#define DEBUG_OUTPUT(fmt, args...)                               \
+do {                                                             \
+  numout++;                                                      \
+  if (numout>=OUTPUT_SKIP) {                                     \
+    if (OUTPUT_LIMIT==0 || (numout-OUTPUT_SKIP)<OUTPUT_LIMIT) {  \
+      DEBUG(fmt, ##args);                                        \
+    }                                                            \
+  }                                                              \
+} while (0)
+
+#define DEBUG_DUMPSTACK()                                        \
+do {                                                             \
+  numout++;                                                      \
+  if (numout>=OUTPUT_SKIP) {                                     \
+    if (OUTPUT_LIMIT==0 || (numout-OUTPUT_SKIP)<OUTPUT_LIMIT) {  \
+      dump_stack();                                              \
+    }                                                            \
+  }                                                              \
+} while (0)

[Extraction damage: everything between a literal "<" and the next ">" was stripped from this page as if it were an HTML tag. The two macros above are reconstructed from the surviving fragments. Still missing at this point: the LOCK_SELF-conditional internal locking macros (LOCK_DECL, LOCK, UNLOCK, and the *_ALLOC variants used below), the per-cpu lock stack declaration, and the start of get_lock_entry(). The #include filenames above were lost the same way and are best-guess reconstructions.]
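The shape of the lost locking macros can be inferred from how the rest of the file uses them: LOCK_DECL declares the flags variable f that unlock_irqrestore_post() later overwrites, LOCK()/UNLOCK() guard every event when LOCK_SELF is on, and the *_ALLOC variants do the locking only when it is off (so that get_lock_entry() still hands out distinct slots). A minimal sketch under those assumptions - not the verbatim original:

// Assumed reconstruction of lockcheck's internal locking layer.
// When LOCK_SELF is on, every lockcheck event serializes on mylock;
// when it is off, only lock-slot allocation does.
#if LOCK_SELF
#define LOCK_DECL        unsigned long f
#define LOCK()           spin_lock_irqsave(&mylock,f)
#define UNLOCK()         spin_unlock_irqrestore(&mylock,f)
#define LOCK_ALLOC_DECL
#define LOCK_ALLOC()
#define UNLOCK_ALLOC()
#else
#define LOCK_DECL
#define LOCK()
#define UNLOCK()
#define LOCK_ALLOC_DECL  unsigned long f
#define LOCK_ALLOC()     spin_lock_irqsave(&mylock,f)
#define UNLOCK_ALLOC()   spin_unlock_irqrestore(&mylock,f)
#endif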
@@ -70,8 +174,7 @@ static lockcheck_state_t *get_lock_entry(void)
       break;
     }
   }
-  
-  spin_unlock_irqrestore(&lock,f);
+  UNLOCK_ALLOC();
   
   if (i<NUM_LOCKS) { 

[Another stripped span: the tail of get_lock_entry(), all of find_lock_entry() and free_lock_entry(), and the head of lock_stack_print() are missing from the page; the diff resumes partway through lock_stack_print().]

   if ((mystack->top)>0) { 
     for (i=mystack->top; i>0;i--) {
-      snprintf(buf,64,"LOCK STACK (cpu=%u, index=%u, irq=%d)",cpu, i-1, (int)(mystack->irq[i-1]));
+      snprintf(buf,64,"LOCK STACK (cpu=%u, index=%u, irq=%d, irqoff=%d)",cpu, i-1, (int)(mystack->irq[i-1]), (int)(mystack->irqoff[i-1]));
       printlock(buf,find_lock_entry(mystack->lock[i-1]));
     }
   }
@@ -119,57 +222,203 @@ static void lock_stack_print(void)
 }
 
 
-static void lock_stack_lock(void *lock, char irq)
+static void lock_stack_lock(void *lock, char irq, unsigned long flags)
 {
   lock_stack_t *mystack = &(get_cpu_var(lock_stack));
   u32 cpu = get_cpu(); put_cpu();
   
   if (mystack->top>=(LOCK_STACK_DEPTH-1)) {
     put_cpu_var(lock_stack);
-    DEBUG("LOCKCHECK: Locking lock 0x%p on cpu %u exceeds stack limit of %d\n",lock,cpu,LOCK_STACK_DEPTH);
+    DEBUG_OUTPUT("LOCKCHECK: Locking lock 0x%p on cpu %u exceeds stack limit of %d\n",lock,cpu,LOCK_STACK_DEPTH);
     lock_stack_print();
   } else {
+    int oldtop = mystack->top;
     mystack->lock[mystack->top] = lock;
     mystack->irq[mystack->top] = irq;
+    mystack->irqoff[mystack->top] = irqs_disabled_flags(flags);
     mystack->top++;
     put_cpu_var(lock_stack);
+    if (CHECK_IRQ_FIRST_ACQUIRE && oldtop==0 && irqs_disabled_flags(flags)) { 
+      DEBUG_OUTPUT("LOCKCHECK: First lock on lock stack of processor %d but irqs were already disabled - stack trace follows\n", cpu);
+      DEBUG_DUMPSTACK();
+    }
+
   }
 
 }
 
-static void lock_stack_unlock(void *lock, char irq)
+static void lock_stack_unlock(void *lock, char irq, unsigned long flags)
 {
   lock_stack_t *mystack = &(get_cpu_var(lock_stack));
   u32 cpu = get_cpu(); put_cpu();
 
   if (mystack->top==0) {
     put_cpu_var(lock_stack);
-    DEBUG("LOCKCHECK: Unlocking lock 0x%p on cpu %u when lock stack is empty\n",lock,cpu);
+    DEBUG_OUTPUT("LOCKCHECK: Unlocking lock 0x%p on cpu %u when lock stack is empty\n",lock,cpu);
   } else {
     if (mystack->lock[mystack->top-1] != lock) { 
       void *otherlock=mystack->lock[mystack->top-1];
       put_cpu_var(lock_stack);
-      DEBUG("LOCKCHECK: Unlocking lock 0x%p on cpu %u when top of stack is lock 0x%p\n",lock,cpu, otherlock);
+      DEBUG_OUTPUT("LOCKCHECK: Unlocking lock 0x%p on cpu %u when top of stack is lock 0x%p\n",lock,cpu, otherlock);
       lock_stack_print();
     } else {
       if (irq!=mystack->irq[mystack->top-1]) {
         char otherirq = mystack->irq[mystack->top-1];
         put_cpu_var(lock_stack);
-        DEBUG("LOCKCHECK: Unlocking lock 0x%p on cpu %u with irq=%d, but was locked with irq=%d\n",lock,cpu,irq,otherirq);
+        DEBUG_OUTPUT("LOCKCHECK: Unlocking lock 0x%p on cpu %u with irq=%d, but was locked with irq=%d\n",lock,cpu,irq,otherirq);
         lock_stack_print();
+        mystack = &(get_cpu_var(lock_stack));
+        mystack->top--;
+        put_cpu_var(lock_stack);
       } else {
-        mystack->top--;
-        put_cpu_var(lock_stack);
+        if (irq) { 
+          if (irqs_disabled_flags(flags)!=mystack->irqoff[mystack->top-1]) {
+            char otherirqoff = mystack->irqoff[mystack->top-1];
+            put_cpu_var(lock_stack);
+            DEBUG_OUTPUT("LOCKCHECK: Unlocking lock 0x%p on cpu %u sets irqoff=%d but the matching lock returned irqoff=%d\n", lock, cpu, irqs_disabled_flags(flags), otherirqoff);
+            lock_stack_print();
+            mystack = &(get_cpu_var(lock_stack));
+            mystack->top--;
+            put_cpu_var(lock_stack);
+          } else {
+            // irq, and irq states match - good
+            mystack->top--;
+            put_cpu_var(lock_stack);
+          }
+        } else {
+          // !irq - we are good
+          mystack->top--;
+          put_cpu_var(lock_stack);
+        }
       }
     }
   }
 
+  mystack = &(get_cpu_var(lock_stack));
+  if (mystack->top == 0) { 
+    put_cpu_var(lock_stack);
+    if (CHECK_IRQ_LAST_RELEASE && irqs_disabled()) { 
+      DEBUG_OUTPUT("LOCKCHECK: Lock stack on cpu %u is now empty, but irqs are still disabled! Stack trace follows\n", cpu);
+      DEBUG_DUMPSTACK();
+    }
+  } else {
+    put_cpu_var(lock_stack);
+  }
+
 }
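The per-cpu lock stack these two functions manipulate was declared in a stripped span. A plausible reconstruction, assuming the field names read above (lock, irq, irqoff, top) and a DEFINE_PER_CPU declaration to match the get_cpu_var(lock_stack) accesses:

// Assumed reconstruction: per-cpu stack of the locks a cpu currently holds.
typedef struct {
  void *lock[LOCK_STACK_DEPTH];   // the locks, in acquisition order
  char  irq[LOCK_STACK_DEPTH];    // nonzero if taken with lock_irqsave
  char  irqoff[LOCK_STACK_DEPTH]; // whether irqs were off at acquire time
  u32   top;                      // next free slot
} lock_stack_t;

static DEFINE_PER_CPU(lock_stack_t, lock_stack);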
 
+// compare() receives pointers to the pointers in the sorted_state array
+int compare(const void *a, const void *b)
+{
+  lockcheck_state_t *l = *((lockcheck_state_t **)a);
+  lockcheck_state_t *r = *((lockcheck_state_t **)b);
+
+  return -(l->hotlockcount - r->hotlockcount);
+}
+
+static void hot_lock_show(void)
+{
+  int n, i;
+  char buf[64];
+
+  n=0;

[Stripped span: the loop that gathers the in-use entries into sorted_state, the sort of those pointers, and the start of the reporting loop (including its snprintf format string) are lost; only the tail of the function survives:]

+    printlock(buf,sorted_state[i]);
+  }
+}
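A minimal sketch of what the lost body of hot_lock_show() likely does, assuming linux/sort.h's sort() (which matches the compare() callback defined above); the report format string is illustrative, not the original:

// Gather in-use entries, order by descending hotlockcount, and report.
n = 0;
for (i = 0; i < NUM_LOCKS; i++) {
  if (state[i].inuse) {
    sorted_state[n++] = &(state[i]);
  }
}
sort(sorted_state, n, sizeof(lockcheck_state_t *), compare, NULL);
for (i = 0; i < n; i++) {
  snprintf(buf, 64, "HOT LOCK (%d acquires)", sorted_state[i]->hotlockcount);
  printlock(buf, sorted_state[i]);
}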
 
+static void hot_lock_lock(void *lock)
+{
+  lockcheck_state_t *l = find_lock_entry(lock);
+
+  if (!l) { return; }
+
+  l->hotlockcount++;
+  globallockcount++;
+
+  if (HOT_LOCK_INTERVAL && !(globallockcount % HOT_LOCK_INTERVAL)) { 
+    DEBUG_OUTPUT("LOCKCHECK: Hot locks after %d acquires follow\n",globallockcount);
+    hot_lock_show();
+  }
+}
+
+
+#define hot_lock_unlock(X) // nothing for now
+
+
+void long_lock_callback(unsigned long ignored)
+{
+  // See if any lock has been held for too long,
+  // then print all the locks if one has.
+  //
+  // Note that this function does not lock, because the
+  // idea is to try to catch a deadlock, which might
+  // imply that we are already stalled waiting on our own lock
+  
+  int i;
+  int have=0;
+  lockcheck_state_t *l;
+  char buf[64];
+  unsigned long now;
+
+
+  now = jiffies;
+
+  for (i=0;i<NUM_LOCKS;i++) { 
+    l=&(state[i]);
+    if (l->inuse && (l->irqcount>0 || l->lockcount>0)) { 
+      if (jiffies_to_msecs(now-l->locktime)>LONG_LOCK_HELD_MS) { 
+        have++;
+      }
+    }
+  }
+  
+  if (have>0) { 
+    DEBUG_OUTPUT("LOCKCHECK: LONG LOCK: %d locks have been held longer than %u ms - dump of all held locks follows\n",have,LONG_LOCK_HELD_MS);
+    have=0;
+    for (i=0;i<NUM_LOCKS;i++) { 
+      l=&(state[i]);
+      if (l->inuse && (l->irqcount>0 || l->lockcount>0)) {
+        snprintf(buf,64,"LONG LOCK (%d - %s)",
+                 have,
+                 jiffies_to_msecs(now-l->locktime)>LONG_LOCK_HELD_MS ? "**OLD**" : "ok");
+        printlock(buf,l);
+        have++;
+      }
+    }
+  }
+
+  // reset the timer to run again
+
+  mod_timer(&long_lock_timer,jiffies+msecs_to_jiffies(LONG_LOCK_INTERVAL_MS));
+
+}
+
 
 void palacios_lockcheck_init()
 {
   memset(state,0,sizeof(lockcheck_state_t)*NUM_LOCKS);
-  spin_lock_init(&lock);
-  DEBUG("LOCKCHECK: LOCK CHECKING INITED\n");
+  spin_lock_init(&mylock);
+
+#if LONG_LOCK_INTERVAL_MS>0
+  setup_timer(&long_lock_timer,long_lock_callback,0);
+  mod_timer(&long_lock_timer, jiffies + msecs_to_jiffies(LONG_LOCK_INTERVAL_MS));
+#endif
+
+  DEBUG_OUTPUT("LOCKCHECK: LOCK CHECKING INITED (internal locking: %d, output limit: %d, output skip: %d)\n",LOCK_SELF,OUTPUT_LIMIT,OUTPUT_SKIP);
 }
 
 //
@@ -202,11 +451,11 @@ static void clear_trace(void **trace)
 static void printlock(char *prefix, lockcheck_state_t *l)
 {
   if (!l || !(l->lock) ) { 
-    DEBUG("LOCKCHECK: %s: lock 0x%p BOGUS\n",prefix,l);
+    DEBUG_OUTPUT("LOCKCHECK: %s: lock 0x%p BOGUS\n",prefix,l);
     return;
   }
   if (l->lock) { 
-    DEBUG("LOCKCHECK: %s: lock 0x%p, allocator="
+    DEBUG_OUTPUT("LOCKCHECK: %s: lock 0x%p, holder %s on %u, allocator="
 	  backtrace_format
 	  ", lockcount=%d, lastlocker="
 	  backtrace_format
@@ -216,8 +465,11 @@ static void printlock(char *prefix, lockcheck_state_t *l)
 	  backtrace_format
 	  ", lastlockflags=%lu, lastirqunlocker="
 	  backtrace_format
-	  ", lastunlockflags=%lu\n",
-	  prefix,l->lock,
+	  ", lastunlockflags=%lu"
+	  ", lockage=%ums\n",
+	  prefix,l->lock,
+	  l->holder ? ((struct task_struct *)(l->holder))->comm : "none",
+	  l->holdingcpu,
 	  backtrace_expand(l->allocator),
 	  l->lockcount,
 	  backtrace_expand(l->lastlocker),
@@ -226,12 +478,16 @@ static void printlock(char *prefix, lockcheck_state_t *l)
 	  backtrace_expand(l->lastirqlocker),
 	  l->lastlockflags,
 	  backtrace_expand(l->lastirqunlocker),
-	  l->lastunlockflags);
+	  l->lastunlockflags,
+	  ((l->lockcount>0) || (l->irqcount>0)) ? jiffies_to_msecs(jiffies-l->locktime) : 0);
   }
 }
 
 
+
+
 static void find_multiple_locks_held(void)
 {
   int i;
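For context, the palacios_lockcheck_* hooks below are presumably invoked from this module's lock wrappers around the real spinlock operations. A hypothetical wrapper (name and ordering assumed, not part of this diff) illustrating how a hook pairs with the operation it checks:

// Hypothetical caller - illustrates the intended pairing only.
void palacios_example_lock_irqsave(void *mutex, unsigned long *flags)
{
  spin_lock_irqsave((spinlock_t *)mutex, *flags); // the real operation
  palacios_lockcheck_lock_irqsave(mutex, *flags); // then record and check
}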
printlock("LOCKCHECK: BAD IRQCOUNT AT UNLOCK",l); + printlock("LOCKCHECK: BAD IRQCOUNT AT UNLOCK - stack follows",l); + DEBUG_DUMPSTACK(); } - lock_stack_unlock(lock,0); + lock_stack_unlock(lock,0,0); + + hot_lock_unlock(lock); + l->holder=0; + l->holdingcpu=0; l->lockcount--; backtrace(l->lastunlocker); +#if PRINT_LOCK_UNLOCK + printlock("UNLOCK",l); +#endif + + UNLOCK(); + } void palacios_lockcheck_lock_irqsave(void *lock,unsigned long flags) { - lockcheck_state_t *l=find_lock_entry(lock); + LOCK_DECL; + lockcheck_state_t *l; + LOCK(); + + l=find_lock_entry(lock); + if (!l) { - DEBUG("LOCKCHECK: IRQ LOCKING UNTRACKED LOCK 0x%p\n",lock); + UNLOCK(); + DEBUG_OUTPUT("LOCKCHECK: IRQ LOCKING UNTRACKED LOCK 0x%p - stack follows\n",lock); + DEBUG_DUMPSTACK(); return; } if (l->lockcount!=0) { - printlock("BAD LOCKCOUNT AT IRQ LOCK",l); + printlock("BAD LOCKCOUNT AT IRQ LOCK - stack follows",l); + DEBUG_DUMPSTACK(); } if (l->irqcount!=0) { - printlock("BAD IRQCOUNT AT IRQ LOCK",l); + printlock("BAD IRQCOUNT AT IRQ LOCK - stack follows",l); + DEBUG_DUMPSTACK(); } l->irqcount++; + l->holder=current; + l->holdingcpu=get_cpu(); put_cpu(); l->lastlockflags=flags; + l->locktime=jiffies; backtrace(l->lastirqlocker); - find_multiple_irqs_held(); - lock_stack_lock(lock,1); + lock_stack_lock(lock,1,flags); + + hot_lock_lock(lock); + +#if PRINT_LOCK_LOCK + printlock("LOCK_IRQSAVE",l); +#endif + + UNLOCK(); } -void palacios_lockcheck_unlock_irqrestore(void *lock,unsigned long flags) + +// +// This is separated into two components to avoid a race between +// the underlying spin_unlock_irqrestore and the next lockcheck_lock_irqsave +// If simply record the state after the unlock, we might see that the +// irqcount has already increased. Therefore, we will acquire the +// lockchecker lock in _pre and release it in _post. 
 
+void palacios_lockcheck_unlock_irqrestore_post(void *lock,unsigned long flags)
+{
+  LOCK_DECL;
+  lockcheck_state_t *l;
+
+#if LOCK_SELF
+  // when we unlock, we want to restore the flags *the user wants*
+  f = flags;
+#endif
+
+  // Note that we DO NOT take mylock here, since we already took it in
+  // _pre
+
+  l=find_lock_entry(lock);
+  
   if (!l) { 
-    DEBUG("LOCKCHECK: IRQ UNLOCKING UNTRACKED LOCK 0x%p\n",lock);
+    UNLOCK();  // release any threads waiting in lockcheck_lock_irqsave
+    DEBUG_OUTPUT("LOCKCHECK: IRQ UNLOCKING UNTRACKED LOCK 0x%p - stack follows\n",lock);
+    DEBUG_DUMPSTACK();
     return;
   }
   
   if (l->lockcount!=0) { 
-    printlock("LOCKCHECK: BAD LOCKCOUNT AT IRQ UNLOCK",l);
+    printlock("LOCKCHECK: BAD LOCKCOUNT AT IRQ UNLOCK - stack follows",l);
+    DEBUG_DUMPSTACK();
   }
   if (l->irqcount!=1) { 
-    printlock("LOCKCHECK: BAD IRQCOUNT AT IRQ UNLOCK",l);
+    printlock("LOCKCHECK: BAD IRQCOUNT AT IRQ UNLOCK - stack follows",l);
+    DEBUG_DUMPSTACK();
   }
   
+  l->holder=0;
+  l->holdingcpu=0;
   l->irqcount--;
   l->lastunlockflags = flags;
-  lock_stack_unlock(lock,1);
+
+  lock_stack_unlock(lock,1,flags);
+
+  hot_lock_unlock(lock);
+
   backtrace(l->lastirqunlocker);
-  
+
+#if PRINT_LOCK_UNLOCK
+  printlock("UNLOCK_IRQRESTORE",l);
+#endif
+
+  UNLOCK();  // release any threads waiting in lockcheck_lock_irqsave
+
 }
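To make the race that the _pre/_post split avoids concrete: if the unlock were recorded in a single hook after the real unlock, another cpu's acquire could be recorded first. An illustrative interleaving (constructed from the comment above, not from the source):

// Without the split (hypothetical single post-unlock hook), with irqcount==1:
//   cpu0: spin_unlock_irqrestore(lock,flags)    - lock is now free
//   cpu1: spin_lock_irqsave(lock,flags)         - acquires it
//   cpu1: palacios_lockcheck_lock_irqsave(...)  - irqcount becomes 2
//   cpu0: late unlock hook checks irqcount!=1   - spurious BAD IRQCOUNT report,
//         even though the code under test is correct.
// With the split, cpu1's hook blocks on mylock until _post runs,
// so cpu0's release is always recorded before cpu1's acquire.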