Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Avoid strict-aliasing related issues when compiling with optimization
[palacios.git] / linux_module / lockcheck.c
index 26a33ae..a921abd 100644 (file)
@@ -1,6 +1,14 @@
+/* 
+   Palacios lock-checking framework
+   (c) Peter Dinda, 2013
+ */
+
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/spinlock.h>
+#include <linux/sort.h>
+#include <linux/timer.h>
+#include <linux/time.h>
 
 #include "palacios.h"
 
 #define CHECK_IRQ_LAST_RELEASE   0
 #define CHECK_IRQ_FIRST_ACQUIRE  1
 
+// Show hottest locks every this many locks or lock_irqsaves
+// 0 indicates this should not be shown
+#define HOT_LOCK_INTERVAL        100000
+
+// How often the long lock timer checker should run
+// And how long a lock needs to be held before it complains
+// set to zero to deactivate
+#define LONG_LOCK_INTERVAL_MS    10
+#define LONG_LOCK_HELD_MS        20
+
 //
 // Whether lockcheck should lock its own data structures during an
 // event (alloc, dealloc, lock, unlock, etc) and the subsequent
@@ -70,9 +88,12 @@ typedef struct {
   void *lastirqlocker[STEP_BACK_DEPTH];
                       // who last locked
   unsigned long lastlockflags; // their flags
+  unsigned long locktime; 
+                      // when it was locked
   void *lastirqunlocker[STEP_BACK_DEPTH]
                     ; // who last unlocked
   unsigned long lastunlockflags; // their flags
+  int   hotlockcount; // how many times it's been locked
 } lockcheck_state_t;
 
 
@@ -80,9 +101,15 @@ typedef struct {
 // allocation of entries in the global state
 static spinlock_t mylock; 
 static lockcheck_state_t state[NUM_LOCKS];
+static lockcheck_state_t *sorted_state[NUM_LOCKS];
 
 static int numout=0;
 
+static int globallockcount=0;
+
+static struct timer_list  long_lock_timer;
+
+
 #define DEBUG_OUTPUT(fmt, args...)                                     \
 do {                                                                   \
   numout++;                                                            \
@@ -124,7 +151,6 @@ do {                                                                        \
 
 static void printlock(char *prefix, lockcheck_state_t *l);
 
-
 typedef struct {
   u32 top;               // next open slot 0..
   void *lock[LOCK_STACK_DEPTH]; // the stack
@@ -282,10 +308,116 @@ static void lock_stack_unlock(void *lock, char irq, unsigned long flags)
 
 }
 
+
+// pointers are to the pointers in the sorted_state array
+int compare(const void *a, const void *b)
+{
+  lockcheck_state_t *l = *((lockcheck_state_t **)a);
+  lockcheck_state_t *r = *((lockcheck_state_t **)b);
+
+  return -(l->hotlockcount - r->hotlockcount);
+}
+
+static void hot_lock_show(void)
+{
+  int n, i;
+  char buf[64];
+
+  n=0;
+  for (i=0;i<NUM_LOCKS;i++) { 
+    if (state[i].inuse) { 
+      sorted_state[n]=&(state[i]);
+      n++;
+    }
+  }
+
+  sort(sorted_state,n,sizeof(lockcheck_state_t *),compare,NULL);
+  
+  for (i=0;i<n;i++) {
+    snprintf(buf,64,"HOT LOCK (%d of %d) %d acquires", i,n,sorted_state[i]->hotlockcount);
+    printlock(buf,sorted_state[i]);
+  }
+}
+
+
+static void hot_lock_lock(void *lock)
+{ 
+  lockcheck_state_t *l = find_lock_entry(lock);
+   
+  if (!l) { return; }
+
+  l->hotlockcount++;
+  globallockcount++;
+
+  if (HOT_LOCK_INTERVAL && !(globallockcount % HOT_LOCK_INTERVAL )) {
+    DEBUG_OUTPUT("LOCKCHECK: Hot locks after %d acquires Follow\n",globallockcount);
+    hot_lock_show();
+  }
+}
+
+
+#define hot_lock_unlock(X) // nothing for now
+
+
+void long_lock_callback(unsigned long ignored)
+{
+  // See if any lock has been held for too long
+  // then print all the locks if it has
+
+  // Note that this function does not lock because the 
+  // idea is to try to catch a deadlock, which might
+  // imply that we are stalled waiting on our lock already
+  
+  int i;
+  int have=0;
+  lockcheck_state_t *l;
+  char buf[64];
+  unsigned long now;
+
+
+  now = jiffies;
+
+  for (i=0;i<NUM_LOCKS;i++) { 
+    l=&(state[i]);
+    if (l->inuse && (l->irqcount>0 || l->lockcount>0)) {
+      if (jiffies_to_msecs(now-l->locktime)>LONG_LOCK_HELD_MS) { 
+       have++;
+      }
+    }
+  }
+  
+  if (have>0) { 
+    DEBUG_OUTPUT("LOCKCHECK: LONG LOCK: %d locks have been held longer than %u ms - Dump of all held locks Follows\n",have,LONG_LOCK_HELD_MS);
+    have=0;
+    for (i=0;i<NUM_LOCKS;i++) { 
+      l=&(state[i]);
+      if (l->inuse && (l->irqcount>0 || l->lockcount>0)) { 
+       snprintf(buf,64,"LONG LOCK (%d - %s)",
+                have,
+                jiffies_to_msecs(now-l->locktime)>LONG_LOCK_HELD_MS ? "**OLD**" : "ok");
+       printlock(buf,l);
+       have++;
+      }
+    }
+  }
+  
+  // reset the timer to run again
+  
+  mod_timer(&long_lock_timer,jiffies+msecs_to_jiffies(LONG_LOCK_INTERVAL_MS));
+
+}
+
+
 void palacios_lockcheck_init()
 {
   memset(state,0,sizeof(lockcheck_state_t)*NUM_LOCKS);
   spin_lock_init(&mylock);
+
+#if LONG_LOCK_INTERVAL_MS>0
+  setup_timer(&long_lock_timer,long_lock_callback,0);
+  mod_timer(&long_lock_timer, jiffies + msecs_to_jiffies(LONG_LOCK_INTERVAL_MS));
+#endif
+
   DEBUG_OUTPUT("LOCKCHECK: LOCK CHECKING INITED (internal locking: %d, output limit: %d, output skip: %d)\n",LOCK_SELF,OUTPUT_LIMIT,OUTPUT_SKIP);
 }
 
@@ -333,8 +465,9 @@ static void printlock(char *prefix, lockcheck_state_t *l)
          backtrace_format
          ", lastlockflags=%lu, lastirqunlocker="
          backtrace_format
-         ", lastunlockflags=%lu\n",
-         prefix,l->lock,
+         ", lastunlockflags=%lu"
+         ", lockage=%ums\n",
+         prefix,l->lock,
          l->holder ? ((struct task_struct *)(l->holder))->comm : "none",
          l->holdingcpu,
          backtrace_expand(l->allocator),
@@ -345,12 +478,16 @@ static void printlock(char *prefix, lockcheck_state_t *l)
          backtrace_expand(l->lastirqlocker),
          l->lastlockflags,
          backtrace_expand(l->lastirqunlocker),
-         l->lastunlockflags);
+          l->lastunlockflags,
+         (l->lockcount>0) || (l->irqcount>0) ? jiffies_to_msecs(jiffies-l->locktime) : 0);
   }
 }
 
 
 
+
+
+
 static void find_multiple_locks_held(void)
 {
   int i;
@@ -434,6 +571,9 @@ void palacios_lockcheck_deinit()
       }
     }
   } 
+#if LONG_LOCK_INTERVAL_MS>0
+  del_timer(&long_lock_timer);
+#endif
   UNLOCK();
   INFO("LOCKCHECK: DEINITED\n");
 }
@@ -528,12 +668,15 @@ void palacios_lockcheck_lock(void *lock)
   l->lockcount++;
   l->holder=current;
   l->holdingcpu=get_cpu(); put_cpu();
+  l->locktime=jiffies;
   backtrace(l->lastlocker);
 
   find_multiple_locks_held();
 
   lock_stack_lock(lock,0,0);
 
+  hot_lock_lock(lock);
+
 #if PRINT_LOCK_LOCK
   printlock("LOCK",l);
 #endif
@@ -567,6 +710,8 @@ void palacios_lockcheck_unlock(void *lock)
   }
 
   lock_stack_unlock(lock,0,0);
+
+  hot_lock_unlock(lock);
   
   l->holder=0;
   l->holdingcpu=0;
@@ -610,12 +755,15 @@ void palacios_lockcheck_lock_irqsave(void *lock,unsigned long flags)
   l->holder=current;
   l->holdingcpu=get_cpu(); put_cpu();
   l->lastlockflags=flags;
+  l->locktime=jiffies;
   backtrace(l->lastirqlocker);
 
   find_multiple_irqs_held();
 
   lock_stack_lock(lock,1,flags);
 
+  hot_lock_lock(lock);
+
 #if PRINT_LOCK_LOCK
   printlock("LOCK_IRQSAVE",l);
 #endif
@@ -688,6 +836,8 @@ void palacios_lockcheck_unlock_irqrestore_post(void *lock,unsigned long flags)
 
   lock_stack_unlock(lock,1,flags);
 
+  hot_lock_unlock(lock);
+
   backtrace(l->lastirqunlocker);
 
 #if PRINT_LOCK_UNLOCK