diff -u --recursive --new-file penguin/linux/include/asm-i386/semaphore.h linux/include/asm-i386/semaphore.h
--- penguin/linux/include/asm-i386/semaphore.h	Fri Jan  1 11:56:20 1999
+++ linux/include/asm-i386/semaphore.h	Sat Jan  9 13:37:29 1999
@@ -25,12 +25,23 @@
 
 struct semaphore {
 	atomic_t count;
+	unsigned long owner;	/* acquirer's raw %esp; asm writes it at offset 4, so keep it right after count */
 	int waking;
 	struct wait_queue * wait;
 };
 
-#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, NULL })
-#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, NULL })
+/*
+ * To keep the uncontended path fast, the inline asm only
+ * stores the raw stack pointer in the "owner" field. The
+ * task pointer is recovered on demand by masking it with
+ * 2*PAGE_MASK (presumably the task struct sits at the base
+ * of the kernel stack), so masking stays in the slow path.
+ */
+#define semaphore_owner(sem) \
+	((struct task_struct *)((2*PAGE_MASK) & (sem)->owner))
+
+#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, 0, NULL })	/* count, owner, waking, wait */
+#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, 0, NULL })	/* same order, count starts at 0 */
 
 asmlinkage void __down_failed(void /* special register calling convention */);
 asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
@@ -64,13 +75,14 @@
 	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
 }
 
-static inline int waking_non_zero(struct semaphore *sem)
+static inline int waking_non_zero(struct semaphore *sem, struct task_struct *tsk)
 {
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking > 0) {
+	if (sem->waking > 0 || semaphore_owner(sem) == tsk) {
+		sem->owner = (unsigned long) tsk;
 		sem->waking--;
 		ret = 1;
 	}
@@ -91,7 +103,8 @@
 		"lock ; "
 #endif
 		"decl 0(%0)\n\t"
-		"js 2f\n"
+		"js 2f\n\t"
+		"movl %%esp,4(%0)\n"
 		"1:\n"
 		".section .text.lock,\"ax\"\n"
 		"2:\tpushl $1b\n\t"
@@ -113,6 +126,7 @@
 #endif
 		"decl 0(%1)\n\t"
 		"js 2f\n\t"
+		"movl %%esp,4(%1)\n\t"
 		"xorl %0,%0\n"
 		"1:\n"
 		".section .text.lock,\"ax\"\n"
diff -u --recursive --new-file penguin/linux/kernel/sched.c linux/kernel/sched.c
--- penguin/linux/kernel/sched.c	Mon Jan  4 23:15:49 1999
+++ linux/kernel/sched.c	Sat Jan  9 13:37:16 1999
@@ -883,7 +883,7 @@
 	 * who gets to gate through and who has to wait some more.	 \
 	 */								 \
 	for (;;) {							 \
-		if (waking_non_zero(sem))	/* are we waking up?  */ \
+		if (waking_non_zero(sem, tsk))	/* are we waking up?  */ \
 			break;			/* yes, exit loop */
 
 #define DOWN_TAIL(task_state)			\