Index: arch/i386/kernel/ldt.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/ldt.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 ldt.c
--- ldt.c	1999/01/18 01:28:57	1.1.1.1
+++ linux/arch/i386/kernel/ldt.c	1999/01/27 03:01:36
@@ -83,7 +83,7 @@
 			set_ldt_desc(i, ldt, LDT_ENTRIES);
 			current->tss.ldt = _LDT(i);
 			load_ldt(i);
-			if (atomic_read(&mm->count) > 1)
+			if (mm->count > 1)
 				printk(KERN_WARNING
 					"LDT allocated for cloned task!\n");
 		} else {
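
The ldt.c hunk above and the fs/binfmt_elf.c hunk just below are the
mechanical half of the patch: mm->count stops being an atomic_t (see the
include/linux/sched.h hunk later on), so the atomic_read() wrappers go
away. An unlocked read of the count is still fine in places like these
because the reader owns the mm (current holds one reference), so the
value can never be observed below 1; at worst it is stale, which here
costs nothing more than a missed or spurious printk. A minimal sketch of
that invariant, assuming the mm_lock rules introduced later in the patch:

	/* Illustration only, not part of the patch: why the owner may
	 * read its own mm->count without taking mm_lock. */
	static inline int mm_is_shared(struct mm_struct *mm)
	{
		/* Caller must hold a reference (e.g. mm == current->mm),
		 * so mm->count >= 1 here; all writers hold mm_lock, and
		 * a stale value only mis-aims a heuristic or warning. */
		return mm->count > 1;
	}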
Index: fs/binfmt_elf.c
===================================================================
RCS file: /var/cvs/linux/fs/binfmt_elf.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 binfmt_elf.c
--- binfmt_elf.c	1999/01/18 01:26:49	1.1.1.1
+++ linux/fs/binfmt_elf.c	1999/01/27 03:01:36
@@ -1067,7 +1067,7 @@
 
 	if (!current->dumpable ||
 	    limit < ELF_EXEC_PAGESIZE ||
-	    atomic_read(&current->mm->count) != 1)
+	    current->mm->count != 1)
 		return 0;
 	current->dumpable = 0;
 
Index: fs/exec.c
===================================================================
RCS file: /var/cvs/linux/fs/exec.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 exec.c
--- exec.c	1999/01/23 16:28:07	1.1.1.2
+++ linux/fs/exec.c	1999/01/27 11:14:17
@@ -378,7 +378,7 @@
 	struct mm_struct * mm, * old_mm;
 	int retval, nr;
 
-	if (atomic_read(&current->mm->count) == 1) {
+	if (current->mm->count == 1) {
 		flush_cache_mm(current->mm);
 		mm_release();
 		release_segments(current->mm);
@@ -409,7 +409,9 @@
 	activate_context(current);
 	up(&mm->mmap_sem);
 	mm_release();
+	spin_lock(&mm_lock);
 	mmput(old_mm);
+	spin_unlock(&mm_lock);
 	return 0;
 
 	/*
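
The exec.c hunks show the writer side. exec_mmap() installs the new mm
in current->mm and then drops the reference on old_mm; if that final
mmput() ran unlocked, a /proc reader that had just sampled tsk->mm could
mmget() an mm_struct that is already being freed. Wrapping the mmput()
in mm_lock makes "read tsk->mm, adjust the count" a single critical
section on both sides. A sketch of the interleaving the lock closes,
with the reader side as rewritten in fs/proc/array.c below:

	/*
	 * Without mm_lock (old code):
	 *
	 *	exec_mmap()                    /proc reader
	 *	current->mm = mm;
	 *	                               old = tsk->mm;  (stale)
	 *	mmput(old_mm);  count -> 0,
	 *	                mm freed
	 *	                               mmget(old);  use-after-free
	 *
	 * With mm_lock, the reader samples tsk->mm and calls mmget()
	 * inside one locked section, so it either runs first and pins
	 * the mm (count >= 2, the mmput() above only decrements), or it
	 * runs second and never sees old_mm at all, since current->mm
	 * already points at the new mm.
	 */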
Index: fs/proc/array.c
===================================================================
RCS file: /var/cvs/linux/fs/proc/array.c,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 array.c
--- array.c	1999/01/26 18:30:44	1.1.1.4
+++ linux/fs/proc/array.c	1999/01/27 11:18:17
@@ -43,6 +43,8 @@
  *			<Alan....@linux.org>
  *
  * Andi Kleen	     :  Race Fixes. 	
+ * 
+ * Andrea Arcangeli  :  do_exit/mmput/mmget race fix on the Race Fixes ;). 
  *
  */
 
@@ -399,8 +401,11 @@
 
 	read_lock(&tasklist_lock);
 	tsk = find_task_by_pid(pid);
+	spin_lock(&mm_lock);
 	if (tsk && tsk->mm && tsk->mm != &init_mm)
-		mmget(mm = tsk->mm);
+		if (!mmget(mm = tsk->mm))
+			mm = NULL;
+	spin_unlock(&mm_lock);
 	read_unlock(&tasklist_lock);
 	if (mm != NULL)
 		down(&mm->mmap_sem);
@@ -410,7 +415,9 @@
 static void release_mm(struct mm_struct *mm)
 {
 	up(&mm->mmap_sem);
+	spin_lock(&mm_lock);
 	mmput(mm);
+	spin_unlock(&mm_lock);
 }
 
 static unsigned long get_phys_addr(struct mm_struct *mm, unsigned long ptr)
@@ -504,17 +511,9 @@
 	return res;
 }
 
-/*
- * These bracket the sleeping functions..
- */
-extern void scheduling_functions_start_here(void);
-extern void scheduling_functions_end_here(void);
-#define first_sched	((unsigned long) scheduling_functions_start_here)
-#define last_sched	((unsigned long) scheduling_functions_end_here)
-
 static unsigned long get_wchan(struct task_struct *p)
 {
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || real_tsk(p) == current || p->state == TASK_RUNNING)
 		return 0;
 #if defined(__i386__)
 	{
@@ -522,7 +521,7 @@
 		unsigned long stack_page;
 		int count = 0;
 
-		stack_page = (unsigned long)p;
+		stack_page = (unsigned long)real_tsk(p);
 		esp = p->tss.esp;
 		if (!stack_page || esp < stack_page || esp >= 8188+stack_page)
 			return 0;
@@ -564,7 +563,7 @@
 	    unsigned long stack_page;
 	    int count = 0;
 
-	    stack_page = (unsigned long)p;
+	    stack_page = (unsigned long)real_tsk(p);
 	    fp = ((struct switch_stack *)p->tss.ksp)->a6;
 	    do {
 		    if (fp < stack_page+sizeof(struct task_struct) ||
@@ -585,7 +584,7 @@
 		unsigned long stack_page;
 		int count = 0;
 
-		stack_page = 4096 + (unsigned long)p;
+		stack_page = 4096 + (unsigned long)real_tsk(p);
 		fp = get_css_fp (&p->tss);
 		do {
 			if (fp < stack_page || fp > 4092+stack_page)
@@ -599,7 +598,7 @@
 #elif defined (__sparc__)
 	{
 		unsigned long pc, fp, bias = 0;
-		unsigned long task_base = (unsigned long) p;
+		unsigned long task_base = (unsigned long) real_tsk(p);
 		struct reg_window *rw;
 		int count = 0;
 
@@ -624,8 +623,8 @@
 }
 
 #if defined(__i386__)
-# define KSTK_EIP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
-# define KSTK_ESP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+# define KSTK_EIP(tsk)	(((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1019])
+# define KSTK_ESP(tsk)	(((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1022])
 #elif defined(__alpha__)
   /*
    * See arch/alpha/kernel/ptrace.c for details.
@@ -633,11 +632,11 @@
 # define PT_REG(reg)		(PAGE_SIZE - sizeof(struct pt_regs)	\
 				 + (long)&((struct pt_regs *)0)->reg)
 # define KSTK_EIP(tsk) \
-    (*(unsigned long *)(PT_REG(pc) + PAGE_SIZE + (unsigned long)(tsk)))
+    (*(unsigned long *)(PT_REG(pc) + PAGE_SIZE + (unsigned long)real_tsk(tsk)))
 # define KSTK_ESP(tsk)	((tsk) == current ? rdusp() : (tsk)->tss.usp)
 #elif defined(CONFIG_ARM)
-# define KSTK_EIP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
-# define KSTK_ESP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1020])
+# define KSTK_EIP(tsk)	(((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1022])
+# define KSTK_ESP(tsk)	(((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1020])
 #elif defined(__mc68000__)
 #define	KSTK_EIP(tsk)	\
     ({			\
@@ -646,7 +645,7 @@
 	    MAP_NR((tsk)->tss.esp0) < max_mapnr) \
 	      eip = ((struct pt_regs *) (tsk)->tss.esp0)->pc;	 \
 	eip; })
-#define	KSTK_ESP(tsk)	((tsk) == current ? rdusp() : (tsk)->tss.usp)
+#define	KSTK_ESP(tsk)	(real_tsk(tsk) == current ? rdusp() : (tsk)->tss.usp)
 #elif defined(__powerpc__)
 #define KSTK_EIP(tsk)	((tsk)->tss.regs->nip)
 #define KSTK_ESP(tsk)	((tsk)->tss.regs->gpr[1])
@@ -849,14 +848,24 @@
 			    cap_t(p->cap_effective));
 }
 
-static struct task_struct *grab_task(int pid)
+static struct task_struct *grab_task(int pid, struct task_struct *dst)
 {
 	struct task_struct *tsk = current;
 	if (pid != tsk->pid) {
 		read_lock(&tasklist_lock);
 		tsk = find_task_by_pid(pid);
-		if (tsk && tsk->mm && tsk->mm != &init_mm)
-			mmget(tsk->mm);
+		if (tsk) {
+			spin_lock(&mm_lock);
+			if (tsk->mm && tsk->mm != &init_mm) 
+				if (!mmget(tsk->mm))
+					tsk = NULL;
+			spin_unlock(&mm_lock);
+			if (tsk)
+			{
+				memcpy(dst, tsk, sizeof(struct task_struct));
+				tsk = dst;
+			}
+		}
 		read_unlock(&tasklist_lock);
 	}	
 	return tsk;
@@ -864,16 +873,18 @@
 
 static void release_task(struct task_struct *tsk)
 {
+	spin_lock(&mm_lock);
 	if (tsk != current && tsk->mm && tsk->mm != &init_mm)
 		mmput(tsk->mm);
+	spin_unlock(&mm_lock);
 }
 
 static int get_status(int pid, char * buffer)
 {
 	char * orig = buffer;
-	struct task_struct *tsk;
+	struct task_struct *tsk, mytask;
 	
-	tsk = grab_task(pid);
+	tsk = grab_task(pid, &mytask); 
 	if (!tsk)
 		return 0;
 	buffer = task_name(tsk, buffer);
@@ -887,7 +898,7 @@
 
 static int get_stat(int pid, char * buffer)
 {
-	struct task_struct *tsk;
+	struct task_struct *tsk, mytask;
 	unsigned long vsize, eip, esp, wchan;
 	long priority, nice;
 	int tty_pgrp;
@@ -895,7 +906,7 @@
 	char state;
 	int res;
 
-	tsk = grab_task(pid);
+	tsk = grab_task(pid, &mytask);  
 	if (!tsk)
 		return 0;
 	state = *get_task_state(tsk);
@@ -1149,7 +1160,7 @@
 		goto getlen_out;
 
 	/* Check whether the mmaps could change if we sleep */
-	volatile_task = (p != current || atomic_read(&p->mm->count) > 1);
+	volatile_task = (p != current || p->mm->count > 1);
 
 	/* decode f_pos */
 	lineno = *ppos >> MAPS_LINE_SHIFT;
@@ -1250,10 +1261,10 @@
 #ifdef __SMP__
 static int get_pidcpu(int pid, char * buffer)
 {
-	struct task_struct * tsk;
+	struct task_struct * tsk, mytask;
 	int i, len;
 
-	tsk = grab_task(pid);
+	tsk = grab_task(pid, &mytask);
 	if (!tsk)
 		return 0;
 
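
Two independent things happen in array.c. First, every mmget() moves
under mm_lock and learns to fail, and every mmput() is wrapped the same
way. Second, grab_task() changes shape: instead of returning a pointer
into the live task array, it snapshots the task_struct into a
caller-supplied buffer while tasklist_lock is held, so the /proc code
can keep formatting fields after the lock is dropped even if the task is
reaped meanwhile. The cost is that values derived from the task's kernel
stack page (get_wchan(), KSTK_EIP/KSTK_ESP) must not use the copy's own
address, hence real_tsk(), which follows tarray_ptr back to the live
slot; the first_sched/last_sched bracket macros simply move to
<linux/sched.h> next to it. A hypothetical handler following the same
pattern as get_status() and get_stat() (get_example() is made up,
grab_task()/release_task() are the patch's):

	static int get_example(int pid, char * buffer)
	{
		struct task_struct *tsk, mytask;  /* mytask: on-stack snapshot */
		int len;

		tsk = grab_task(pid, &mytask);	/* &mytask, current, or NULL */
		if (!tsk)
			return 0;
		/* KSTK_EIP() goes through real_tsk(), so it reads the
		 * live kernel stack page, not our snapshot. */
		len = sprintf(buffer, "%d %lu\n", tsk->pid, KSTK_EIP(tsk));
		release_task(tsk);		/* locked mmput if we took a ref */
		return len;
	}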
Index: include/asm-i386/pgtable.h
===================================================================
RCS file: /var/cvs/linux/include/asm-i386/pgtable.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 pgtable.h
--- pgtable.h	1999/01/18 01:27:15	1.1.1.1
+++ linux/include/asm-i386/pgtable.h	1999/01/27 03:01:36
@@ -101,7 +101,7 @@
 static inline void flush_tlb_current_task(void)
 {
 	/* just one copy of this mm? */
-	if (atomic_read(&current->mm->count) == 1)
+	if (current->mm->count == 1)
 		local_flush_tlb();	/* and that's us, so.. */
 	else
 		smp_flush_tlb();
@@ -113,7 +113,7 @@
 
 static inline void flush_tlb_mm(struct mm_struct * mm)
 {
-	if (mm == current->mm && atomic_read(&mm->count) == 1)
+	if (mm == current->mm && mm->count == 1)
 		local_flush_tlb();
 	else
 		smp_flush_tlb();
@@ -122,7 +122,7 @@
 static inline void flush_tlb_page(struct vm_area_struct * vma,
 	unsigned long va)
 {
-	if (vma->vm_mm == current->mm && atomic_read(&current->mm->count) == 1)
+	if (vma->vm_mm == current->mm && current->mm->count == 1)
 		__flush_tlb_one(va);
 	else
 		smp_flush_tlb();
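
The TLB-flush fast paths keep reading the count with no lock, which
looks inconsistent until you notice the reader is always the owner of
the mm in question: current contributes one reference, so the count
cannot be observed below 1, and reading exactly 1 means no clone shares
the address space (a /proc reader may still pin a reference, but it
never runs with this mm loaded, so a local flush stays sufficient). A
stale read errs toward smp_flush_tlb(), i.e. toward a slower but still
correct global flush. As a predicate (illustration only, not in the
patch):

	static inline int mm_exclusively_ours(struct mm_struct *mm)
	{
		/* Valid only for mm == current->mm: we pin one
		 * reference, so count >= 1; a racing mmget() can only
		 * raise it, turning a local flush into a global one. */
		return mm->count == 1;
	}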
Index: include/linux/sched.h
===================================================================
RCS file: /var/cvs/linux/include/linux/sched.h,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 sched.h
--- sched.h	1999/01/23 16:29:58	1.1.1.3
+++ linux/include/linux/sched.h	1999/01/27 11:18:42
@@ -114,6 +114,15 @@
 extern rwlock_t tasklist_lock;
 extern spinlock_t scheduler_lock;
 
+/*
+ * These bracket the sleeping functions..
+ */
+extern void scheduling_functions_start_here(void);
+extern void scheduling_functions_end_here(void);
+#define first_sched	((unsigned long) scheduling_functions_start_here)
+#define last_sched	((unsigned long) scheduling_functions_end_here)
+#define real_tsk(tsk)	(*(tsk)->tarray_ptr)
+
 extern void sched_init(void);
 extern void show_state(void);
 extern void trap_init(void);
@@ -164,7 +173,7 @@
 	struct vm_area_struct *mmap_avl;	/* tree of VMAs */
 	struct vm_area_struct *mmap_cache;	/* last find_vma result */
 	pgd_t * pgd;
-	atomic_t count;
+	int count;
 	int map_count;				/* number of VMAs */
 	struct semaphore mmap_sem;
 	unsigned long context;
@@ -184,7 +193,7 @@
 #define INIT_MM {					\
 		&init_mmap, NULL, NULL,			\
 		swapper_pg_dir, 			\
-		ATOMIC_INIT(1), 1,			\
+		1, 1,					\
 		MUTEX,					\
 		0,					\
 		0, 0, 0, 0,				\
@@ -391,6 +400,7 @@
 
 extern struct task_struct **tarray_freelist;
 extern spinlock_t taskslot_lock;
+extern spinlock_t mm_lock;
 
 extern __inline__ void add_free_taskslot(struct task_struct **t)
 {
@@ -609,11 +619,8 @@
  * Routines for handling mm_structs
  */
 extern struct mm_struct * mm_alloc(void);
-static inline void mmget(struct mm_struct * mm)
-{
-	atomic_inc(&mm->count);
-}
-extern void mmput(struct mm_struct *);
+extern int FASTCALL(mmget(struct mm_struct *));
+extern void FASTCALL(mmput(struct mm_struct *));
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(void);
 
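
This is the interface half of the patch. mm->count drops its atomic_t
wrapper, the global mm_lock joins taskslot_lock, and mmget() turns from
an unconditional atomic_inc() into a FASTCALL function that can fail: it
returns 0 once the count has reached zero, so "check liveness and take a
reference" becomes one atomic step under mm_lock. real_tsk() complements
the grab_task() snapshot in fs/proc/array.c: a copied task_struct still
carries the original tarray_ptr, so dereferencing it leads back to the
live task. A hypothetical wrapper showing the whole contract (the helper
name is made up; mmget()/mmput()/mm_lock are the patch's):

	static struct mm_struct * get_task_mm_locked(struct task_struct * tsk)
	{
		struct mm_struct * mm = NULL;

		spin_lock(&mm_lock);
		if (tsk->mm && tsk->mm != &init_mm && mmget(tsk->mm))
			mm = tsk->mm;	/* pinned: count was >= 1,
					 * now one higher */
		spin_unlock(&mm_lock);
		return mm;		/* NULL: no user mm, or mm dying */
	}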
Index: kernel/exit.c
===================================================================
RCS file: /var/cvs/linux/kernel/exit.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 exit.c
--- exit.c	1999/01/23 16:30:27	1.1.1.2
+++ linux/kernel/exit.c	1999/01/27 11:19:46
@@ -248,8 +248,10 @@
 
 static inline void __exit_mm(struct task_struct * tsk)
 {
-	struct mm_struct * mm = tsk->mm;
+	struct mm_struct * mm;
 
+	spin_lock(&mm_lock);
+	mm = tsk->mm;
 	/* Set us up to use the kernel mm state */
 	if (mm != &init_mm) {
 		flush_cache_mm(mm);
@@ -261,6 +263,7 @@
 		mm_release();
 		mmput(mm);
 	}
+	spin_unlock(&mm_lock);
 }
 
 void exit_mm(struct task_struct *tsk)
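
__exit_mm() is the writer the /proc side races against. The load of
tsk->mm, the switch to init_mm in the unchanged lines between the two
hunks, and the final mmput() now form one mm_lock critical section, so a
reader serialized on the same lock sees either the old mm while it still
holds the owner's reference, or &init_mm after teardown; it can never
catch the mm between "last reference dropped" and "memory freed".
Sketched as the two possible interleavings:

	/*
	 *   exiting task                    /proc reader (array.c)
	 *   ------------                    ---------------------
	 *   spin_lock(&mm_lock);
	 *   mm = tsk->mm;                   spin_lock(&mm_lock);  (spins)
	 *   ... tsk->mm = &init_mm; ...
	 *   mmput(mm);      count 1 -> 0,
	 *                   mm freed
	 *   spin_unlock(&mm_lock);
	 *                                   tsk->mm == &init_mm, reader
	 *                                   bails out, never touches mm
	 *
	 * In the opposite order the reader's mmget() raises the count
	 * to 2 first, the exiting task's mmput() only decrements, and
	 * the reader frees the mm later with its own locked mmput().
	 */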
Index: kernel/fork.c
===================================================================
RCS file: /var/cvs/linux/kernel/fork.c,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 fork.c
--- fork.c	1999/01/23 16:30:27	1.1.1.3
+++ linux/kernel/fork.c	1999/01/27 11:20:02
@@ -10,6 +10,11 @@
  * Fork is rather simple, once you get the hang of it, but the memory
  * management can be a bitch. See 'mm/mm.c': 'copy_page_tables()'
  */
+/*
+ * Fixed a race between mmget() and mmput(): added the global mm_lock
+ * spinlock to serialize accesses to the tsk->mm field and to mm->count.
+ * Copyright (C) 1999  Andrea Arcangeli
+ */
 
 #include <linux/malloc.h>
 #include <linux/init.h>
@@ -39,6 +44,7 @@
 
 struct task_struct **tarray_freelist = NULL;
 spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t mm_lock = SPIN_LOCK_UNLOCKED;
 
 /* UID task count cache, to prevent walking entire process list every
  * single fork() operation.
@@ -261,7 +267,7 @@
 	if (mm) {
 		*mm = *current->mm;
 		init_new_context(mm);
-		atomic_set(&mm->count, 1);
+		mm->count = 1;
 		mm->map_count = 0;
 		mm->def_flags = 0;
 		mm->mmap_sem = MUTEX_LOCKED;
@@ -303,12 +309,25 @@
 	}
 }
 
+int mmget(struct mm_struct * mm)
+{
+	int retval = 0;
+
+	if (mm->count)
+	{
+		mm->count++;
+		retval = 1;
+	}
+
+	return retval;
+}
+
 /*
  * Decrement the use count and release all resources for an mm.
  */
 void mmput(struct mm_struct *mm)
 {
-	if (atomic_dec_and_test(&mm->count)) {
+	if (!--mm->count) {
 		release_segments(mm);
 		exit_mmap(mm);
 		free_page_tables(mm);
@@ -322,7 +341,9 @@
 	int retval;
 
 	if (clone_flags & CLONE_VM) {
+		spin_lock(&mm_lock);
 		mmget(current->mm);
+		spin_unlock(&mm_lock);
 		/*
 		 * Set up the LDT descriptor for the clone task.
 		 */
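
The new mmget() is where the failure mode lives: once the count has hit
zero it refuses to raise it again, so a reference can never be
"resurrected" on an mm whose teardown has begun. Two design points are
worth spelling out. A single global spinlock is used rather than a
per-mm lock because the lock guards the liveness of the mm itself and
therefore cannot live inside it, and the critical sections are only a
few instructions long. And the copy_mm() path above takes mm_lock even
though current->mm cannot vanish under its own fork; that keeps the
invariant trivial to audit: every transition of any mm->count happens
under mm_lock. The lock ordering the patch implies, collected in one
place (my summary, not stated in the patch):

	/*
	 *	read_lock(&tasklist_lock);	outer: pins the task
	 *	  spin_lock(&mm_lock);		inner: pins/releases the mm
	 *
	 * exit.c, exec.c and fork.c take mm_lock on its own, which is
	 * consistent: the order only matters when both locks are held,
	 * and then tasklist_lock always comes first (fs/proc/array.c).
	 */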