diff -urN 2.4.15pre6/include/linux/mmzone.h google/include/linux/mmzone.h
--- 2.4.15pre6/include/linux/mmzone.h	Tue Nov 13 05:18:58 2001
+++ google/include/linux/mmzone.h	Mon Nov 19 18:19:00 2001
@@ -18,6 +18,11 @@
 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
 #endif
 
+#define ZONE_DMA		0
+#define ZONE_NORMAL		1
+#define ZONE_HIGHMEM		2
+#define MAX_NR_ZONES		3
+
 typedef struct free_area_struct {
 	struct list_head	free_list;
 	unsigned long		*map;
@@ -25,6 +30,10 @@
 
 struct pglist_data;
 
+typedef struct zone_watermarks_s {
+	unsigned long min, low, high;
+} zone_watermarks_t;
+
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
  * into multiple physical zones. On a PC we have 3 zones:
@@ -39,8 +48,17 @@
 	 */
 	spinlock_t		lock;
 	unsigned long		free_pages;
-	unsigned long		pages_min, pages_low, pages_high;
-	int			need_balance;
+
+	/*
+	 * We don't know whether the memory we are about to allocate will be
+	 * freeable or whether it will eventually be released, so to avoid
+	 * wasting several GB of RAM we must reserve some lower zone memory
+	 * (otherwise we risk going OOM on the lower zones even though there
+	 * is plenty of freeable RAM in the higher zones).
+	 */
+	zone_watermarks_t	watermarks[MAX_NR_ZONES];
+
+	unsigned long		need_balance;
 
 	/*
 	 * free areas of different sizes
@@ -60,6 +78,7 @@
 	 */
 	char			*name;
 	unsigned long		size;
+	unsigned long		realsize;
 } zone_t;
 
 #define ZONE_DMA		0
@@ -113,8 +132,8 @@
 extern int numnodes;
 extern pg_data_t *pgdat_list;
 
-#define memclass(pgzone, classzone)	(((pgzone)->zone_pgdat == (classzone)->zone_pgdat) \
-			&& ((pgzone) <= (classzone)))
+#define zone_idx(zone)			((zone) - (zone)->zone_pgdat->node_zones)
+#define memclass(pgzone, classzone)	(zone_idx(pgzone) <= zone_idx(classzone))
 
 /*
  * The following two are not meant for general usage. They are here as
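
To make the new data structure concrete before the callers below, here is a
minimal userspace sketch (not kernel code) of how the per-classzone watermark
table is read: each zone keeps one min/low/high triple per classzone, so a
lower zone can defend more of its pages against allocations whose classzone
is higher. The zone_t here is a stripped-down stand-in and zone_can_allocate()
is a hypothetical helper; in the kernel the same comparison happens inline in
__alloc_pages().

	#include <stdio.h>

	#define ZONE_DMA	0
	#define ZONE_HIGHMEM	2
	#define MAX_NR_ZONES	3

	typedef struct {
		unsigned long min, low, high;
	} zone_watermarks_t;

	/* stripped-down stand-in for the kernel's zone_t */
	typedef struct {
		const char *name;
		unsigned long free_pages;
		zone_watermarks_t watermarks[MAX_NR_ZONES];
	} zone_t;

	/* hypothetical helper: may zone z satisfy an allocation whose
	 * classzone index is class_idx? */
	static int zone_can_allocate(const zone_t *z, int class_idx)
	{
		return z->free_pages > z->watermarks[class_idx].low;
	}

	int main(void)
	{
		zone_t dma = { .name = "DMA", .free_pages = 500 };

		dma.watermarks[ZONE_DMA].low = 128;	 /* small reserve vs. itself */
		dma.watermarks[ZONE_HIGHMEM].low = 1024; /* large reserve vs. highmem */

		printf("DMA page for a DMA classzone alloc:     %d\n",
		       zone_can_allocate(&dma, ZONE_DMA));	/* 1: allowed */
		printf("DMA page for a HIGHMEM classzone alloc: %d\n",
		       zone_can_allocate(&dma, ZONE_HIGHMEM));	/* 0: refused */
		return 0;
	}
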
diff -urN 2.4.15pre6/mm/page_alloc.c google/mm/page_alloc.c
--- 2.4.15pre6/mm/page_alloc.c	Sun Nov 18 06:04:47 2001
+++ google/mm/page_alloc.c	Mon Nov 19 18:09:15 2001
@@ -27,9 +27,10 @@
 pg_data_t *pgdat_list;
 
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
-static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 32, 128, 128, };
+static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, };
 static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, };
 static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, };
+static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 128, 8 };
 
 /*
  * Free_page() adds the page to the free lists. This is optimized for
@@ -312,16 +313,18 @@
 {
 	zone_t **zone, * classzone;
 	struct page * page;
-	int freed;
+	int freed, class_idx;
 
 	zone = zonelist->zones;
 	classzone = *zone;
+	class_idx = zone_idx(classzone);
+
 	for (;;) {
 		zone_t *z = *(zone++);
 		if (!z)
 			break;
 
-		if (zone_free_pages(z, order) > z->pages_low) {
+		if (zone_free_pages(z, order) > z->watermarks[class_idx].low) {
 			page = rmqueue(z, order);
 			if (page)
 				return page;
@@ -340,7 +343,7 @@
 		if (!z)
 			break;
 
-		min = z->pages_min;
+		min = z->watermarks[class_idx].min;
 		if (!(gfp_mask & __GFP_WAIT))
 			min >>= 2;
 		if (zone_free_pages(z, order) > min) {
@@ -353,7 +356,7 @@
 	/* here we're in the low on memory slow path */
 
 rebalance:
-	if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
+	if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
 		zone = zonelist->zones;
 		for (;;) {
 			zone_t *z = *(zone++);
@@ -381,7 +384,7 @@
 		if (!z)
 			break;
 
-		if (zone_free_pages(z, order) > z->pages_min) {
+		if (zone_free_pages(z, order) > z->watermarks[class_idx].min) {
 			page = rmqueue(z, order);
 			if (page)
 				return page;
@@ -473,13 +476,15 @@
 	unsigned int sum = 0;
 
 	do {
+		int class_idx;
 		zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
 		zone_t **zonep = zonelist->zones;
 		zone_t *zone;
 
+		class_idx = zone_idx(*zonep);
 		for (zone = *zonep++; zone; zone = *zonep++) {
 			unsigned long size = zone->size;
-			unsigned long high = zone->pages_high;
+			unsigned long high = zone->watermarks[class_idx].high;
 			if (size > high)
 				sum += size - high;
 		}
@@ -525,13 +530,9 @@
 		zone_t *zone;
 		for (zone = tmpdat->node_zones;
 			       	zone < tmpdat->node_zones + MAX_NR_ZONES; zone++)
-			printk("Zone:%s freepages:%6lukB min:%6lukB low:%6lukB " 
-				       "high:%6lukB\n", 
+			printk("Zone:%s freepages:%6lukB\n", 
 					zone->name,
-					K(zone->free_pages),
-					K(zone->pages_min),
-					K(zone->pages_low),
-					K(zone->pages_high));
+					K(zone->free_pages));
 			
 		tmpdat = tmpdat->node_next;
 	}
@@ -697,6 +698,7 @@
 		zone_t *zone = pgdat->node_zones + j;
 		unsigned long mask;
 		unsigned long size, realsize;
+		int idx;
 
 		realsize = size = zones_size[j];
 		if (zholes_size)
@@ -704,6 +706,7 @@
 
 		printk("zone(%lu): %lu pages.\n", j, size);
 		zone->size = size;
+		zone->realsize = realsize;
 		zone->name = zone_names[j];
 		zone->lock = SPIN_LOCK_UNLOCKED;
 		zone->zone_pgdat = pgdat;
@@ -719,9 +722,29 @@
 			mask = zone_balance_min[j];
 		else if (mask > zone_balance_max[j])
 			mask = zone_balance_max[j];
-		zone->pages_min = mask;
-		zone->pages_low = mask*2;
-		zone->pages_high = mask*3;
+		zone->watermarks[j].min = mask;
+		zone->watermarks[j].low = mask*2;
+		zone->watermarks[j].high = mask*3;
+		/* now set the watermarks of the lower zones for the "j" classzone */
+		for (idx = j-1; idx >= 0; idx--) {
+			zone_t * lower_zone = pgdat->node_zones + idx;
+			unsigned long lower_zone_reserve;
+			if (!lower_zone->size)
+				continue;
+
+			mask = lower_zone->watermarks[idx].min;
+			lower_zone->watermarks[j].min = mask;
+			lower_zone->watermarks[j].low = mask*2;
+			lower_zone->watermarks[j].high = mask*3;
+
+			/* reserve lower zone pages in proportion to the higher zones' total size */
+			lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx];
+			lower_zone->watermarks[j].min += lower_zone_reserve;
+			lower_zone->watermarks[j].low += lower_zone_reserve;
+			lower_zone->watermarks[j].high += lower_zone_reserve;
+
+			realsize += lower_zone->realsize;
+		}
 
 		zone->zone_mem_map = mem_map + offset;
 		zone->zone_start_mapnr = offset;
@@ -797,3 +820,16 @@
 }
 
 __setup("memfrac=", setup_mem_frac);
+
+static int __init setup_lower_zone_reserve(char *str)
+{
+	int j = 0;
+
+	while (j < MAX_NR_ZONES-1 && get_option(&str, &lower_zone_reserve_ratio[j++]) == 2);
+	printk("setup_lower_zone_reserve: ");
+	for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d  ", lower_zone_reserve_ratio[j]);
+	printk("\n");
+	return 1;
+}
+
+__setup("lower_zone_reserve=", setup_lower_zone_reserve);
diff -urN 2.4.15pre6/mm/vmscan.c google/mm/vmscan.c
--- 2.4.15pre6/mm/vmscan.c	Sun Nov 18 06:04:47 2001
+++ google/mm/vmscan.c	Mon Nov 19 18:10:19 2001
@@ -606,11 +606,12 @@
 
 static int check_classzone_need_balance(zone_t * classzone)
 {
-	zone_t * first_classzone;
+	zone_t * first_zone;
+	int class_idx = zone_idx(classzone);
 
-	first_classzone = classzone->zone_pgdat->node_zones;
-	while (classzone >= first_classzone) {
-		if (classzone->free_pages > classzone->pages_high)
+	first_zone = classzone->zone_pgdat->node_zones;
+	while (classzone >= first_zone) {
+		if (classzone->free_pages > classzone->watermarks[class_idx].high)
 			return 0;
 		classzone--;
 	}
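
Usage note: with the vmscan.c change above, kswapd keeps a classzone flagged
for balancing until every zone at or below it is back above the high
watermark it keeps for that specific classzone, instead of a single generic
pages_high. The reserves themselves are tunable at boot via the new
parameter; since the ratios are divisors, halving them doubles the reserves
(values illustrative):

	lower_zone_reserve=64,4
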