--cut here--------------------------------------------------------------- /* * * fsx is a File System eXerciser. It is a benchmark that attempts to * quantify the performance of several filesystem operations that have been * observed to be bottlenecks in I/O-intensive applications, specifically * the text database work done in connection with the New Oxford English * Dictionary Project at the University of Waterloo. * * It performs a series of tests on a file of known size. By default, that * size is 50 Mb. For each test, fsx reports the bytes processed per * elapsed second, per CPU second, and the % CPU usage (user and system). * * In each case, some care is taken to keep optimizers, no matter how * clever, from noticing it's all bogus. The idea is to make sure that * these are real transfers from user space to the physical disk. The * tests are: * * 1. Sequential Output * * 1.1 Per-Character. The file is written using the putc() stdio macro. * The loop that does the writing should be small enough to fit into any * reasonable I-cache. The CPU exercised here is that required to do the * stdio code plus the OS file space allocation. * * 1.2 Block. The file is created using write(2). The CPU consumption * should be just the OS file space allocation. * * 1.3 Rewrite. Each BUFSIZ of the file is read with read(2), dirtied, and * rewritten with write(2), requiring an lseek(2). Since no space * allocation is done, and the I/O is well-localized, this should test the * effectiveness of the filesystem cache and the speed of data transfer. * * 2. Sequential Input * * 2.1 Per-Character. The file is read using the getc() stdio macro. Once * again, the inner loop is small. This should exercise only stdio and * sequential input. * * 2.2 Block. The file is read using read(2). This should be a very pure * test of sequential input performance. * * 3. Random Seeks * * This test seeks 1000 times to locations in the file specified by * random() in bsd systems, drand48() on sysV systems. In each case, the * block is read with read(2). In 10% of cases, it is dirtied and written * back with write(2). * * AXIOM: For any unix filesystem, the effective number of lseek(2) calls * per second declines asymptotically to near 30, once the effect of * caching is defeated. * * The size of the file has a strong nonlinear effect on the results of * this test. Many Unix systems that have the memory available will make * aggressive efforts to cache the whole thing, and report random I/O rates * in the thousands per second, which is ridiculous. As an extreme * example, an IBM RISC 6000 with 64 Mb of memory reported 3,722 per second * on a 50 Mb file. * * COPYRIGHT NOTICE: * Copyright (c) Tim Bray, 1990. * Everybody is hereby granted rights to use, copy, and modify this program, * provided only that the copyright notice above and the disclaimer below * are preserved without change. * DISCLAIMER: * This program is provided AS IS with no warranty of any kind, and * The author makes no representation with respect to the adequacy of this * program for any particular purpose or with respect to its adequacy to * produce any particular result, and * The author shall not be liable for loss or damage arising out of * the use of this program regardless of how sustained, and * In no event shall the author be liable for special, direct, indirect * or consequential damage, loss, costs or fees or expenses of any * nature or kind. */ #include <stdio.h> #include <fcntl.h> #include <sys/types.h> #include <sys/time.h> #ifdef i386 #include <limits.h> #include <sys/times.h> #else #include <sys/resource.h> #endif #define IntSize (4) #define Elapsed (0) #define CPU (1) #define Searches (1000) #define UpdateSeek (10) static double cpu_so_far(); static void doseek(); static void get_delta_t(); static void io_error(); static void newfile(); static void report(); static double time_so_far(); static void timestamp(); static void usage(); #ifdef i386 static long random(); #endif typedef enum { Putc, ReWrite, FastWrite, Getc, FastRead, Binary, TestCount } tests_t; static int basetime; static double delta[(int) TestCount][2]; static char * myname; static double last_cpustamp = 0.0; static double last_timestamp = 0.0; static int lseek_count = 0; main(argc, argv) int argc; char * argv[]; { int buf[BUFSIZ / IntSize]; int bufindex; int chars[256]; char * dir; int fd; char name[BUFSIZ]; int next; int size; FILE * stream; int words; myname = argv[0]; fd = -1; basetime = (int) time((time_t *) NULL); size = 50; dir = "."; for (next = 1; next < argc - 1; next++) if (argv[next][0] == '-') { /* option? */ if (strcmp(argv[next] + 1, "d") == 0) dir = argv[next + 1]; else if (strcmp(argv[next] + 1, "s") == 0) size = atoi(argv[next + 1]); else usage(); next++; } /* option? */ else usage(); if (size < 1) usage(); size *= (1024 * 1024); sprintf(name, "%s/fsx.%d", dir, getpid()); printf("File '%s', size: %d\n", name, size); fflush(stdout); /* * Fill up a file, writing it a char at a time with the stdio putc() * facility. This models the activity of a negligible-CPU filter * and exercises kernel file space allocation. Should be measured with * disks of varying degrees of fullness. */ printf("Writing with putc()..."); newfile(name, &fd, &stream, 1); fflush(stdout); timestamp(); for (words = 0; words < size; words++) if (putc(words & 0x7f, stream) == EOF) io_error("putc"); /* * note that we always close the file before measuring time, in an * effort to force as much of the I/O out as we can */ if (fclose(stream) == -1) io_error("fclose after putc"); get_delta_t((int) Putc); printf("done\n"); fflush(stdout); /* * Now read & rewrite it, a block at a time. Change one word in each block * in case somebody's being clever about rewritten dirty blocks. * This exercises sequential read-write performance and hence the cache, * but avoids any space-allocation work. */ newfile(name, &fd, &stream, 0); if (lseek(fd, (off_t) 0, 0) == (off_t) -1) io_error("lseek(2) before rewrite"); printf("Rewriting..."); fflush(stdout); timestamp(); bufindex = 0; do { /* while we can read a block */ if ((words = read(fd, (char *) buf, BUFSIZ)) == -1) io_error("read(2)"); if (bufindex == words / IntSize) bufindex = 0; buf[bufindex++]++; if (lseek(fd, (off_t) -words, 1) == -1) io_error("relative lseek(2)"); if (write(fd, (char *) buf, words) == -1) io_error("re write(2)"); } /* while we can read a block */ while (words == BUFSIZ); if (close(fd) == -1) io_error("close after rewrite"); get_delta_t((int) ReWrite); printf("done\n"); fflush(stdout); /* * Now rewrite the whole file from scratch, but a block at a time rather * than with stdio. Exercises space allocation a little more stringently, * and by comparison with the putc test, quantifies stdio overhead a bit. * Once again, dirty each block. */ newfile(name, &fd, &stream, 1); printf("Writing intelligently..."); for (words = 0; words < BUFSIZ / IntSize; words++) buf[words] = 0; fflush(stdout); timestamp(); for (words = bufindex = 0; words < (size / BUFSIZ); words++) { /* for each word */ if (bufindex == (BUFSIZ/IntSize)) bufindex = 0; buf[bufindex++]++; if (write(fd, (char *) buf, BUFSIZ) == -1) io_error("write(2)"); } /* for each word */ if (close(fd) == -1) io_error("close after fast write"); get_delta_t((int) FastWrite); printf("done\n"); fflush(stdout); /* * Now read them all back with getc, excercising default character- * at-a-time input. Do a character frequency count just to fool * any optimizers that may notice that they're not being used. */ newfile(name, &fd, &stream, 0); for (words = 0; words < 256; words++) chars[words] = 0; printf("Reading with getc()..."); fflush(stdout); timestamp(); for (words = 0; words < size; words++) { /* for each byte */ if ((next = getc(stream)) == EOF) io_error("getc(3)"); chars[next]++; } /* for each byte */ if (fclose(stream) == -1) io_error("fclose after getc"); get_delta_t((int) Getc); printf("done\n"); fflush(stdout); /* use the frequency count */ for (words = 0; words < 256; words++) sprintf((char *) buf, "%d", chars[words]); /* * Now suck it in, BUFSIZ at a time, as fast as we can. */ newfile(name, &fd, &stream, 0); if (lseek(fd, 0L, 0) == -1) io_error("lseek before read"); printf("Reading intelligently..."); fflush(stdout); timestamp(); do if ((words = read(fd, (char *) buf, BUFSIZ)) == -1) io_error("read(2)"); while (words); if (close(fd) == -1) io_error("close after read"); get_delta_t((int) FastRead); printf("done\n"); fflush(stdout); /* * Now do some random I/O. Originally these were binary searches, but * that is well-behaved, since the first few seeks are always the same. * Probably an application that was doing this kind of thing would keep * its own cache of the top few levels of the tree or whatever - we're * just interested in 'How much random I/O can be done?' */ newfile(name, &fd, &stream, 0); timestamp(); printf("Seeking..."); fflush(stdout); for (lseek_count = 0; lseek_count < Searches; lseek_count++) doseek(random() % size, fd, ((lseek_count % UpdateSeek) == 0)); if (close(fd) == -1) io_error("close after read"); get_delta_t((int) Binary); printf("done\n"); fflush(stdout); report(size); unlink(name); } static void report(size) int size; { printf("\n"); printf("Times reported are elapsed / cpu / %%cpu usage.\n"); printf("\nSequential output\n"); printf("putc() bytes/sec: %d / %d / %.1f%%\n", (int) (((double) size) / delta[(int) Putc][Elapsed]), (int) (((double) size) / delta[(int) Putc][CPU]), delta[(int) Putc][CPU] / delta[(int) Putc][Elapsed] * 100.0); printf("write() bytes/sec: %d / %d / %.1f%%\n", (int) (((double) size) / delta[(int) FastWrite][Elapsed]), (int) (((double) size) / delta[(int) FastWrite][CPU]), delta[(int) FastWrite][CPU] / delta[(int) FastWrite][Elapsed] * 100.0); printf("putc() multiplier: %.1f / %.1f\n", delta[(int) Putc][Elapsed] / delta[(int) FastWrite][Elapsed], delta[(int) Putc][CPU] / delta[(int) FastWrite][CPU]); printf("Sequential output time: %.1f / %.1f / %.1f%%\n\n", delta[(int) FastWrite][Elapsed] + delta[(int) Putc][Elapsed], delta[(int) FastWrite][CPU] + delta[(int) Putc][CPU], (delta[(int) FastWrite][CPU] + delta[(int) Putc][CPU]) / (delta[(int) FastWrite][Elapsed] + delta[(int) Putc][Elapsed]) * 100.0); printf("\nSequential input\n"); printf("getc() bytes/sec: %d / %d / %.1f%%\n", (int) (((double) size) / delta[(int) Getc][Elapsed]), (int) (((double) size) / delta[(int) Getc][CPU]), delta[(int) Getc][CPU] / delta[(int) Getc][Elapsed] * 100.0); printf("read() bytes/sec: %d / %d / %.1f%%\n", (int) (((double) size) / delta[(int) FastRead][Elapsed]), (int) (((double) size) / delta[(int) FastRead][CPU]), delta[(int) FastRead][CPU] / delta[(int) FastRead][Elapsed] * 100.0); printf("getc() multiplier: %.1f / %.1f\n", delta[(int) Getc][Elapsed] / delta[(int) FastRead][Elapsed], delta[(int) Getc][CPU] / delta[(int) FastRead][CPU]); printf("Sequential input time: %.1f / %.1f / %.1f%%\n\n", delta[(int) Getc][Elapsed] + delta[(int) FastRead][Elapsed], delta[(int) Getc][CPU] + delta[(int) FastRead][CPU], (delta[(int) Getc][CPU] + delta[(int) FastRead][CPU]) / (delta[(int) Getc][Elapsed] + delta[(int) FastRead][Elapsed]) * 100.0); printf("\nSequential rewrite\n"); printf("Sequential rewrite bytes/sec: %d / %d / %.1f%%\n", (int) (((double) size) / delta[(int) ReWrite][Elapsed]), (int) (((double) size) / delta[(int) ReWrite][CPU]), delta[(int) ReWrite][CPU] / delta[(int) ReWrite][Elapsed] * 100.0); printf("Sequential rewrite time: %.1f / %.1f / %.1f%%\n", delta[(int) ReWrite][Elapsed], delta[(int) ReWrite][CPU], delta[(int) ReWrite][CPU] / delta[(int) ReWrite][Elapsed] * 100.0); printf("\nRandom I/O\n"); printf("Random reads/sec: %.1f / %.1f / %.1f%%\n", ((double) lseek_count) / delta[(int) Binary][Elapsed], ((double) lseek_count) / delta[(int) Binary][CPU], delta[(int) Binary][CPU] / delta[(int) Binary][Elapsed] * 100.0); } static void newfile(name, fd, stream, create) char * name; int * fd; FILE * * stream; int create; { if (create) { /* create from scratch */ if (unlink(name) == -1 && *fd != -1) io_error("unlink"); *fd = open(name, O_RDWR | O_CREAT | O_EXCL, 0777); } /* create from scratch */ else *fd = open(name, O_RDWR, 0777); if (*fd == -1) io_error(name); *stream = fdopen(*fd, "r+"); if (*stream == NULL) io_error("fdopen"); } static void usage() { fprintf(stderr, "usage: %s [-d scratch-directory] [-s size-in-megabytes]\n", myname); exit(1); } static void timestamp() { last_timestamp = time_so_far(); last_cpustamp = cpu_so_far(); } static void get_delta_t(which) int which; { delta[which][Elapsed] = time_so_far() - last_timestamp; delta[which][CPU] = cpu_so_far() - last_cpustamp; } static double cpu_so_far() { #ifdef i386 struct tms tms; if (times(&tms) == -1) io_error("times"); return ((double) tms.tms_utime) / ((double) CLK_TCK) + ((double) tms.tms_stime) / ((double) CLK_TCK); #else struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); return ((double) rusage.ru_utime.tv_sec) + (((double) rusage.ru_utime.tv_usec) / 1000000.0) + ((double) rusage.ru_stime.tv_sec) + (((double) rusage.ru_stime.tv_usec) / 1000000.0); #endif } static double time_so_far() { #ifdef i386 int val; struct tms tms; if ((val = times(&tms)) == -1) io_error("times"); return ((double) val) / ((double) CLK_TCK); #else struct timeval tp; if (gettimeofday(&tp, (struct timezone *) NULL) == -1) io_error("gettimeofday"); return ((double) (tp.tv_sec - basetime)) + (((double) tp.tv_usec) / 1000000.0); #endif } static void io_error(message) char * message; { char buf[BUFSIZ]; sprintf(buf, "%s: drastic I/O error (%s)", myname, message); perror(buf); exit(1); } /* * Do a typical-of-something random I/O. Any serious application that * has a random I/O bottleneck is going to be smart enough to operate * in a page mode, and not stupidly pull individual words out at * odd offsets. To keep the cache from getting too clever, some * pages must be updated. However an application that updated each of * many random pages that it looked at is hard to imagine. * However, it would be wrong to put the update percentage in as a * parameter - the effect is too nonlinear. Need a profile * of what Oracle or Ingres or some such actually does. * Be warned - there is a *sharp* elbow in this curve - on a 1-Mb file, * most substantial unix systems show >2000 random I/Os per second - * obviously they've cached the whole thing and are just doing buffer * copies. */ static void doseek(where, fd, update) long where; int fd; int update; { int buf[BUFSIZ / IntSize]; long probe; int size; probe = (where / BUFSIZ) * BUFSIZ; if (lseek(fd, probe, 0) != probe) io_error("lseek in doseek"); if ((size = read(fd, (char *) buf, BUFSIZ)) == -1) io_error("read in doseek"); /* every so often, update a block */ if (update) { /* update this block */ /* touch a word */ buf[((int) random() % (size/IntSize - 2)) + 1]--; if (lseek(fd, (long) probe, 0) != probe) io_error("lseek in doseek update"); if (write(fd, (char *) buf, size) == -1) io_error("write in doseek"); } /* update this block */ } #ifdef i386 static char randseed[] = "ioioio"; static long random() { return nrand48(randseed); } #endif