EOF : Run this shell script with "sh" not "csh" PATH=:/bin:/usr/bin:/usr/ucb export PATH all=FALSE if [ $1x = -ax ]; then all=TRUE fi /bin/echo 'Extracting compress.l' sed 's/^X//' <<'//go.sysin dd *' >compress.l X.PU X.TH COMPRESS 1 X.SH NAME compress \- compress files X.SH SYNOPSIS X.B compress X.B [\-d] [filename] X.br X.B uncompress [filename] X.SH DESCRIPTION Compresses the specified file or standard input. The result is written to standard output. Compressed files can be restored to their original form by specifying the X.B \-d option, or by running X.I uncompress. X.PP X.I Compress uses the modified Lempel-Ziv algorithm described in "A Technique for High Performance Data Compression", Terry A. Welch, X.I IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. X.PP The amount of compression obtained depends on the size of the input file, and the distribution of character sequences. Typically, text files, such as C programs, are reduced to 40\-50% of their original size; X.SH BUGS Even if the file gets bigger after compression, the output is still produced. Some of the code is highly VAX dependent. //go.sysin dd * made=TRUE if [ $made = TRUE ]; then /bin/chmod 664 compress.l /bin/echo -n ' '; /bin/ls -ld compress.l fi /bin/echo 'Extracting uncompress.l' sed 's/^X//' <<'//go.sysin dd *' >uncompress.l X.so manl/compress.l //go.sysin dd * made=TRUE if [ $made = TRUE ]; then /bin/chmod 664 uncompress.l /bin/echo -n ' '; /bin/ls -ld uncompress.l fi /bin/echo 'Extracting compress.c' sed 's/^X//' <<'//go.sysin dd *' >compress.c X/* * compress.c - File compression ala IEEE Computer June 1984. * * Author: Spencer W. Thomas * Computer Science Dept. * University of Utah * Date: Wed Jul 4 1984 * Copyright (c) 1984 Spencer W. Thomas * * WARNING: due to cc -O bug (dealing with ext instruction), do not * compile this program with -O (4.2bsd, at least). * * $Header: compress.c,v 1.4 84/07/05 03:11:11 thomas Exp $ * $Log: compress.c,v $ * Revision 1.4 84/07/05 03:11:11 thomas * Clean up the code a little and lint it. (Lint complains about all * the regs used in the asm, but I'm not going to "fix" this.) * * Revision 1.3 84/07/05 02:06:54 thomas * Minor fixes. * * Revision 1.2 84/07/05 00:27:27 thomas * Add variable bit length output. * */ static char rcs_ident[] = "$Header: compress.c,v 1.4 84/07/05 03:11:11 thomas Exp $"; #include "stdio.h" #include "ctype.h" #define BITS 16 /* maximum number of bits/code */ int maxbits = BITS; /* user settable max # bits/code */ long int maxmaxcode = 1 << BITS; int n_bits = 9; /* initial number of bits/code */ long int maxcode = 511; /* 1 << n_bits - 1 */ long int bytes_out = 0; /* count how many are written */ X/* * One code could conceivably represent (1<<BITS) characters, but * to get a code of length N requires an input string of at least * N*(N-1)/2 characters. With 10000 chars in the stack, an input * file would have to contain a 50Mb string of a single character. * This seems unlikely. */ #define MAXSTACK 10000 /* size of output stack */ struct c_ent { struct c_ent * next, /* chain of entries with same prefix */ * chain; /* chain prefixed with this entry */ unsigned int prefix : BITS, /* prefix code for this entry */ code : BITS; /* code for this entry */ unsigned char suffix; /* last char in this entry */ } c_tab[1<<BITS]; /* a table full of them */ long int free_ent = 0; /* first unused entry */ long int getcode(); int debug = 0; X/***************************************************************** * TAG( main ) * * Algorithm from "A Technique for High Performance Data Compression", * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. * * Usage: compress [-d] [file] * Inputs: * -d: If given, decompression is done instead. * * file: File to be compressed. If none specified, stdin * is used. * Outputs: * stdout: Compressed form of file. * * Assumptions: * File is better off compressed. Goes ahead and compresses even * if the result is bigger. * Algorithm: * Modified Lempel-Ziv method (LZW). Basically finds common * substrings and replaces them with a variable size code. This is * deterministic, and can be done on the fly. Thus, the decompression * procedure needs no input table, but tracks the way the table was * built. */ main( argc, argv ) char **argv; { FILE * infile; char * fname = NULL; int decompress_flg = 0; int verbose = 0; /* 4.2BSD dependent - take it out if not */ setlinebuf( stderr ); #ifndef NO_SCANARGS if ( scanargs( argc, argv, "compress D%- d%- v%- b%-maxbits!d file%s", &debug, &decompress_flg, &verbose, &maxbits, &maxbits, &fname ) == 0 ) exit( 1 ); #else /* * Roll your own here using getopt or whatever. All flags are * optional. * -D => debug * -d => decompress_flg * -v => verbose * -b maxbits => maxbits. If -b is specified, then maxbits MUST be * given also. * if a string is left, must be filename. */ #endif if ( maxbits > BITS ) maxbits = BITS; maxmaxcode = 1 << maxbits; if ( fname != NULL ) { if ( ( infile = fopen( fname, "r" ) ) == NULL ) { perror( fname ); exit( 1 ); } } else infile = stdin; if ( decompress_flg == 0 ) compress( infile ); else if ( debug == 0 ) decompress( infile ); else { /* * Just print out codes from input file. Mostly for debugging. */ long int code; int col = 0, bits = n_bits; free_ent = 255; while ( ( code = getcode( infile ) ) >= 0 ) { if ( free_ent < maxmaxcode ) free_ent++; if ( bits != n_bits ) { printf( "\nChange to %d bits\n", n_bits ); bits = n_bits; col = 0; } printf( "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); } putchar( '\n' ); exit( 0 ); } /* If requested (verbose), dump string table */ if ( verbose ) dump_tab(); } X/***************************************************************** * TAG( compress ) * * Actually does the compression. * Inputs: * infile: File to compress. * Outputs: * Writes compressed file to stdout. * Assumptions: * [None] * Algorithm: * See above. */ compress( infile ) FILE *infile; { int ic; unsigned char c; register struct c_ent * ent, * n_ent; long int in_count = 1; #ifdef STATS int out_count = 0; #endif /* * Initialize the compression table to contain all 8-bit values * initially. These don't need to be chained, they can be looked * up directly. */ for ( free_ent = 0, ent = c_tab; free_ent < 256; free_ent++, ent++ ) { ent->next = ent->chain = NULL; ent->prefix = 0; ent->code = free_ent; ent->suffix = free_ent; } c = getc( infile ); ent = &c_tab[c]; /* initial entry */ while ( !feof( infile ) && (ic = getc(infile)) != EOF ) { in_count++; c = ic; /* * Find the entry corresponding to the current entry suffixed * with this char. */ for ( n_ent = ent->chain; n_ent != NULL; n_ent = n_ent->next ) if ( n_ent->suffix == c ) break; /* found it */ /* * If no such entry, do 2 things: * 1. Put out code for current prefix string. * 2. Add the new string to the table. * If the table is full, just start over with current input * character. */ if ( n_ent == NULL ) { output( (long int)ent->code ); #ifdef STATS out_count++; #endif if ( free_ent < maxmaxcode ) { n_ent = &c_tab[free_ent]; n_ent->chain = NULL; n_ent->prefix = ent->code; n_ent->suffix = c; n_ent->code = free_ent; n_ent->next = ent->chain; ent->chain = n_ent; free_ent++; } n_ent = &c_tab[c]; } ent = n_ent; } /* * Put out the final code. */ output( (long int)ent->code ); #ifdef STATS out_count++; #endif output( -1L ); /* finish up output if necessary */ /* * Print out stats on stderr */ #ifdef STATS fprintf( stderr, "%ld chars in, %ld codes (%ld bytes) out, compression factor %g\n", in_count, out_count, bytes_out, (double)in_count / (double)bytes_out ); fprintf( stderr, "\tCompression as in compact: %5.2f%%\n", 100.0 * ( in_count - bytes_out ) / (double) in_count ); fprintf( stderr, "\tLargest code was %d (%d bits)\n", free_ent - 1, n_bits ); #else fprintf( stderr, "\tCompression: %5.2f%%\n", 100.0 * ( in_count - bytes_out ) / (double) in_count ); #endif } X/***************************************************************** * TAG( output ) * * Output the given code. * Inputs: * code: A n_bits-bit integer. If == -1, then EOF. This assumes * that n_bits =< (long)wordsize - 1. * * Outputs: * Outputs code to the file. * Assumptions: * Chars are 8 bits long. * Algorithm: * Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly). Use the VAX insv instruction to insert each * code in turn. When the buffer fills up empty it and start over. */ output( code ) long int code; { static char buf[BITS]; static int offset = 0; static int col = 0; /* * On the VAX, it is important to have the register declarations * in exactly the order given, or the asm will break. */ register int r_off = offset, bits= n_bits; register char * bp = buf; if ( code >= 0 ) { /* VAX DEPENDENT!! Implementation on other machines may be * difficult. * * Translation: Insert BITS bits from the argument starting at * offset bits from the beginning of buf. */ #ifndef vax You lose #endif asm( "insv 4(ap),r11,r10,(r9)" ); offset += n_bits; if ( offset == n_bits*8 ) { fwrite( buf, 1, n_bits, stdout ); offset = 0; bytes_out += n_bits; } if ( debug ) fprintf( stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); /* * If the next entry is going to be too big for the code size, * then increase it, if possible. */ if ( free_ent > maxcode ) { /* * Write the whole buffer, because the input side won't * discover the size increase until after it has read it. */ if ( offset > 0 ) { fwrite( buf, 1, n_bits, stdout ); bytes_out += n_bits; } offset = 0; n_bits++; if ( n_bits == maxbits ) maxcode = maxmaxcode; else maxcode = (1 << n_bits) - 1; if ( debug ) { fprintf( stderr, "\nChange to %d bits\n", n_bits ); col = 0; } } } else { /* * At EOF, write the rest of the buffer. */ if ( offset > 0 ) fwrite( buf, 1, (offset + 7) / 8, stdout ); bytes_out += (offset + 7) / 8; offset = 0; fflush( stdout ); if ( debug ) fprintf( stderr, "\n" ); } } X/***************************************************************** * TAG( decompress ) * * Decompress the input file. * * Inputs: * infile: File to be decompressed. * Outputs: * Writes decompressed file to stdout. * Assumptions: * Input file was created with compress (could use a magic #, I * guess). * Algorithm: * See article cited above. */ decompress( infile ) FILE *infile; { unsigned char stack[MAXSTACK]; int stack_top = MAXSTACK; long int code, oldcode, incode; unsigned char finchar; register struct c_ent * ent; /* * As above, initialize the first 256 entries in the table. */ for ( free_ent = 0; free_ent < 256; free_ent++ ) { ent = &c_tab[free_ent]; ent->next = ent->chain = NULL; ent->prefix = 0; ent->code = free_ent; ent->suffix = free_ent; } code = oldcode = getcode( infile ); putchar( (char)code ); /* first code must be 8 bits = char */ finchar = code; while ( ( code = getcode( infile ) ) != -1 ) { incode = code; /* * Special case for KwKwK string. */ if ( code >= free_ent ) { stack[--stack_top] = finchar; code = oldcode; } /* * Generate output characters in reverse order */ ent = &c_tab[code]; while ( ent->code >= 256 ) { stack[--stack_top] = ent->suffix; ent = &c_tab[ent->prefix]; } stack[--stack_top] = finchar = ent->suffix; /* * And put them out in forward order */ fwrite( &stack[stack_top], 1, MAXSTACK - stack_top, stdout ); stack_top = MAXSTACK; /* * Generate the new entry. */ if ( free_ent < maxmaxcode ) { ent = &c_tab[free_ent]; ent->prefix = oldcode; ent->suffix = finchar; ent->code = free_ent; free_ent++; } /* * Remember previous code. */ oldcode = incode; } fflush( stdout ); } X/***************************************************************** * TAG( getcode ) * * Read one code from the input file. If EOF, return -1. * Inputs: * infile: Input file. * Outputs: * code or -1 is returned. * Assumptions: * [None] * Algorithm: * For now, use scanf to read ascii input. */ long int getcode( infile ) FILE *infile; { /* * On the VAX, it is important to have the register declarations * in exactly the order given, or the asm will break. */ register long int code; static int offset = 0, size = 0; static char buf[BITS]; register int r_off, bits; register char * bp = buf; if ( offset >= size || free_ent > maxcode ) { /* * If the next entry will be too big for the current code * size, then we must increase the size. This implies reading * a new buffer full, too. */ if ( free_ent > maxcode ) { n_bits++; if ( n_bits == maxbits ) maxcode = maxmaxcode; /* won't get any bigger now */ else maxcode = (1 << n_bits) - 1; } size = fread( buf, 1, n_bits, infile ); if ( size <= 0 ) return -1; /* end of file */ offset = 0; /* Round size down to integral number of codes */ size = size * 8 - (n_bits - 1); } /* * Get it into a register for the following VAX dependent code. */ r_off = offset; bits = n_bits; #ifndef vax You lose #endif asm( "extzv r10,r9,(r8),r11" ); offset += n_bits; return code; } X/***************************************************************** * TAG( dump_tab ) * * Dump the string table. * Inputs: * [None] * Outputs: * [None] * Assumptions: * [None] * Algorithm: * [None] */ dump_tab() { register int i; register struct c_ent * ent; char stack[4 * MAXSTACK]; /* \nnn makes it 4 times bigger */ int stack_top = 4 * MAXSTACK; for ( i = 0; i < free_ent; i++ ) { ent = &c_tab[i]; if ( isascii(ent->suffix) && isprint(ent->suffix) ) fprintf( stderr, "%5d: %5d/'%c' \"", ent->code, ent->prefix, ent->suffix ); else fprintf( stderr, "%5d: %5d/\\%03o \"", ent->code, ent->prefix, ent->suffix ); stack[--stack_top] = '\n'; stack[--stack_top] = '"'; for ( ; ent != NULL; ent = (ent->code >= 256 ? &c_tab[ent->prefix] : NULL) ) { if ( isascii(ent->suffix) && isprint(ent->suffix) ) stack[--stack_top] = ent->suffix; else { switch( ent->suffix ) { case '\n': stack[--stack_top] = 'n'; break; case '\t': stack[--stack_top] = 't'; break; case '\b': stack[--stack_top] = 'b'; break; case '\f': stack[--stack_top] = 'f'; break; case '\r': stack[--stack_top] = 'r'; break; default: stack[--stack_top] = '0' + ent->suffix % 8; stack[--stack_top] = '0' + (ent->suffix / 8) % 8; stack[--stack_top] = '0' + ent->suffix / 64; break; } stack[--stack_top] = '\\'; } } fwrite( &stack[stack_top], 1, 4 * MAXSTACK - stack_top, stderr ); stack_top = 4 * MAXSTACK; } } //go.sysin dd * made=TRUE if [ $made = TRUE ]; then /bin/chmod 444 compress.c /bin/echo -n ' '; /bin/ls -ld compress.c fi /bin/echo 'Extracting uncompress.sh' sed 's/^X//' <<'//go.sysin dd *' >uncompress.sh #! /bin/sh PATH=/bin:/usr/bin:/usr/local compress -d $* //go.sysin dd * made=TRUE if [ $made = TRUE ]; then /bin/chmod 664 uncompress.sh /bin/echo -n ' '; /bin/ls -ld uncompress.sh fi