/*
 * lzrw1.c
 *
 * Copyright (C) 1996 Luigi Rizzo (luigi@iet.unipi.it)
 *
 * An implementation of LZRW1/KH algorithm in C
 * from a Turbo Pascal source from Kurt HAENEN (sorry, address unknown)
 *
 */

#include <string.h>	/* for bcopy and friends */

#define	HASHSIZE	4096	/* must be a power of 2 */
#define	HASHMASK	(HASHSIZE - 1)
#define	SEQLEN		0xfff	/* max length of a sequence of equal chars */

#define	BufferMaxSize	32768;
#define	BufferMax	(BufferMaxSize-1)
#define	FLAG_Copied	0x80
#define	FLAG_Compress	0x40
#define	HASH_EMPTY	-1

#define	BYTE	unsigned char
#define	WORD	unsigned short

#define	HI(x)	(((x)>>8) & 0xff)
#define	LO(x)	((x) & 0xff)

/*
 * getmatch returns a success if a match is found
 * with length >= 3.
 * Pos and size indicate the position and size of the match
 */

int
GetMatch(BYTE src[], WORD src_ptr, WORD SourceSize, int Hash[], int *Pos)
{
    int	size=0;
    int	key;
    int	found=0;

    int lim;
    BYTE *match_p, *sp= src+src_ptr;

    key = (40543*((((*sp <<4) ^ sp[1]) << 4) ^ sp[2]) >> 4) & HASHMASK;

    if ( (Hash[key] != HASH_EMPTY) && (src_ptr-Hash[key] < 4096) ) {
        *Pos = Hash[key];
	match_p = src + *Pos;
        size = 0;
	if (src_ptr+18 > SourceSize) lim= SourceSize-src_ptr;
	else lim=18;
        while (lim-- && (sp[size] == match_p[size]) )
	    size++;
    }
    Hash[key] = src_ptr;
    return size;
}

/*
 * words are stored MSB first. The header is made as follows:
 *
 *	MSB	 +----+----+----+----+ LSB
 *	POS	  xxxx xxxx xxxx
 *	SIZE			 xxxx
 */
#if 1
#   define	PUTWORD(destp,w) (*(destp) = HI(w), *(destp+1) = LO(w))
#   define	GETWORD(srcp) ((*(srcp) <<8) + (*(srcp+1)))
#   define	GETSIZE(srcp) ((*(srcp+1) & 0xf))
#   define	GETPOS(srcp) ( (*(srcp) << 4) + (*(srcp+1) >> 4) )
#else
#   define	PUTWORD(destp,w) *(WORD *)(destp) = (w)
#   define	GETWORD(srcp) *(WORD *)(srcp)
#   define	GETSIZE(srcp) (GETWORD(srcp) & 0x0f)
#   define	GETPOS(srcp) (GETWORD(srcp) >> 4)
#endif

int
lzrw1_compr(BYTE src[], int SourceSize, BYTE dst[])
{
    unsigned short	Command=0;
    int	Bit=0,
	size;
    int	src_ptr=0,
	dst_ptr=3,
	cmd_ptr=1,
	Pos;

    int      key, hash[HASHSIZE];
	/* init hash table */
    for (key = 0; key < HASHSIZE; key++)  hash[key] = HASH_EMPTY;

    dst[0] = FLAG_Compress;	/* assume compression ok */
    while ((src_ptr < SourceSize) && (dst_ptr <= SourceSize)) {
	if (Bit > 15) {
		/* store back command */
	    PUTWORD(dst+cmd_ptr, Command);
	    cmd_ptr = dst_ptr;
	    Bit = 0;
	    dst_ptr +=2;
	}
        size = 1;
	    /* look for string sequences */
        while ((src[src_ptr] == src[src_ptr+size]) &&
	       (size < SEQLEN) &&
	       (src_ptr+size < SourceSize))
	    size++;
        if (size >= 16) {
		/* a long string found. Don't waste hash table space */
            Command += Command + 1; /* SHL and set LSB */
	    dst[dst_ptr] = 0;	/* XXX this requires big-endian storage */
	    PUTWORD(dst+dst_ptr+1, size-16);
            dst[dst_ptr+3] = src[src_ptr];
            dst_ptr += 4;
            src_ptr += size;
        }
        else if ( (size=GetMatch(src, src_ptr, SourceSize, hash, &Pos))>=3) {
            Command += Command + 1; /* SHL and set LSB */
            key = ((src_ptr-Pos) << 4) + (size-3);
	    PUTWORD(dst+dst_ptr, key);
            dst_ptr += 2;
            src_ptr += size;
        }
        else {
		/* no match found, just copy the source symbol */
            Command += Command; /* SHL and clear LSB */
            dst[dst_ptr++] = src[src_ptr++];
        }
        Bit++;
    }
    Command <<= (16-Bit); /* left align Command */
    PUTWORD(dst+cmd_ptr, Command);
    if (dst_ptr > SourceSize) {
	memcpy(dst+1, src, SourceSize);
        dst[0] = FLAG_Copied;
        dst_ptr = SourceSize+1;
    }
    return dst_ptr;
}

int
lzrw1_decompr(BYTE src[], int SourceSize, BYTE dst[])
{
    int
	i,
	size,
	Pos;
    unsigned long Command=0; /* this is a trick to avoid using a bit counter */
    BYTE *dst_ptr= dst;
    BYTE *src_ptr= src+1,
	*src_lim = src + SourceSize;

    if (src[0] == FLAG_Copied) {
	memcpy(dst,src+1, SourceSize-1);
	return SourceSize-1;
    }
    while (src_ptr < src_lim) {
	if (Command == 0) {
	    Command = GETWORD(src_ptr) | 0x10000UL;
	    src_ptr +=2;
	}
	if ((Command & 0x8000) == 0) {
	    *dst_ptr++ = *src_ptr++;
	}
	else {
	    Pos = GETPOS(src_ptr);
	    if (Pos == 0) {
		size = GETWORD(src_ptr+1) + 16;
		for (; size ; size--)
		      *dst_ptr++ = src_ptr[3];
		src_ptr +=4;
	    }
	    else {
		size = GETSIZE(src_ptr)+3;
#if	0	/* don't know why, this doesn't work */
		bcopy(dst_ptr-Pos, dst_ptr, size);
		i=size;
#else
		for (i = 0 ; i < size ; i++)
		     dst_ptr[i] = dst_ptr[-Pos+i];
#endif
		src_ptr += 2;
		dst_ptr += size;
	    }
	}
	Command += Command; /* SHL */
	Command &= ~0x10000UL;
    }
    return dst_ptr - dst;
}
