git » fp-git.git » master » tree

[master] / PR / src / lib / compression / lzg_compress.c

/*
 * LZG compression
 *
 * ----------------------------------------------------------------------------
 *
 * Authors:
 *   Enrique Calot <ecalot.cod@princed.com.ar>
 *   Diego Essaya <dessaya@fi.uba.ar>
 *
 * Research: Tammo Jan Dijkemma, Anke Balderer, Enrique Calot
 *
 * ----------------------------------------------------------------------------
 *
 * Copyright (C) 2004, 2005 the Princed Team
 *
 * This file is part of the Princed project.
 *
 * Princed is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Princed is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/*
 * Modus operandi of the compression algorithm
 * -------------------------------------------
 *
 * For each input byte we take a window containing the 1023 previous bytes.
 * If the window goes out of bounds (ie, when the current input byte is
 * before position 1024), we consider it filled with zeros.
 *
 *     00000000000000000000********************************
 *                         ^                  ^
 *                    input start   current input byte
 *           |--------------------------------|
 *                    window size=1023
 *
 * The algorithm works as follows:
 *
 * While there is unread input data:
 *     Create a maskbyte.
 *     For each bit in the maskbyte (and there is still unread input data):
 *         Compare the following input bytes with the bytes in the window,
 *         and search the longest pattern that is equal to the next bytes.
 *         If we found a pattern of length n > 2:
 *             Assign 0 to the current bit of the maskbyte.
 *             In the next 2 bytes of the output, specify the relative
 *             position and length of the pattern.
 *             Advance output pointer by 2.
 *             Advance input pointer by n.
 * 	       Else:
 *             Assign 1 to the current bit of the maskbyte.
 * 	           Copy the current input byte in the next output byte.
 *             Advance output pointer by 1.
 *             Advance input pointer by 1.
 */

#include <stdio.h>
#include <stdlib.h>
#include "compress.h"

/*#define LZG_REVERSE*/
extern int compressionHigher;

#ifdef LZG_REVERSE
void *memrchr2(unsigned char *s, int c, size_t n) {
	while (n--) {
		if (s[n]==c) return s+n;
	}
	return NULL;
}
#define memsearch(a,b,c) memrchr2(a,b,c)
#else
#include <string.h>  /* memchr() */
#define memsearch(a,b,c) memchr(a,b,c)
#endif

/* Window size */
#define WIN_SIZE 1024

/* A repetition pattern must have a length of at least MIN_PATTERN_SIZE
 * to be accepted (ie, to be worth for compressing) */
#define MIN_PATTERN_SIZE 3
#define MAX_PATTERN_SIZE 66

/* search the longest pattern in the window that matches the first bytes
 * of the input */
void search_best_pattern(unsigned char *input, int inputSize,
                         unsigned char **best_pattern, int *best_pattern_len)
{
	unsigned char *pattern;
	int pattern_len;
	unsigned char *window = input - (WIN_SIZE - 1);
	int window_len = WIN_SIZE - 1;

	*best_pattern_len = 0;

	while ((pattern = (unsigned char *)memsearch(window, *input, window_len)))
	{
		unsigned char *ic = input + 1;
		unsigned char *wc = pattern + 1;
		pattern_len = 1;

		while (
			(ic < (input + inputSize)) &&
			(*ic == *wc) &&
			pattern_len < MAX_PATTERN_SIZE
		)	{
			ic++; wc++; pattern_len++; /* increase until the pattern is different */
		}

		if (pattern_len > *best_pattern_len) /* if it is the maximum, save it */
		{
			/*window_len+= *best_pattern_len - pattern_len; * optimization *
			if (window_len <= 0) break;*/
			*best_pattern_len = pattern_len;
			*best_pattern = pattern;
		}

		if (pattern_len == MAX_PATTERN_SIZE) break;

		/* if cLevel is 6 or 7 (cHigh set) compression rate will be
		 * 5% better and compression time will be 400% slower */
		cHigh {
			window_len--;
			window++;
		} else {
			window_len -= wc - window;
			if (window_len <= 0) break;
			window = wc;
		}
	}
}

/* Insert the specified bit in the current maskByte. If the maskByte is full,
 * start a new one. */
static int maskBit=8;
void pushBit(int b, unsigned char *output, int *oCursor)
{
	static unsigned char* maskByte;
	if ( maskBit == 8 ) /* first time or maskBit is full */
	{
		/* start a new maskByte */
		maskByte = output + *oCursor;
		(*oCursor)++;
		*maskByte = 0;
		maskBit = 0;
	}

	*maskByte |= b<<maskBit;
	maskBit++;
}

/* Insert the two bytes describing the pattern repetition to the output. */
void addPattern(unsigned char *input, int iCursor,
                unsigned char *output, int oCursor,
                unsigned char *pattern, int pattern_len) {
	int loc = (pattern - input) + WIN_SIZE - MAX_PATTERN_SIZE;
	output[oCursor] =
		(((pattern_len - MIN_PATTERN_SIZE) << 2) & 0xfc) + ((loc & 0x0300) >> 8);
	output[oCursor + 1] = (loc & 0x00FF);
}

/* Compress using the LZG algorithm */
/* Assume output has been allocated and the size is very big */
void compressLzg(const unsigned char* input2, int inputSize,
                 unsigned char* output, int *outputSize)
{
	int iCursor = 0, oCursor = 0;
	unsigned char* input=(unsigned char*)malloc(inputSize+WIN_SIZE);

	/* Create ghost window filled with zeros before input data: */
	memset(input, 0, WIN_SIZE);
	input += WIN_SIZE;
	memcpy(input,input2,inputSize);

	while (iCursor < inputSize)
	{
		unsigned char *best_pattern;
		int best_pattern_len;

		search_best_pattern(input + iCursor, inputSize - iCursor,
		                    &best_pattern, &best_pattern_len);

		if (best_pattern_len < MIN_PATTERN_SIZE)
		{
			/* No suitable pattern found. Just copy the current byte. */
			pushBit(1, output, &oCursor);
			output[oCursor] = input[iCursor];
			iCursor++;
			oCursor++;
		}
		else
		{
			/* Can compress. Repeat the best pattern. */
			pushBit(0, output, &oCursor);
			addPattern(input, iCursor, output, oCursor,
			           best_pattern, best_pattern_len);
			iCursor += best_pattern_len;
			oCursor += 2;
		}
	}

	maskBit=8; /* reinitialize maskBit*/

	free(input-WIN_SIZE);
	*outputSize = oCursor;
}