/*
 *	bmg.c ->	Boyer-Moore-Gosper search routines
 *
 * Adapted from:
 *   Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in
 *   original paper (CACM, October, 1977).  No delta1 or delta2.  According to
 *   experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal
 *   practical value.  However, to improve for worst case input, integrating
 *   the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput.,
 *   February 1986) deserves consideration.
 *
 *   James A. Woods					Copyright (c) 1986
 *   NASA Ames Research Center
 *
 * 29 April 1986	Jeff Mogul	Stanford
 *	Adapted as a set of subroutines for use by a program. No
 *	regular-expression support.
 *
 * 29 August 1986	Frank Whaley	Beyond Words
 *	Trimmed for speed and other dirty tricks.
 */

#include <ctype.h>
#include <string.h>
#include "bmg.h"

/*  forward declarations  */
static int strncmpi(unsigned char *, unsigned char *, int);
static void lower(char *);


/*
 *	bmgCompile() ->	compile Boyer-Moore delta table
 *
 *	bmgCompile() compiles the delta table for the given search string, and
 *	 initializes the search argument structure.  This structure must be
 *	 passed to the bmgSearch() function described below.
 */
void bmgCompile(pat, arg, ignore)
	char	*pat;		/*  the pattern		*/
	bmgARG	*arg;		/*  argument data	*/
	int	ignore;		/*  TRUE to ignore case	*/
	{
	int	i,		/*  general ctr		*/
		patlen;		/*  pattern length	*/

	patlen = strlen(pat);

#ifdef	CHECKS
	if (patlen > bmgMAXPAT)
		{
		fprintf(stderr, "bmgCompile: pattern length exceeds %d\n",
			bmgMAXPAT);
		pat[bmgMAXPAT] = 0;	/*  truncate  */
		}
#endif

	strcpy(arg->pat, pat);
	if (arg->ignore = ignore)
		lower(arg->pat);

	memset(arg->delta, patlen, 256);

	for (i = 0; i < patlen; ++i)
		arg->delta[((unsigned char *)pat)[i]] = patlen - i - 1;

	if (ignore)	/*  tweak upper case if ignore on  */
		for (i = 0; i < patlen; ++i)
			arg->delta[toupper(((unsigned char *)pat)[i])] = patlen - i - 1;
	}


/*
 *	bmgSearch() ->	scan for match
 *
 *	bmgSearch() performs a Boyer-Moore-Gosper search of the given buffer
 *	 for the string described by the given search argument structure.  The
 *	 match action function "action" will be called for each match found.
 *	 This function should return non-zero to continue searching, or 0 to
 *	 terminate the search.  bmgSearch() returns the total number of
 *	 matches found.
 */
int bmgSearch(buffer, buflen, arg, action)
	char	*buffer;	/*  buffer to search		*/
	int	buflen;		/*  length of buffer		*/
	bmgARG	*arg;		/*  search argument		*/
	int	(*action)();	/*  action function		*/
	{
	char	*s;		/*  temp ptr for comparisons	*/
	int	inc,		/*  position increment		*/
		k,		/*  current buffer index	*/
		nhits,		/*  match ctr			*/
		patlen;		/*  pattern length		*/

	nhits = 0;
	k = (patlen = strlen(arg->pat)) - 1;

	for (;;)
		{
		/*
		 *	the following (unsigned char *) type casts save a
		 *	 few clocks by freeing us from some XCHGs
		 */
		while ((inc = arg->delta[((unsigned char *)buffer)[k]]) &&
			((k += inc) < buflen))
			;
		if (k >= buflen)
			break;
    
		s = buffer + (k++ - (patlen - 1));
		if (!(arg->ignore ? strncmpi(s, arg->pat, patlen) : strncmp(s, arg->pat, patlen)))
			{
			++nhits;
			if (!(*action)(buffer, s - buffer))
				return (nhits);
			}
		}

	return (nhits);
	}

/*****************************************************************************/
/*****************************  local functions  *****************************/
/*****************************************************************************/

/*
 *	lower() ->	lower case a string
 */
static void lower(s)
	char	*s;
	{
	while (*s)
		{
		*s = tolower(*s);
		++s;
		}
	}


/*
 *	strncmpi() ->	strncmp(), ignore case
 */
static int strncmpi(s, t, n)
	unsigned char	*s,
			*t;
	int		n;
	{
	for (; n-- && (tolower(*s) == tolower(*t)); ++t)
		if (!*s++)
			return (0);	/* equal */

	if (n < 0)		/* maximum hit */
		return (0);		/* equal */

	return ((tolower(*s) > tolower(*t)) ? 1 : (-1));	/* not equal */
	}


/*
 *	END of bmg.c
 */
