/* GREPFV10 - Microsoft C source
Find regular expression in files and archives
=======================================================
Nigel Salt
25 Lower Station Rd
Crayford
Kent
DA1 3PY
UK

Phone +44 322 553260

Email nao@cix.compulink.co.uk
=======================================================
Can process PAK, ZIP, ARC, ARJ, LZH and ZOO archives.
   Usage:    grepfv [options] expression filepattern[+] [tempdir]
   Compiler: Microsoft C V6
   Arguments (after any options):
        1 regular expression
        2 file pattern
        3 path for temporary files (optional)
   FV.COM must be in the path, along with any utilities
   needed to unarchive files.
   A + on the end of the file pattern activates recursive directory search.
*/
/**********************************************************************
   The getfiles function is recursive if search of subdirectories
   is specified.
   This can cause stack overflow which may be remedied by compiling as
   cl -F D000 grepfv.c
   This gives the program a very large stack
***********************************************************************/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <process.h>
#include <dos.h>
#include <string.h>
#include <time.h>

/* Prototypes */
/* Returns 1 if fname passes the -i include list (or the list is empty),
   otherwise reports the file as excluded and returns 0 */
int ifile(char *fname);
/* Returns 1 (and reports the file) if fname matches an entry of the -x
   exclude list, otherwise 0 */
int xfile(char *fname);
/* Dispatches one found file: archives go to doarc, anything else to dofile */
void dosearch(char *name);
/* Lists an archive with fv and searches each member that is extracted */
void doarc(char *arcname);
/* Searches one file for the parsed expression and prints matching lines */
void dofile(char *fname);
/* Prints the error and the help text to stderr, then exits with status 1 */
void usage(char *error);
/* Returns non-NULL if the wildcard expression regexp matches somewhere in
   string, otherwise NULL */
char *regstr(char *regexp,char *string);
/* Splits the expression r in place at | and & into regsegs[] and returns
   the number of segments */
int regpars(char *r);
/* Reads the next run of printable characters from f into buff
   (at most max-1 characters) */
void nsfgets(char *buff,int max,FILE *f);
/* Searches files matching name n under path p; descends into
   subdirectories when all is non-zero */
void getfiles(char *p,char *n,int all);
/* Prints the counters and elapsed time collected during the run */
void dosummary(void);

/* Global variables */
/* Structure to hold regular expression segments.
   regpars() cuts the expression into segments at each | or & and fills
   one entry per segment; dofile() then evaluates the segments in order. */
typedef struct
{
  char op;      /* the '|' or '&' that followed this segment, as stored by regpars */
  char *cptr;   /* start of this segment's text inside the expression string */
} REGSEG;
REGSEG regsegs[64];   /* segment table; regpars rejects more than 64 segments */
int numexp;           /* number of segments in use (return value of regpars) */

/* Structure to hold file exclusion / inclusion strings.
   sptr points to a pattern string
   next is NULL or a pointer to the next item in the list.
   Each list is terminated by a node whose sptr is NULL: main() fills the
   current tail node and appends a new empty one, and xfile()/ifile()
   walk the list until they reach the node with sptr==NULL. */

typedef struct lstruct
{
  char *sptr;
  struct lstruct *next;
} LTYPE, *LTYPEPTR;
static LTYPE xlist;   /* head of the -x (exclude) pattern list */
static LTYPE ilist;   /* head of the -i (include) pattern list */

/* Directory for temporary files as set up by main(): empty, or a path
   ending in a backslash */
static char tempath[256];
/* Directory prefix of the archive members currently being listed (doarc) */
static char zippath[256];
/* Shared scratch buffer: text lines, error messages and command lines */
static char buff[256];
/* Upper-cased copy of the search expression; split in place by regpars */
static char regexp[256];
static char version[]="GREPFV10";
static char copyright[]=" (c) Nigel Salt 1994";
/* Sum of the character codes of copyright[]; main() refuses to run if the
   string no longer adds up to this value */
static int  checksum=1422;
static unsigned long  numfound=0;   /* matching lines printed by dofile */
static unsigned long  numarcs=0;    /* archives handed to doarc */
static unsigned long  numfiles=0;   /* plain files and archive members searched */
static unsigned long  numfailed=0;  /* archive members whose extraction command failed */
static unsigned long  numexc=0;     /* files rejected by xfile / ifile */
static unsigned long  numchars=0;   /* characters read by nsfgets */
static time_t         startime;     /* set at the start of main */
static time_t         endtime;      /* set when the search has finished */
static FILE *nsout=NULL;            /* -o output file, valid only while outfile is non-zero */
static char *outfilnam;             /* name given with -o */
static int  outfile=0;              /* non-zero once the -o file has been opened */

/* Replace every forward slash in s with a backslash, in place */
static void toback(char *s)
{
  for (;*s;s++)
    {
    if (*s=='/')
      *s='\\';
    }
}

/* Store pattern in the (empty) tail node of an include/exclude list and
   append a fresh empty node so the list stays terminated.
   Returns the new tail node.  If memory cannot be allocated usage() is
   called with errmsg, which ends the program. */
static LTYPEPTR addpatt(LTYPEPTR tail,char *pattern,char *errmsg)
{
  tail->next=(LTYPEPTR)(malloc(sizeof(LTYPE)));
  if (tail->next==NULL)
    usage(errmsg);
  tail->sptr=pattern;
  tail->next->next=NULL;
  tail->next->sptr=NULL;
  return tail->next;
}

/* Program entry point.
   Builds the argument list (optionally preceded by the tokens read from an
   @optionfile given as first argument), processes the -o / -x / -i options,
   then takes the expression, the file pattern and the optional temporary
   directory, echoes them, runs the search through getfiles() and prints
   the summary.  Every error is reported through usage(), which terminates
   the program.
   NOTE: main is declared void, as in the original Microsoft C source. */
void main(int argc,char *argv[])
{
  int all=0;
  int pattend;
  int nextarg=0;
  char path[129];
  char name[65];
  char *lastslash;
  char *namepart;
  char *s;
  char *opt;
  char *value;
  int optch;
  int count;
  char *arglist[256];
  LTYPEPTR xcurptr,icurptr;
  FILE *optfile;
  int i;
  int maxarg=0;
  int firstarg=1;
  char *tokadd;


  /* Set start time in seconds */
  time(&startime);

  /* Set include list to empty */
  icurptr=&ilist;
  icurptr->sptr=NULL;
  icurptr->next=NULL;

  /* Set exclude list to empty */
  xcurptr=&xlist;
  xcurptr->sptr=NULL;
  xcurptr->next=NULL;

  /* Check copyright notice */
  s=copyright;
  count=0;
  while (*s)
     count+=*s++;
  if (count!=checksum)
     {
     fprintf(stderr,"\nCopyright violation");
     exit(1);
     }

  fprintf(stderr,"%s%s\n",version,copyright);

  /* argv[1] must exist before it can be inspected */
  if (argc<2)
    usage("You must specify an expression and filepattern");

  /* Copy args to list: tokens from the option file first, if any */
  if (argv[1][0]=='@')
    {
    firstarg=2;
    optfile=fopen(&argv[1][1],"r");
    if (optfile==NULL)
      {
      sprintf(buff,"Cannot open option file '%.200s'",&argv[1][1]);
      usage(buff);
      }
    /* Driven by the result of fgets so that a last line without a newline
       is still used and a read error ends the loop */
    while (maxarg<256 && fgets(buff,255,optfile)!=NULL)
      {
      tokadd=strtok(buff,"\n\r\t ");
      while (tokadd!=NULL && maxarg<256)
        {
        if ( (arglist[maxarg]=calloc(1,strlen(tokadd)+1))==NULL)
          usage("No memory for parameters from file");
        strcpy(arglist[maxarg],tokadd);
        maxarg++;
        tokadd=strtok(NULL,"\n\r\t ");
        }
      }
    fclose(optfile);
    }
  for (i=firstarg;i<argc;i++,maxarg++)
    {
    if (maxarg>255)
      usage("Too many parameters - max is 255");
    arglist[maxarg]=argv[i];
    }

  if (maxarg<2)
    usage("You must specify an expression and filepattern");

  /* Process the leading options; each one takes a value */
  while (nextarg<maxarg && arglist[nextarg][0]=='-')
    {
    opt=arglist[nextarg];
    optch=toupper((unsigned char)opt[1]);
    if (optch!='O' && optch!='X' && optch!='I')
      {
      sprintf(buff,"Unrecognised option '%.200s'",opt);
      usage(buff);
      }
    if (nextarg+1>=maxarg)
      {
      sprintf(buff,"Option '%.200s' needs a value",opt);
      usage(buff);
      }
    value=arglist[nextarg+1];
    nextarg+=2;
    switch (optch)
      {
      case 'O':
        outfilnam=value;
        /* Refuse to overwrite an existing file */
        nsout=fopen(outfilnam,"r");
        if (nsout!=NULL)
          {
          fclose(nsout);
          sprintf(buff,"Output file '%.180s' already exists!\nDelete it or choose a new output file name"
            ,outfilnam);
          usage(buff);
          }
        nsout=fopen(outfilnam,"w");
        if (nsout==NULL)
          {
          sprintf(buff,"Cannot open output file '%.200s'",outfilnam);
          usage(buff);
          }
        outfile=1;
        break;
      case 'X':
        strupr(value);
        xcurptr=addpatt(xcurptr,value,"Cannot allocate memory for exclude option");
        break;
      case 'I':
        strupr(value);
        icurptr=addpatt(icurptr,value,"Cannot allocate memory for include option");
        break;
      default:
        break;
      }
    }

  /* Print options for reference */
  fprintf(stdout,"GREPFV10 SEARCH PARAMETERS\nOPTIONS:");
  if (outfile)
    fprintf(nsout,"GREPFV10 SEARCH PARAMETERS\nOPTIONS:");
  icurptr=&ilist;
  while (icurptr->sptr!=NULL)
    {
    fprintf(stdout,"\n\t -i %s",icurptr->sptr);
    if (outfile)
      fprintf(nsout,"\n\t -i %s",icurptr->sptr);
    icurptr=icurptr->next;
    }
  xcurptr=&xlist;
  while (xcurptr->sptr!=NULL)
    {
    fprintf(stdout,"\n\t -x %s",xcurptr->sptr);
    if (outfile)
      fprintf(nsout,"\n\t -x %s",xcurptr->sptr);
    xcurptr=xcurptr->next;
    }
  if (outfile)
    {
    fprintf(stdout,"\n\t -o %s",outfilnam);
    fprintf(nsout,"\n\t -o %s",outfilnam);
    }

  /* Nextarg should now be set to the regexp parameter; both the
     expression and the file pattern must still be present */
  if (nextarg+2>maxarg)
    usage("You must specify an expression and filepattern");

  /* Get the regular expression */
  fprintf(stdout,"\nEXPRESSION:\t%s",arglist[nextarg]);
  if (outfile)
    fprintf(nsout,"\nEXPRESSION:\t%s",arglist[nextarg]);
  if (strlen(arglist[nextarg])>=sizeof(regexp))
    usage("Expression is too long");
  strcpy(regexp,arglist[nextarg]);
  strupr(regexp);
  numexp=regpars(regexp);
  nextarg++;

  /* Process the file pattern */
  fprintf(stdout,"\nFILE PATTERN:\t%s",arglist[nextarg]);
  if (outfile)
    fprintf(nsout,"\nFILE PATTERN:\t%s",arglist[nextarg]);
  if (strlen(arglist[nextarg])>=sizeof(path))
    usage("File pattern is too long");
  strcpy(path,strupr(arglist[nextarg]));
  nextarg++;
  pattend=(int)strlen(path)-1;
  if (pattend>=0 && path[pattend]=='+')
    {
    all=1; /* + at end so recurse subdirectories */
    path[pattend]='\0';
    pattend--;
    }

  /* Change forward slashes to backslashes */
  toback(path);

  /* Split name and path.  An empty pattern, or one that ends in a
     directory or drive separator, means every file in that place. */
  if (pattend<0 || path[pattend]=='\\' || path[pattend]==':')
    strcpy(name,"*.*");
  else
    {
    lastslash=strrchr(path,'\\');
    if (lastslash==NULL)
      lastslash=strrchr(path,':');  /* keep a drive prefix such as C: in path */
    namepart=(lastslash==NULL)?path:lastslash+1;
    if (strlen(namepart)>=sizeof(name))
      usage("File name pattern is too long");
    strcpy(name,namepart);
    *namepart='\0';
    }

  /* Process temporary file path if any */
  if (maxarg>nextarg)
    {
    fprintf(stdout,"\nTEMP PATH:\t%s",arglist[nextarg]);
    if (outfile)
      fprintf(nsout,"\nTEMP PATH:\t%s",arglist[nextarg]);
    /* Leave room for a trailing backslash and the temporary file name */
    if (strlen(arglist[nextarg])>sizeof(tempath)-14)
      usage("Temporary path is too long");
    strcpy(tempath,arglist[nextarg]);
    /* Change forward slashes to backslashes */
    toback(tempath);
    i=(int)strlen(tempath);
    if (i>0 && tempath[i-1]!='\\')
      strcat(tempath,"\\");
    }
  else
    tempath[0]='\0';

  /* Now have
        file pattern in name
        initial path in path
        expression in regexp (split into regsegs)
        temporary file directory in tempath
        all set to 1 if recursion required
  */
  fprintf(stdout,"\n===============================================================\n");
  if (outfile)
    fprintf(nsout,"\n===============================================================\n");
  getfiles(path,name,all);
  time(&endtime);
  dosummary();
  if (outfile)
    fclose(nsout);
}

/* Takes Path Name All (ie recurse subdirectories) */
void getfiles(char *p,char *n,int all)
{
  int res;
  char cpath[129];
  char npath[129];
  struct find_t cbuff;
  strcpy(cpath,p);
  strcat(cpath,n);
  /* Find normal files */
  res=_dos_findfirst(cpath,0,&cbuff);
  while (res==0)
    {
    if (xfile(cbuff.name)==0 && ifile(cbuff.name) && cbuff.name[0]!='.')
      {
      strcpy(npath,p);
      strcat(npath,cbuff.name);

      /* DOIT */
      dosearch(npath);
      }
    /* Find next file */
    res=_dos_findnext(&cbuff);
    }

  /* Search directories if all specified */
  if (all)
    {
    strcpy(cpath,p);
    strcat(cpath,"*.");
    res=_dos_findfirst(cpath,_A_SUBDIR,&cbuff);
    while (res==0)
      {
      if (cbuff.name[0]=='.')
        {
        res=_dos_findnext(&cbuff);
        continue;
        }
      strcpy(npath,p);
      strcat(npath,cbuff.name);
      strcat(npath,"\\");
      getfiles(npath,n,all);
      res=_dos_findnext(&cbuff);
      }
    }
}

/* Search one file found by getfiles.
   A name whose extension is exactly one of the supported archive types is
   counted in numarcs and handed to doarc(); anything else is counted in
   numfiles, announced, and searched directly with dofile().
   name is expected in upper case, as the extensions are compared as is. */
void dosearch(char *name)
{
  static char *arcexts[]={".ZIP",".ARC",".LZH",".ZOO",".PAK",".ARJ"};
  char *extptr;
  int isarc=0;
  int i;

  /* Compare the whole extension: a partial one such as ".Z" or a name
     without any extension is not an archive */
  extptr=strrchr(name,'.');
  if (extptr!=NULL)
    {
    for (i=0;i<(int)(sizeof(arcexts)/sizeof(arcexts[0]));i++)
      {
      if (strcmp(extptr,arcexts[i])==0)
        {
        isarc=1;
        break;
        }
      }
    }

  if (isarc)
    {
    numarcs++;
    doarc(name);
    }
  else
    {
    numfiles++;
    fprintf(stdout,"->SEARCHING %s\n",name);
    if (outfile)
      fprintf(nsout,"->SEARCHING %s\n",name);
    dofile(name);
    }
}


/* Report an archive or archive member that cannot be handled */
static void arccant(char *what)
{
  fprintf(stdout,"!!!Cannot process %s\n",what);
  if (outfile)
    fprintf(nsout,"!!!Cannot process %s\n",what);
}

/* Gets directory of archive and passes files one at a time to dofile.
   The listing is produced by running "fv" with its output redirected to
   grepfv1.$$$ in the temporary directory.  Each member accepted by
   xfile()/ifile() is extracted to grepfv2.$$$ with the unarchiver that
   belongs to the archive's extension and then searched.
   Exits the program if the extension is not a known archive type or the
   listing file cannot be opened. */
void doarc(char *arcname)
{
  /* Command that writes one member to standard output, per archive type */
  static struct
    {
    char *ext;
    char *cmd;
    } tools[]=
    {
    {".ZIP","pkunzip -c "},
    {".PAK","pak p "},
    {".ARC","arc p "},
    {".LZH","lharc p "},
    {".ZOO","zoo xp "},
    {".ARJ","arj p "}
    };
  static char tempfname[256];
  static char temp2fname[256];
  static char fname[256];
  FILE *f;
  char *spaceptr;
  char *slashptr;
  char *extptr;
  char *unzipper=NULL;
  int ires;
  int i;

  fprintf(stdout,"==>ARCHIVE %s\n",arcname);
  if (outfile)
    fprintf(nsout,"==>ARCHIVE %s\n",arcname);

  /* tempfname  is the full path of the listing file
     temp2fname is the full path of the file each member is extracted to */
  if (strlen(tempath)+strlen("grepfv1.$$$")>=sizeof(tempfname))
    {
    arccant(arcname);
    return;
    }
  strcpy(tempfname,tempath);
  strcpy(temp2fname,tempath);
  strcat(tempfname,"grepfv1.$$$");
  strcat(temp2fname,"grepfv2.$$$");

  /* Get archive listing; the command has to fit into buff */
  if (strlen(arcname)+strlen(tempfname)+4>=sizeof(buff))
    {
    arccant(arcname);
    return;
    }
  sprintf(buff,"fv %s>%s",arcname,tempfname);
  ires=system(buff);
  /* Status 3 is accepted as well as 0 - presumably a normal fv exit
     code; confirm against the fv documentation */
  if (ires!=0&&ires!=3)
    {
    fprintf(stdout,"\n!!!FV.COM not found or failed on '%s'!!!\n",arcname);
    if (outfile)
      fprintf(nsout,"\n!!!FV.COM not found or failed on '%s'!!!\n",arcname);
    return;
    }

  /* Choose the command to unarc to screen */
  extptr=strrchr(arcname,'.');
  if (extptr!=NULL)
    {
    for (i=0;i<(int)(sizeof(tools)/sizeof(tools[0]));i++)
      {
      if (strcmp(tools[i].ext,extptr)==0)
        {
        unzipper=tools[i].cmd;
        break;
        }
      }
    }
  if (unzipper==NULL)
    {
    remove(tempfname);
    fprintf(stderr,"\nUnrecognised archive '%s'\n",arcname);
    exit(1);
    }

  f=fopen(tempfname,"rb");
  if (f==NULL)
    {
    fprintf(stderr,"\nTemporary file '%s' not found",tempfname);
    exit(1);
    }

  /* Skip the heading, up to and including the first line starting with =.
     The loops are driven by the result of fgets so that a read error
     cannot keep them running for ever. */
  while (fgets(buff,255,f)!=NULL && buff[0]!='=')
    ;

  /*
    The format of the output of fv.com is
    filename filesize etc
    directname\
              0 etc
    The entries end at the next line starting with =
  */
  zippath[0]='\0';
  while (fgets(buff,255,f)!=NULL && buff[0]!='=')
    {

    /* Check for path within zip: remember it for the entries that follow */
    slashptr=strchr(buff,'\\');
    if (slashptr!=NULL)
      {
      *(slashptr+1)='\0';
      strcpy(zippath,buff);
      for (slashptr=zippath;*slashptr;slashptr++)
        {
        if (*slashptr=='/')
          *slashptr='\\';
        }
      continue;
      }

    /* Valid file lines start with a char greater than space */
    if (*buff<=' ')
      continue;

    /* Check that there is a space terminated file name */
    spaceptr=strchr(buff,' ');
    if (spaceptr==NULL)
      continue;
    *spaceptr='\0';

    /* File names starting with - upset pkunzip */
    if (*buff=='-')
      {
      arccant(buff);
      continue;
      }

    if (strlen(zippath)+strlen(buff)>=sizeof(fname))
      {
      arccant(buff);
      continue;
      }
    strcpy(fname,zippath);
    strcat(fname,buff);
    strupr(buff);
    if (xfile(buff)==0 && ifile(buff))
      {
      /* The extraction command is built in buff and has to fit */
      if (strlen(unzipper)+strlen(arcname)+strlen(fname)+strlen(temp2fname)+3>=sizeof(buff))
        {
        numfailed++;
        arccant(fname);
        continue;
        }
      sprintf(buff,"%s %s %s>%s",unzipper,arcname,fname,temp2fname);
      ires=system(buff);
      if (ires!=0)
        {
        numfailed++;
        fprintf(stdout,"\n!!!%s FAILED!!!\n",buff);
        if (outfile)
          fprintf(nsout,"\n!!!%s FAILED!!!\n",buff);
        /* The redirection may have left a partial file behind */
        remove(temp2fname);
        }
      else
        {
        numfiles++;
        fprintf(stdout,"--->SEARCHING %s\n",fname);
        if (outfile)
          fprintf(nsout,"--->SEARCHING %s\n",fname);
        /* dofile reuses buff; the next entry is read afresh by the loop */
        dofile(temp2fname);
        remove(temp2fname);
        }
      }
    }
  fclose(f);
  remove(tempfname);
}

/* Search a file.
   The file is read with nsfgets() in pieces of at most 80 printable
   characters.  Each piece is tested against the expression segments in
   regsegs[]: a segment followed by | that matches accepts the piece at
   once, a segment not followed by | that fails rejects it at once.
   Accepted pieces are printed and counted in numfound.
   A file that cannot be opened is reported through usage(), which ends
   the program. */
void dofile(char *name)
{
  FILE *f;
  int i;
  char *cpres;

  f=fopen(name,"rb");
  if (f==NULL)
    {
    sprintf(buff,"Cannot open '%.200s'",name);
    usage(buff);
    }

  /* nsfgets leaves buff empty only when there is nothing more to read,
     so testing buff rather than feof() also searches a last line that
     has no line end */
  for (nsfgets(buff,81,f);buff[0]!='\0';nsfgets(buff,81,f))
    {
    cpres=NULL;
    for (i=0;i<numexp;i++)
      {
      cpres=regstr(regsegs[i].cptr,buff);
      if (cpres!=NULL&&regsegs[i].op=='|')
        break;
      if (cpres==NULL&&regsegs[i].op!='|')
        break;
      }
    if (cpres!=(char *)NULL)
      {
      fprintf(stdout,"  %s\n",buff);
      if (outfile)
        fprintf(nsout,"  %s\n",buff);
      numfound++;
      }
    }
  fclose(f);
}

/* Read the next run of printable characters from f into b.
     b   - receives the text, always terminated
     max - size of b; at most max-1 characters are stored
     f   - stream to read from
   Characters below space (line ends, tabs and other control codes) in
   front of the run are skipped, so empty lines are never returned.  The
   run ends at the next character below space, at end of file, or when
   max-1 characters have been stored; in the last case the character
   that did not fit is pushed back for the next call.
   b is left empty when end of file or a read error is met before any
   printable character.  numchars counts the characters consumed. */
void nsfgets(char *b,int max,FILE *f)
{
  int i;
  int inch;

  *b='\0';

  /* Skip control characters.  Testing for EOF rather than feof() also
     ends the loop when the read fails. */
  do
    {
    inch=getc(f);
    if (inch==EOF)
      return;
    numchars++;
    }
  while (inch<' ');

  /* Now have a printable character in inch */
  for (i=0;i<(max-1)&&inch!=EOF&&inch>=' ';i++)
    {
    *(b+i)=(char)inch;
    inch=getc(f);
    if (inch!=EOF)
      numchars++;
    }

  /* Buffer full: the character just read belongs to the next call */
  if (i==(max-1)&&inch!=EOF)
    {
    ungetc(inch,f);
    numchars--;
    }
  *(b+i)='\0';
}



/* Looks for AND & and OR | chars and breaks up the expression r into the
   array of structures regsegs; returns the number of segments found.
   r is modified: every unescaped | or & is replaced by a terminator so
   that each regsegs[n].cptr points to a separate string, and the operator
   itself is kept in regsegs[n].op of the segment in front of it.  The
   last segment gets op 0.  A character following a backslash is never
   taken as an operator.  More than 64 segments are reported through
   usage(), which ends the program. */
int regpars(char *r)
{
  int regind=0;

  regsegs[0].cptr=r;
  while (*r)
    {
    switch(*r)
      {
      case '\\':
        /* Step over the escaped character, but never over the
           terminator when the backslash is the last character */
        if (*(r+1)!='\0')
          r++;
        break;
      case '|':
      case '&':
        regsegs[regind].op=*r;
        *r='\0';
        regind++;
        if (regind>63)
          usage("YOU HAVE MORE THAN 64 SUBEXPRESSIONS IN YOUR REGULAR EXPRESSION");
        regsegs[regind].cptr=r+1;
        break;
      default:
        break;
      }
    r++;
    }
  /* The last segment has no operator after it */
  regsegs[regind].op='\0';
  return regind+1;
}

/* Do the search using wild cards.
     r - expression; literal letters must already be in upper case, as the
         text is folded to upper case before each comparison
     s - text to search
   The expression is tried at each position of s in turn:
     \c  matches the character c literally
     .   matches any one character
     *   skips text up to the next occurrence of the literal character
         that follows it in the expression (a . or \ directly after the *
         is stepped over first); a * at the end matches the rest
   Returns NULL when there is no match or s is empty.  On a match the
   result is non-NULL and points one character past the position where
   the match starts; the callers only test it against NULL.
   The text cursor never moves beyond the terminator of s, so bytes left
   in the buffer behind the terminator cannot take part in a match. */
char *regstr(char *r,char *s)
{
  int res;
  char *spos,*rpos;

  res=0;
  while (*s&&res!=1)
    {
    res=1;
    spos=s;
    rpos=r;
    while (*rpos&&res==1)
      {
      switch (*rpos)
        {
        case '\\':
          /* A backslash with nothing after it can match nothing */
          if (*(rpos+1)=='\0')
            {
            res=0;
            break;
            }
          rpos++;
          if ((char)toupper((unsigned char)*spos)!=*rpos)
            res=0;
          break;
        case '.':
          /* Needs a real character, not the end of the text */
          if (*spos=='\0')
            res=0;
          break;
        case '*':
          rpos++;
          if (*rpos=='\\') rpos++;
          if (*rpos=='*' ) break;
          if (*rpos=='.' ) rpos++;
          if (!*rpos)
            {
            /* Nothing to look for: step back so that the increment
               below lands on the terminator again */
            rpos--;
            break;
            }
          while (*spos && (char)toupper((unsigned char)*spos)!=*rpos)
            spos++;
          /* Reaching the end of the text means the character wanted
             after the * is not there */
          if (*spos=='\0')
            res=0;
          break;
        default:
          if ((char)toupper((unsigned char)*spos)!=*rpos)
            res=0;
        }
      rpos++;
      if (*spos)
        spos++;
      }
    s++;
    }
  return (res==1?s:(char *)NULL);
}

/* Test a file name against the -x exclude list.
   Returns 1 if fname matches one of the exclude patterns; the file is
   then counted in numexc and reported as excluded on the screen and in
   the output file, if one is in use.  Returns 0 if no pattern matches
   (or the list is empty). */
int xfile(char *fname)
{
  LTYPEPTR node;

  /* The list ends at the node that carries no pattern */
  for (node=&xlist;node->sptr!=NULL;node=node->next)
    {
    if (regstr(node->sptr,fname)==NULL)
      continue;

    numexc++;
    fprintf(stdout,"!!!EXCLUDED: %s\n",fname);
    if (outfile)
      fprintf(nsout,"!!!EXCLUDED: %s\n",fname);
    return 1;
    }

  return 0;
}

/* Test a file name against the -i include list.
   Returns 1 if the list is empty or fname matches one of its patterns.
   Otherwise the file is counted in numexc, reported as excluded on the
   screen and in the output file (if one is in use), and 0 is returned. */
int ifile(char *fname)
{
  LTYPEPTR node=&ilist;

  /* No include patterns given: every file is wanted */
  if (node->sptr==NULL)
    return 1;

  /* The list ends at the node that carries no pattern */
  for (;node->sptr!=NULL;node=node->next)
    {
    if (regstr(node->sptr,fname)!=NULL)
      return 1;
    }

  numexc++;
  fprintf(stdout,"!!!EXCLUDED: %s\n",fname);
  if (outfile)
    fprintf(nsout,"!!!EXCLUDED: %s\n",fname);
  return 0;
}

/* Print one labelled value of the summary to the screen and, if one is in
   use, to the output file */
static void sumline(char *label,unsigned long value)
{
  fprintf(stdout,"\n%s\t%lu",label,value);
  if (outfile)
    fprintf(nsout,"\n%s\t%lu",label,value);
}

/* Print the summary of the run: the counters collected while searching
   and the time between startime and endtime in seconds.  Everything is
   written to the screen and, if one is in use, to the output file. */
void dosummary(void)
{
  double elapsed;
  unsigned long secs;

  /* time_t need not be an unsigned long, so the difference is taken with
     difftime and converted before it is printed with %lu */
  elapsed=difftime(endtime,startime);
  secs=(elapsed>0.0)?(unsigned long)elapsed:0UL;

  fprintf(stdout,"===============================================================\n");
  if (outfile)
    fprintf(nsout,"===============================================================\n");
  fprintf(stdout,"\nSUMMARY OF RESULTS\n");
  if (outfile)
    fprintf(nsout,"\nSUMMARY OF RESULTS\n");

  sumline("Archives searched:     ",numarcs);
  sumline("Files searched:        ",numfiles);
  sumline("Characters searched:   ",numchars);
  sumline("Expressions found:     ",numfound);
  sumline("Files excluded:        ",numexc);
  sumline("Failed files:          ",numfailed);
  sumline("Time taken (secs):     ",secs);
}

/* Report an error and stop.
   Writes the bell character, "ERROR: " and the text in error to stderr,
   followed by the complete help text, then ends the program with exit
   status 1.  This function does not return. */
void usage(char *error)
{
  /* The help text, one entry per original output call */
  static char *help[]=
    {
    "\nUsage:",
    "\n\tgrepfv [options] \"regexp\" filepattern[+] tempdir",
    "\nOPTIONS:",
    "\n@filename read options from file - MUST BE FIRST OPTION",
    "\n-o outfil copies the results to outfil as well as screen",
    "\n   AVOID SEARCHING outfil you will get into a loop",
    "\n-x regexp excludes files matching regexp",
    "\n          e.g. -x \"*\\.exe\" excludes files ending in .exe",
    "\n-i regexp only includes files matching regexp",
    "\n          e.g. -i \"*\\.txt\" only includes files ending in .txt",
    "\n",
    "\nREGULAR EXPRESSIONS: ",
    "\n\\    treat next char as literal",
    "\n.     match any single character",
    "\n*     match 0 or more characters",
    "\n|     preceeding expression OR following",
    "\n&     preceeding expression AND following",
    "\n",
    "\nThe optional + on the end of filepattern will cause grepfv to",
    "\nsearch subdirectories too",
    "\nREGULAR EXPRESSIONS MUST BE ENCLOSED BY QUOTES"
    };
  int count=(int)(sizeof(help)/sizeof(help[0]));
  int line;

  fprintf(stderr,"\aERROR: %s",error);
  for (line=0;line<count;line++)
    fputs(help[line],stderr);
  exit(1);
}

