//------------------------------------------------------------------
// gpRegExp.cpp - Definition of the gpRegExp class.
//
// Copyright 1994 Prodis Incorporated.
//
// Purpose: The General Purpose Regular Expression class handles
//          pattern matching duties.
//
// Architect: TDE
// Developer: AKJ
//
// Modification History:
//      09/08/94 TDE: Original Code.
//      09/09/94 AKJ: Fixed TDE's stuff, and fleshed out functions.
//      01/17/95 AKJ: Added support for +.
//      02/22/95 AKJ: Minor fix to Optional to allow fallback.
//
//------------------------------------------------------------------

#include <stdlib.h>
#include <gpregexp\gpregexp.h>
#include <gpstring\gpslist.h>

#define LOWER_BOUND 1
#define UPPER_BOUND 3

// Create a gpRegExp object from a character string and parse its
// first atom right away.
// NOTE(review): `top` is not a parameter of this overload and is not
// declared anywhere in this file; confirm against the class header
// what it resolves to (the other constructors receive it as an
// argument).
gpRegExp::gpRegExp (const char *cNewText)
    : gpString (cNewText)
{
    NextAtom = 0;
    nDoICount = 0;
    fTopLevel = top;

    ParseAtoms ();
}

// Create a gpRegExp object from a single character.  `top` is stored
// in fTopLevel, which ParseAtoms() and operator== consult to decide
// whether the top-level-only handling applies to this atom.
gpRegExp::gpRegExp (char cChar, int top)
    : gpString (cChar)
{
    fTopLevel = top;
    NextAtom = 0;
    nDoICount = 0;

    ParseAtoms ();
}
    
// Create a gpRegExp object over a default-constructed gpString.
// `top` is stored in fTopLevel before the (empty) text is parsed.
gpRegExp::gpRegExp (int top)
    : gpString ( )
{
    fTopLevel = top;
    NextAtom = 0;
    nDoICount = 0;

    ParseAtoms ();
}

// Create a gpRegExp object from an existing gpString.  ParseAtoms()
// uses this overload (with top == 0) to build the follow-on atoms
// and the alternatives of an Optional atom.
gpRegExp::gpRegExp (const gpString &s, int top)
    : gpString (s)
{
    fTopLevel = top;
    NextAtom = 0;
    nDoICount = 0;

    ParseAtoms ();
}

// Destroy the atom that follows this one, if there is one.  That
// atom's own destructor does the same for its successor.
gpRegExp::~gpRegExp ( )
{
    if (NextAtom != 0)
      {
        delete NextAtom;
      }
}

// Replace the pattern with the text of oString and re-parse it as a
// top-level expression.
//
// The text is copied through the base-class assignment explicitly.
// Writing `(*this) = oString` here would select this very operator
// again (the argument is an exact match for gpString &) and recurse
// without end.
//
// State left over from the previous pattern is discarded before the
// new text is parsed: the follow-on atom chain, the "meaningful atom"
// flag set by '&', and the alternatives collected for an Optional
// atom.  ParseAtoms() only ever adds to these, so they would
// otherwise leak into the new expression.
gpRegExp &gpRegExp::operator= (gpString &oString)
  {
    // Copy first: oString could refer to an atom owned by the chain
    // that is about to be deleted.
    gpString::operator= (oString);

    delete NextAtom;            // deleting a null pointer is a no-op
    NextAtom = 0;
    lrChildren.Clear ();
    nDoICount = 0;
    fTopLevel = 1;

    ParseAtoms ();

    return *this;
  }
        
// Replace the pattern with the character string cString and re-parse
// it as a top-level expression.
//
// The text is copied through the base-class assignment explicitly.
// Writing `(*this) = (cString)` here would select this very operator
// again (the argument is an exact match for char *) and recurse
// without end.
//
// State left over from the previous pattern is discarded before the
// new text is parsed: the follow-on atom chain, the "meaningful atom"
// flag set by '&', and the alternatives collected for an Optional
// atom.  ParseAtoms() only ever adds to these, so they would
// otherwise leak into the new expression.
gpRegExp &gpRegExp::operator= (char *cString)
  {
    // Copy first: cString could point into text owned by an atom of
    // the chain that is about to be deleted.
    gpString::operator= (cString);

    delete NextAtom;            // deleting a null pointer is a no-op
    NextAtom = 0;
    lrChildren.Clear ();
    nDoICount = 0;
    fTopLevel = 1;

    ParseAtoms ();

    return *this;
  }

//-------------------------------------------------------------
// Split the pattern held in this string into its first atom plus
// "the rest".  On return this object's text has been cut down to the
// first atom, ExpType says what kind of atom it is, and -- when a
// token was recognised and text remains -- NextAtom points to a newly
// allocated gpRegExp built from the remainder (which parses its own
// first atom in turn).
//
// Flags set here:
//   firstOnly - cleared at top level unless the pattern starts with
//               '^'; operator== uses it to anchor a Literal at
//               offset 0.
//   lastOnly  - set when '$' is reached; match_remainder() then
//               requires the subject string to be exhausted.
//   nDoICount - set when the atom is introduced by '&';
//               DumpParameters() reports only such atoms.
//
// NOTE(review): the comments below assume Remove(n) truncates the
// string at offset n and Remove(pos, len) deletes len characters
// starting at pos -- confirm against gpString.
void gpRegExp::ParseAtoms ( )
  {
    int nPos = 0;           // scan position within this string
    int GotToken = 0;       // set once the first atom is delimited
    gpString *copy;         // working copy that becomes "the rest"
    ExpType = Literal;
    size_t nOffset;
    
    // Reset the anchoring flags.  Nothing is released here; the
    // assignment operators delete NextAtom themselves before calling.
    firstOnly = 1;
    lastOnly = 0;

    if (fTopLevel)
      {
        // First, optimize by removing extraneous '.*'s 
        if (FindSubstring ("^.*") == 0)
          {
            firstOnly = 0;
            Remove (0, 3);
          }  
        else if (FindSubstring (".*") == 0)
          {
            firstOnly = 0;
            Remove (0, 2);
          }
          
        // Next, check for "Beginning-of-Line"
        // NOTE(review): `*this[0]` parses as `*(this[0])`, not
        // `(*this)[0]`.  It presumably yields the first character
        // only through a gpString conversion to a character pointer
        // -- confirm.
        if (*this[0] == '^')
            Remove (0, 1);
        else
            firstOnly = 0;

      }

    // Strip out the first atom in the string.
    // `copy` starts as a duplicate of the current text; each case
    // below trims the first atom off its front.
    copy = new gpString (cText);
    
    while (nPos < Length() && ! GotToken)
      {
        switch ((*this)[nPos])
          {
          case '\\':                  // We have to Quote the next
            Remove (nPos, 1);         // character, so remove the
            copy->Remove (nPos, 1);   // slash and get the char.
            nPos++;
            break;
            
          case '.':                   // if we get a '.'
            if ((nPos) == 0)          // and it's the first char
              if ((*this)[1] == '*')  // and it's followed by a '*'
                {
                  Remove (2);         // Then we have a '.*' token.
                  copy->Remove (0, 2);
                  GotToken = 1;
                  ExpType = MultiWild0;
                }
              else if ((*this)[1] == '+')  // if followed by '+'
                {
                  Remove (2);         // Then we have a '.+' token.
                  copy->Remove (0, 2);
                  GotToken = 1;
                  ExpType = MultiWild1;
                }
              else
                {
                  Remove (1);          // we have a plain
                  copy->Remove (0, 1); // old '.' token.
                  GotToken = 1;
                  ExpType = Wild;
                }
            else
              {
                // The '.' ends a run of ordinary characters: the run
                // is the atom and the '.' starts the rest.
                Remove (nPos);         // we have a literal token.
                copy->Remove (0, nPos);
                GotToken = 1;
                ExpType = Literal;
              }
            break;
          case '*':                 // if we get a '*'
            if (nPos == 1)          // and it's the second character
              {
                Remove (2);         // Then we have a <char>* token.
                copy->Remove (0, 2);
                GotToken = 1;
                ExpType = MultiChar0;
              }
            else
              {
                // The character just before the '*' belongs to the
                // next atom, so the literal stops one short of it.
                // NOTE(review): with nPos == 0 (pattern starts with
                // '*') this passes -1 to Remove.
                Remove (nPos - 1);  // Or, we have a literal token.
                copy->Remove (0, nPos - 1);
                GotToken = 1;
                ExpType = Literal;
              }
            break;
          case '+':                 // if we get a '+'
            if (nPos == 1)          // and it's the second character
              {
                Remove (2);         // Then we have a <char>+ token.
                copy->Remove (0, 2);
                GotToken = 1;
                ExpType = MultiChar1;
              }
            else
              {
                // Same split as for '*' above.
                // NOTE(review): with nPos == 0 this passes -1 to
                // Remove.
                Remove (nPos - 1);  // Or, we have a literal token.
                copy->Remove (0, nPos - 1);
                GotToken = 1;
                ExpType = Literal;
              }
            break;
          case '$':                   
            Remove (nPos);          // the buck stops here.
            copy->Remove (0);       // And we won't have any kids.
            lastOnly = 1;
            GotToken = 1;
            ExpType = Literal;
            break;
          case '[':                 // if we get a '['
            if ((nPos) > 0)         // and it's NOT the first char
              {
                Remove (nPos);      // we have a literal
                copy->Remove (0, nPos);
                ExpType = Literal;
                GotToken = 1;
              }   
            else                    // or we are beginning a range.
              nPos++;               // keep scanning for the ']'
            break;
          case ']':                 // when we get ']'
            if ((*this)[nPos + 1] == '*') // we may have [...]*
              {
                Remove (nPos + 2);
                copy->Remove (0, nPos + 2);
                GotToken = 1;
                ExpType = MultiRange0;
              }
            else if ((*this)[nPos + 1] == '+') // we may have [...]+
              {
                Remove (nPos + 2);
                copy->Remove (0, nPos + 2);
                GotToken = 1;
                ExpType = MultiRange1;
              }
            else        
              {
                Remove (nPos + 1);        // or just plain old [...]
                copy->Remove (0, nPos + 1);
                GotToken = 1;
                ExpType = Range;
              }
            break;
          case '{':                 // we have a brace
            if ((nPos) > 0)         // and it's NOT the first char
              {
                Remove (nPos);      // we have a literal
                copy->Remove (0, nPos);
                ExpType = Literal;
                GotToken = 1;
              }   
            else                    // or we are beginning an 
                                    // Optional expression
              {
                // Keep only the text between the braces in this
                // string; everything after '}' stays in `copy`.
                // NOTE(review): the FindChar result is not checked
                // for NPOS, so an unterminated '{' is not handled.
                nOffset = FindChar (Of, "}");
                copy->Remove (0, nOffset+1);
                Remove (nOffset);
                Remove(0, 1);
                
                // Each '|'-separated alternative becomes a child
                // expression.  The '|' is overwritten with '\0' so
                // that the child built from *this presumably sees
                // only the text up to it -- confirm against the
                // gpString copy constructor.
                while ( (nOffset = FindChar (Of, "|")) != NPOS )
                  {
                    (*this)[nOffset] = '\0';
                    lrChildren.AddItem (
                            new gpRegExp (*this, 0) );
                    Remove (0, nOffset+1);  
                  }
                // Whatever is left is the last (or only) alternative.
                lrChildren.AddItem (
                            new gpRegExp (*this, 0) );
                GotToken = 1;
                ExpType = Optional;
              }
            break;
          case '&' :                // if we get ampersand
            if (nPos > 0)           // and we've already got an atom
              {
                GotToken = 1;       // then we stop where we are
                ExpType = Literal;
                Remove (nPos);
                copy->Remove (0, nPos);
              }
            else
              {                     // otherwise, we are starting
                nDoICount = 1;      // a meaningful atom.
                Remove (0,1);       // drop the '&' and rescan from
                copy->Remove (0,1); // position 0
              }
            break;      
          default: 
            // Ordinary character: it stays part of the current atom.
            nPos++;
          }
      }
    // Pass the rest along to the next atom.
    // If the loop ran off the end without finding a token, the whole
    // text is this atom (ExpType stays Literal) and there is no
    // successor.
    if (GotToken && (*copy != ""))
        NextAtom = new gpRegExp (*copy, 0);
        // Flag this guy as NOT top level.
      
    if (copy)
        delete copy;  
  }
  
//------------------------------------------------------------------
//
//  Routine : operator== (gpString)
//
//  Function : sees if the given regular expression matches
//             this gpString.  
//
//  Notes : Normally, the gpString may be longer than the regular
//          expression;  the comparison ends with the last character
//          in that expression.  However, if the last character of
//          the expression is '$', then an exact match is called
//          for, and the gpString may not have extra characters.
//
//------------------------------------------------------------------

// Match this atom, and through match_remainder() every atom after
// it, against the front of sExpress.  Returns non-zero on a match.
//
// sExpress is consumed in place as atoms match.  Only a top-level
// atom saves the caller's string on entry and restores it on exit;
// for other atoms the caller sees the modified string.
int gpRegExp::operator==(gpString &sExpress)
  {
    int nMin = 0;       // fewest characters a Multi* atom may take
    int lMatch = 0;     // Assume that the match will fail.
    int nPos;
    gpString sBuffer;   // NOTE(review): unused here; the Optional
                        // case declares its own sBuffer, shadowing it
    char cBuffer;
    gpString sStringSaver;
    
    if (fTopLevel)
        sStringSaver = sExpress;    // restored before returning
    
    sLastMatch = "";
    
    switch (ExpType)
      {
        case Literal     : 
        
            // Accept the literal at offset 0 when anchored
            // (firstOnly), or anywhere in the string otherwise.
            nPos = sExpress.FindSubstring (cText);
            if ((firstOnly && !nPos)||(!firstOnly && (nPos != NPOS)))
              {
                sLastMatch = cText;
                // Drop everything up to the match, then the match
                // itself, leaving only what follows it.
                sExpress.Remove (0, nPos);
                sExpress.Remove (0, Length () );
                lMatch = match_remainder (sExpress);
              }
            break;
            
        case MultiChar1  :

            nMin = 1;       // set our min chars to 1 and fall thru

        case MultiChar0  :

            // nPos = length of the leading run of this atom's
            // character (presumably what FindChar(NotOf, ch) yields
            // -- confirm); the whole string if nothing else follows.
            nPos = sExpress.FindChar(NotOf,(*this)[0]);
            if (nPos == NPOS)
                nPos = sExpress.Length ();
                
            lMatch = DecrementingMatch(nMin, nPos, sExpress);    
            break;
            
        case Wild        : 
        
            // Any single character, provided there is one.
            if (sExpress.Length () )
              {
                sLastMatch = sExpress[0];
                lMatch = match_remainder(sExpress.Remove (0, 1) );
              }  
            break;
            
        case MultiWild1  : 
        
            nMin = 1;       // set our min chars to 1 and fall thru          
            
        case MultiWild0  : 
        
            // '.*' / '.+' may take anything up to the whole string.
            nPos = sExpress.Length ();
            lMatch = DecrementingMatch(nMin, nPos, sExpress);    
            break;
                         
        case Range       : 
        
            // Case-insensitive test of the first character against
            // the bounds at cText[LOWER_BOUND] and cText[UPPER_BOUND]
            // (offsets 1 and 3, i.e. presumably a "[a-z]" atom).
            cBuffer = sExpress[0];
            cBuffer = toupper (cBuffer);    
            if ( (cBuffer >= toupper(cText[LOWER_BOUND])) && 
                 (cBuffer <= toupper(cText[UPPER_BOUND])))
              {
                lMatch = match_remainder(sExpress.Remove (0,1));
                // NOTE(review): this records the upper-cased
                // character, whereas the Multi* cases record the
                // subject text unchanged -- confirm which is wanted.
                sLastMatch = cBuffer;
              }  
            break;
            
        case MultiRange1 : 
        
            nMin = 1;       // set our min chars to 1 and fall thru
            
        case MultiRange0 : 
        
            // Count the leading characters that fall inside the
            // range (case-insensitively); the loop body is empty.
            for (nPos = 0; 
                 (toupper(sExpress[nPos]) >= 
                      toupper(cText[LOWER_BOUND])) && 
                 (toupper(sExpress[nPos]) <= 
                      toupper(cText[UPPER_BOUND]));
                 nPos++
                );
                 
            lMatch = DecrementingMatch(nMin, nPos, sExpress);       
            break;                   
            
        case Optional  :
          { 
        
            // Keep an untouched copy so the match can be retried as
            // if the optional part were absent.
            gpString sBuffer(sExpress);
            // Seek() compares each alternative against sExpress via
            // this same operator, so sExpress may come back consumed.
            if (lrChildren.Seek (sExpress))
                sLastMatch = lrChildren.Peek()->LastMatch();
            if ((lMatch = match_remainder (sExpress)) == 0)
              {
                // Fallback: skip the optional atom entirely.
                sLastMatch = "";
                lMatch = match_remainder (sBuffer);
              }
          }    
      }
    
    if (fTopLevel)
        sExpress = sStringSaver;
    
    return (lMatch);            
  }

// These helpers will keep trying to match a Multi-type atom.
// First, try to match as much as possible, then try to match
// the next atom.  If that atom succeeds, good.
// If not, we need to decrement our match string by one character
// and retry.  We do this until we have reached our minimum chars.

int gpRegExp::DecrementingMatch(int nMin, int nPos
            , gpString &sExpress)
  {
    int lMatch = 0;
    gpString sBuffer;
    
    for (; !lMatch && (nPos >= nMin); nPos--)
      { 
        sBuffer = sExpress;
        sBuffer.Remove(0, nPos);
        lMatch = match_remainder(sBuffer);
      }  
    if (lMatch)
      {
        sLastMatch = sExpress;
        sLastMatch.Remove(++nPos);
      }
    return lMatch;         
  }
  
// Decide whether what is left of the subject string satisfies what
// is left of the expression:
//   - after a '$' atom (lastOnly) nothing may remain in sExpress;
//   - otherwise the next atom, if any, must match sExpress;
//   - with no next atom the match succeeds.
int gpRegExp::match_remainder (gpString &sExpress)
{
    if (lastOnly)
        return sExpress.Length () ? 0 : 1;

    if (NextAtom)
        return ((*NextAtom) == sExpress);

    return 1;
}

//------------------------------------------------------------------
//
//  Routine : DumpParameters
//
//  Function : traverses the atom tree, creating a list of the
//             meaningful parameters.
//
//------------------------------------------------------------------

// Append a newly allocated copy of this atom's last match to lsParms
// when the atom was flagged as meaningful (nDoICount), then let the
// following atoms do the same.
void gpRegExp::DumpParameters (StringList &lsParms)
{
    if (nDoICount)
      {
        lsParms.AddItem (new gpString (LastMatch () ));
      }

    if (!NextAtom)
        return;

    NextAtom->DumpParameters (lsParms);
}

//------------------------------------------------------------------
// The following code implements a List of gpRegExp's
// We keep it here because the Optional atoms use it.
//------------------------------------------------------------------

// Construct an empty list: no first, last or current element.
RegExpList::RegExpList ( )
    : List ()
{
    pCurrent = 0;
    pLast = 0;
    pFirst = 0;
}
         
// Delete every expression held in the list, walking it from the
// first element.
RegExpList::~RegExpList ( )
{
    gpRegExp *pDoomed = Reset ();

    while (pDoomed)
      {
        delete pDoomed;
        pDoomed = GetNext ();
      }
}
  
// Walk the list from the start and stop on the first expression
// that matches sName (using gpRegExp::operator==).  Returns that
// expression, which also becomes the current element, or 0 when
// none matches.
gpRegExp *RegExpList::Seek (gpString &sName)
{
    for (Reset (); pCurrent; GetNext ())
      {
        if (*(Peek ()) == sName)
            break;
      }

    return pCurrent;
}

// Convenience overload: wrap the character string in a gpString and
// search with the gpString version of Seek.
gpRegExp *RegExpList::Seek (char *cName)
{
    gpString sWanted (cName);

    gpRegExp *pHit = Seek (sWanted);
    return pHit;
}

// Rewind the underlying List and record the element it reports as
// both the first and the current one.  Returns that element.
gpRegExp *RegExpList::Reset ( )
{
    pCurrent = (gpRegExp *)List::Reset ();
    pFirst = pCurrent;

    return pFirst;
}

// Advance the underlying List, remember the element it returns as
// the current one, and return it.
gpRegExp *RegExpList::GetNext ( )
{
    pCurrent = (gpRegExp *)List::GetNext ();

    return pCurrent;
}

// Hand eNew to the underlying List.  The element List::AddItem
// returns becomes the last and the current element, and also the
// first one if none was recorded before.  Returns the current
// element.
gpRegExp *RegExpList::AddItem (gpRegExp *eNew)
{
    pCurrent = (gpRegExp *)List::AddItem (eNew);
    pLast = pCurrent;

    if (pFirst == 0)
      {
        pFirst = pLast;
      }

    return pCurrent;
}

// Return the current element without moving; 0 when there is none.
gpRegExp *RegExpList::Peek ( )
{
    return (pCurrent);
}

// Position the list on its nSequence-th element (counting from 1)
// and return it.  Returns 0, without moving, when nSequence exceeds
// nListSize.  Values below 2 simply select the first element.
gpRegExp *RegExpList::Seek (int nSequence)
{
    if (nListSize < nSequence)
      {
        return 0;
      }

    Reset ();

    // One step forward for every position past the first.
    int nPosition = 1;
    while (nPosition < nSequence)
      {
        GetNext ();
        nPosition++;
      }

    return pCurrent;
}

// Return a reference to the nSequence-th element (counting from 1),
// which also becomes the current element.
// NOTE(review): Seek() returns 0 when nSequence exceeds the list
// size and that pointer is dereferenced here unchecked; callers
// must pass a valid position.
gpRegExp &RegExpList::operator[] (int nSequence)
{
    gpRegExp *pWanted = Seek (nSequence);

    return *pWanted;
}

void RegExpList::Clear ( )
  {
    for (gpRegExp *eRegExp = Reset ();
         eRegExp;
         eRegExp = GetNext ()
        )
         delete eRegExp;
    List::Clear ();
    pFirst = pLast = pCurrent = 0;
  }  
