{$A+,B-,D-,E-,F-,G-,I-,L-,N-,O-,R-,S-,V+,X+}
{$M 6144,8192,655360}
Program Words;
{ WORDS - A word extracter program.  Copyright 1990,91 by Edwin T. Floyd. }
Uses Dos, Crt, Token, PairHeap;

Const
  WordChar = ['a'..'z','A'..'Z']; { Default WordSet }
  DefaultOutput = '';             { Default output filename (''=stdout) }
  BufSize = 4096;                 { I/O buffer size }

Type
  SetOpType = (Union, Intersection, Complement);
  SetOfChar = Set Of Char;
  SortEntryType = Object(HeapEntry)
  { Data structure used for sorting }
    Token : Word;
  End;
  SortHeapType = Object(Heap)
  { PairHeap compare function override }
    Function Less(Var x, y : HeapEntry) : Boolean; Virtual;
  End;
  FileEntryPtr = ^FileEntry;
  FileEntry = Record
  { Input file name list entry }
    NextFile : FileEntryPtr;
    FileName : PathStr;
  End;

Const
  FileList : FileEntryPtr = Nil;       { File name list (head) }
  LastFile : FileEntryPtr = Nil;       { File name list (tail) }
  HashTab : PToken = Nil;              { Hash table pointer }
  TestTab : PToken = Nil;              { Test hash table pointer }
  WordCount : LongInt = 0;             { Total number of words examined }
  ReturnCode : Word = 0;               { Return code for Halt }
  WordSet : SetOfChar = WordChar;      { Words are made of these }
  SetOp : SetOpType = Union;           { Set operation }
  Alphabetize : Boolean = False;       { If true, sort output words }
  LowerCase : Boolean = False;         { If true, case is significant }
  HighOrder : Boolean = False;         { If true, clear high-order bits }
  SuppressOutput : Boolean = False;    { If true, do not write output file }
  OutOfMemory : Boolean = False;       { Set true by HandleHeapError }
  Aborted : Boolean = False;           { True if operator aborted }
  OutName : PathStr = DefaultOutput;   { Output file name }

Var
  OldMem : LongInt;                    { Original value of MemAvail }
  SortHeap : SortHeapType;             { Sorter object }
  TextFile : File;                     { Input/Output file }
  TextBuf : Array[1..BufSize] Of Char; { I/O buffer for TextFile }

{$S+}

Function ProcessParameter(s : String) : Boolean; Forward;

Function ParseParamString(s : String) : Boolean;
{ Extract parameters from a string and process them; return True if all OK. }
Var
  i, j : Word;
  ParamsOk : Boolean;
Begin
  ParamsOk := True;
  While (s <> '') And (s[Length(s)] = ' ') Do Dec(s[0]);
  While s <> '' Do Begin
    i := 1;
    While (i <= Length(s)) And (s[i] = ' ') Do Inc(i);
    j := Succ(i);
    While (j <= Length(s)) And (s[j] <> ' ') Do Inc(j);
    If Not ProcessParameter(Copy(s, i, j - i)) Then ParamsOk := False;
    Delete(s, 1, Pred(j));
  End;
  ParseParamString := ParamsOk;
End;

Function ProcessParameter(s : String) : Boolean;
{ Process command line parameter or file name; return True if OK. }
Var
  ThisFile : FileEntryPtr;
  IncludeFile : Text;
  ParamOk : Boolean;
  i, j : Word;
  IoRes : Integer;

  Procedure GetFiles(Var s : String);
  Var
    Path : PathStr;
    Dir : DirStr;
    Name : NameStr;
    Ext : ExtStr;
    Search : SearchRec;
  Begin
    Path := FExpand(s);
    FSplit(Path, Dir, Name, Ext);
    FindFirst(Path, Archive, Search);
    If DosError <> 0 Then Begin
      WriteLn('No files match ', s);
      ParamOk := False;
    End;
    While DosError = 0 Do Begin
      Path := Dir + Search.Name;
      ThisFile := FileList;
      While (ThisFile <> Nil) And (ThisFile^.FileName <> Path) Do
        ThisFile := ThisFile^.NextFile;
      If ThisFile = Nil Then Begin
        New(ThisFile);
        If ThisFile <> Nil Then Begin
          With ThisFile^ Do Begin
            NextFile := Nil;
            FileName := Path;
          End;
          If LastFile = Nil Then FileList := ThisFile
          Else LastFile^.NextFile := ThisFile;
          LastFile := ThisFile;
        End;
      End Else WriteLn('Already in list: ', Path);
      FindNext(Search);
    End;
  End;

Begin
  ParamOk := True;
  If (s[1] = '-') Or (s[1] = '/') Then Case UpCase(s[2]) Of
    'U' : SetOp := Union;
    'I' : SetOp := Intersection;
    'C' : SetOp := Complement;
    'A' : If s[3] = '-' Then Alphabetize := False Else Alphabetize := True;
    'L' : If s[3] = '-' Then LowerCase := False Else LowerCase := True;
    'H' : If s[3] = '-' Then HighOrder := False Else HighOrder := True;
    'O' : Begin
      Delete(s, 1, 2);
      For i := 1 To Length(s) Do s[i] := UpCase(s[i]);
      If (s <> '') And ((s[1] = '-') Or (s = 'NUL')) Then Begin
        SuppressOutput := True;
        OutName := '-';
      End Else Begin
        SuppressOutput := False;
        If s = '' Then OutName := s Else OutName := FExpand(s);
      End;
    End;
    'W' : Begin
      Delete(s, 1, 2);
      Case s[1] Of
        '+' : ;
        '-' : WordSet := [];
        Else Begin
          WriteLn('WordSet (-W) option must be followed by + or -.');
          ParamOk := False;
        End;
      End;
      Delete(s, 1, 1);
      For i := 1 To Length(s) Do
        WordSet := WordSet + [s[i]];
    End;
    Else Begin
      WriteLn('Unrecognized option: ', s);
      ParamOk := False;
    End;
  End Else If s[1] = '@' Then Begin
    Delete(s, 1, 1);
    For i := 1 To Length(s) Do s[i] := UpCase(s[i]);
    Assign(IncludeFile, s);
    Reset(IncludeFile);
    IoRes := IoResult;
    If IoRes = 0 Then Begin
      WriteLn('Processing include file ', s);
      Repeat
        ReadLn(IncludeFile, s);
        IoRes := IoResult;
        If IoRes = 0 Then If Not ParseParamString(s) Then ParamOk := False;
      Until Eof(IncludeFile) Or (IoRes <> 0);
      If IoRes <> 0 Then Begin
        WriteLn('Error ', IoRes, ' reading include file');
        ParamOk := False;
      End;
      Close(IncludeFile);
      IoRes := IoResult;
    End Else Begin
      WriteLn('Error ', IoRes, ' opening include file ', s);
      ParamOk := False;
    End;
  End Else GetFiles(s);
  ProcessParameter := ParamOk;
End;

Procedure ParseParams;
{ Interpret environment and command line parameters; display Help info. }
Var
  i, j : Word;
  ParamsOk : Boolean;
  Ch : Char;
  s : String;
Begin
  WriteLn('WORDS v1.2 - A word extractor program.  Copyright (c) 1990,91 by Edwin T. Floyd.');
  ParamsOk := True;
  If Not ParseParamString(GetEnv('WORDS')) Then Begin
    WriteLn('Error found in SET WORDS=.. environment string');
    ParamsOk := False;
  End;
  For i := 1 To ParamCount Do Begin
    FillChar(s[1], 255, ' ');
    s := ParamStr(i);
    If Not ProcessParameter(s) Then ParamsOk := False;
  End;
  If Not ParamsOk Then Begin
    WriteLn('At least one parameter was in error.  Run WORDS with no parameters');
    WriteLn('to see documentation.');
    Halt(1);
  End Else If FileList = Nil Then Begin
    WriteLn;
    WriteLn('  WORDS filenames.. [-U/-I/-C] [-A] [-L] [-H] [-W[+/-]abc..] [-Oname] [@name]' );
    WriteLn;
    WriteLn('All command line parameters are separated by spaces.  Input text filenames');
    WriteLn('and options may be intermixed; options are distinguished by a leading hyphen:');
    WriteLn;
    WriteLn('  -U, -I or -C specifies the set operation to be performed on the extracted');
    WriteLn('  words from the files.  The operations are:');
    WriteLn('    -U Union:        Keep all unique words from any input file (default);');
    WriteLn('    -I Intersection: Keep unique words common to all files;');
    WriteLn('    -C Complement:   Keep unique words from second and subsequent files only');
    WriteLn('                     if they are not contained in the first file.');
    WriteLn('  -A[-] Sort output words alphabetically (default off).');
    WriteLn('  -H[-] Clear high-order bits on input file (i.e. WordStar, default off).');
    WriteLn('  -L[-] Lower case is significant (default off).');
    WriteLn('  -W-abc.. Replace the word character set with the indicated characters');
    WriteLn('     (default is all alphabetic characters, upper and lower case).');
    WriteLn('  -W+abc.. Add additional characters to the word character set.');
    WriteLn('  -O[name] Name the output file (default is name omitted => stdout).');
    WriteLn('  -O- Suppress output (counts are still displayed on screen).');
    WriteLn;
    WriteLn('The "@" prefixes the name of an ASCII include file which may contain');
    Write('filenames, options, and nested include files, in any order.    ');
    Ch := ReadKey;
    WriteLn;
    WriteLn;
    WriteLn('You may use the DOS "SET" command to specify default parameters.  Examples:');
    WriteLn;
    WriteLn('  SET WORDS=-U -A+ -L+ -Owords.out -W-ABCDEFGHIJKLMNOPQRSTUVWXYZ');
    WriteLn('  SET WORDS=@defaults.wrd -O');
    WriteLn;
    WriteLn('Command line parameters override "SET" parameters.  WORDS examples:');
    WriteLn;
    WriteLn('  WORDS oldwords.lst document.txt -W+-'' -C -Onewwords.lst');
    WriteLn('  WORDS @filename.lst -I -Oallwords.txt');
    WriteLn('  WORDS file1.txt -A+ -U -L- -O | nextprog');
    WriteLn;
    WriteLn('WORDS was written by:');
    WriteLn;
    WriteLn('  Edwin T. Floyd         [76067,747]  (CompuServe)');
    WriteLn('  #9 Adams Park Court    404/576-3305 (work)');
    WriteLn('  Columbus, GA 31909     404/322-0076 (home)');
    Halt(0);
  End Else Begin
    Case SetOp Of
      Union : s := '-U';
      Intersection : s := '-I';
      Complement : s := '-C';
    End;
    If Alphabetize Then ch := '+' Else ch := '-';
    s := s + ' -A' + ch;
    If LowerCase Then ch := '+' Else ch := '-';
    s := s + ' -L' + ch;
    If HighOrder Then ch := '+' Else ch := '-';
    s := s + ' -H' + ch;
    OldMem := MemAvail;
    WriteLn('Options: ', s, ' -O', OutName, ', ',
      OldMem Shr 10, 'k free.');
    WriteLn('Press <Esc> to stop.');
  End;
End;

{$S-}

Function SortHeapType.Less(Var x, y : HeapEntry) : Boolean;
{ Sort compare function override }
Var
  xx : SortEntryType Absolute x;
  yy : SortEntryType Absolute y;
Begin
  Less := HashTab^.TokenAddress(xx.Token)^ < HashTab^.TokenAddress(yy.Token)^;
End;

Function ParseInputBlock(Len : Word) : Word;
{ Insert words from input block into hash table }
Var
  Words : Word;
  t : TokenString;
  i, Toss : Word;
Begin
  i := 1;
  Words := 0;
  While i <= Len Do Begin
    t := '';
    While (i <= Len) And Not (TextBuf[i] In WordSet) Do Inc(i);
    If i <= Len Then Begin
      While (i <= Len) And (Length(t) < TokenStringSize)
      And (TextBuf[i] In WordSet) Do Begin
        Inc(t[0]);
        If LowerCase Then t[Ord(t[0])] := TextBuf[i]
        Else t[Ord(t[0])] := UpCase(TextBuf[i]);
        Inc(i);
      End;
      Inc(Words);
      Case SetOp Of
        Union : Toss := HashTab^.TokenInsertText(t);
        Intersection : If (TestTab <> Nil) And (TestTab^.TextToken(t) <> 0) Then
          Toss := HashTab^.TokenInsertText(t);
        Complement : If (TestTab <> Nil) And (TestTab^.TextToken(t) = 0) Then
          Toss := HashTab^.TokenInsertText(t);
      End;
    End;
  End;
  ParseInputBlock := Words;
End;

Procedure ProcessNextFile;
{ Open and process the next input file pointed to by FileList. }
Var
  ThisFile : FileEntryPtr;
  TempTab : PToken;
  FileWords : LongInt;
  i, MaxLen, Len : Word;
  FileResult : Integer;
Begin
  ThisFile := FileList;
  With ThisFile^ Do Begin
    Write(FileName, ': ');
    Assign(TextFile, FileName);
    Reset(TextFile, 1);
    FileResult := IoResult;
    If FileResult = 0 Then Begin
      If HashTab = Nil Then New(HashTab, Init);
      Len := 0;
      FileWords := 0;
      Repeat
        BlockRead(TextFile, TextBuf[Succ(Len)], BufSize-Len, i);
        FileResult := IoResult;
        If FileResult = 0 Then Begin
          MaxLen := Len + i;
          If HighOrder Then For i := Succ(Len) To MaxLen Do
            TextBuf[i] := Chr(Ord(TextBuf[i]) And $7F);
          Len := MaxLen;
          If Not Eof(TextFile) Then Begin
            While (Len > 0) And (TextBuf[Len] In WordSet) Do Dec(Len);
            If (Len = 0) Then Len := MaxLen;
          End;
          FileWords := FileWords + ParseInputBlock(Len);
          MaxLen := MaxLen - Len;
          If MaxLen > 0 Then
            Move(TextBuf[Succ(Len)], TextBuf[1], MaxLen);
          Len := MaxLen;
          Write(^M, FileName, ': ', FileWords, ' words, ',
            HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k');
          While KeyPressed Do If ReadKey = ^[ Then Aborted := True;
        End;
      Until Eof(TextFile) Or (FileResult <> 0) Or OutOfMemory Or Aborted;
      Close(TextFile);
      WriteLn(^M, FileName, ': ', FileWords, ' words, ',
        HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k');
      WordCount := WordCount + FileWords;
    End Else WriteLn('Unable to open input file ', FileName);
    If FileResult <> 0 Then Begin
      WriteLn('Error ', FileResult);
      Inc(ReturnCode);
    End;
    FileList := NextFile;
    If SetOp = Intersection Then Begin
      TempTab := TestTab;
      TestTab := HashTab;
      HashTab := TempTab;
      If HashTab <> Nil Then Begin
        Dispose(HashTab, Done);
        HashTab := Nil;
      End;
    End;
  End;
  Dispose(ThisFile);
End;

Procedure ProcessFirstFile;
{ Process the first input file. }
Var
  TempTab : PToken;
  Op : SetOpType;
Begin
  Op := SetOp;
  SetOp := Union;
  ProcessNextFile;
  SetOp := Op;
  If SetOp In [Intersection, Complement] Then Begin
    TempTab := TestTab;
    TestTab := HashTab;
    HashTab := TempTab;
  End;
End;

Procedure SortWords;
{ Write words to output file, optionally sorted. }
Var
  SortEntry : ^SortEntryType;
  FileResult : Integer;
  i : Word;
  OutFile : Text;
Begin
  If SuppressOutput Then WriteLn('Output suppressed') Else Begin
    Assign(OutFile, OutName);
    SetTextBuf(OutFile, TextBuf);
    ReWrite(OutFile);
    FileResult := IoResult;
    If FileResult = 0 Then Begin
      If Alphabetize Then With SortHeap Do Begin
        Init;
        For i := 1 To HashTab^.TokMaxToken Do Begin
          New(SortEntry);
          If SortEntry <> Nil Then Begin
            SortEntry^.Token := i;
            Insert(SortEntry^);
          End;
        End;
        If OutOfMemory Then Begin
          WriteLn('Sort suppressed due to insufficient memory');
          Alphabetize := False;
          Inc(ReturnCode);
        End;
      End;
      If Alphabetize Then With SortHeap Do Begin
        Write('Sorting and writing ', EntryCount, ' words to ');
        If OutName = '' Then Write('<stdout>') Else Write(OutName);
        WriteLn(', ', (OldMem-MemAvail) Shr 10, 'k');
        For i := 1 To EntryCount Do Begin
          SortEntry := DeleteLowEntry;
          If FileResult = 0 Then Begin
            WriteLn(OutFile, HashTab^.TokenAddress(SortEntry^.Token)^);
            FileResult := IoResult;
          End;
        End;
      End Else Begin
        Write('Writing ', HashTab^.TokMaxToken, ' words to ');
        If OutName = '' Then WriteLn('<stdout>') Else WriteLn(OutName);
        For i := 1 To HashTab^.TokMaxToken Do If FileResult = 0 Then Begin
          WriteLn(OutFile, HashTab^.TokenAddress(i)^);
          FileResult := IoResult
        End;
      End;
      If FileResult <> 0 Then Begin
        WriteLn('Error ', FileResult, ' writing file ', OutName);
        Inc(ReturnCode);
      End;
      Close(OutFile);
      FileResult := IoResult;
      If FileResult <> 0 Then Begin
        WriteLn('Error ', FileResult, ' closing file ', OutName);
        Inc(ReturnCode);
      End;
    End Else WriteLn('Error ', FileResult, ' opening file ', OutName);
  End;
End;

{$F+}
Function HandleHeapError(Size : Word) : Integer;
Begin
  If Size > 0 Then Begin
    HandleHeapError := 1;
    OutOfMemory := True;
  End;
End;
{$F-}

Begin
  FileMode := $40;
  HeapError := @HandleHeapError;
  OldMem := MemAvail;
  ParseParams;
  ProcessFirstFile;
  While (FileList <> Nil) And Not (OutOfMemory Or Aborted) Do ProcessNextFile;
  If OutOfMemory Then Begin
    WriteLn('Input file processing terminated due to insufficient memory');
    WriteLn('Words collected so far will be written to output file');
    Inc(ReturnCode);
  End;
  If Aborted Then Begin
    WriteLn('File processing aborted by operator');
    SuppressOutput := True;
    Inc(ReturnCode);
  End;
  If SetOp = Intersection Then Begin
    HashTab := TestTab;
    TestTab := Nil;
  End Else If Alphabetize And Not SuppressOutput Then Begin
    WriteLn('Maximizing free memory for sort');
    If TestTab <> Nil Then Dispose(TestTab, Done);
    TestTab := Nil;
  End;
  WriteLn('Final Counts: ', WordCount, ' words examined, ',
    HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k in use');
  OutOfMemory := False;
  SortWords;
  WriteLn('Done!');
  Halt(ReturnCode);
End.
