Program Scan;
{$D-}
{  SCAN.PAS - A utility to scan binary files for text strings.

Usage:  SCAN [-<offswitches>] [+<onswitches>] [input file] [output file]
where:    <offswitches> is the list of switches to turn off,
          <onswitches> is the list of switches to turn on, and
          input file and output file are the source and dest. files.
          To specify an output file you must give an input file.
          If either is not specified, SCAN will use stdin/stdout so
          you can also use command-line redirection.

or SCAN ? 
     for a list of switches.

For a complete list of switches, see the accompanying documentation.

Switches are processed left-to-right both within and between + and - groups.

Notes:  The default minimum string length is 4 characters.  The maximum
possible string length is 255 and any which run over will be cut into 255-
har lengths.

This program is public domain.  Knock yourself out.
}

uses Ecase;  { International version of UpCase }

type
     SwitchList = record
          DispEsc, DispTab, ConvTab, DispCR, DispLF, DispFF, AllowNull,
          DispNull, ForceNull, StripLead, StripTrail, HighOk, GraphicOk, 
          ForeignOk, Extended, StripHiIn, ConvSpace, UpperCase, English, 
          ConvBslash, ConvHiNum, ConvHiDot, ShortLine: boolean;
     end;

     filename = string[80];      {Input/output files if not stdin/stdout}
     msgstring = string[75];
     HighChars = #128..#255;
     Printset = set of HighChars;
var
     ifile, ofile: filename;
     Sw: SwitchList;
     MinLen: byte;
     Infile: file of char;
     Outfile: text;

Const
     ForeignSet: PrintSet = [#128..#154, #160..#165, #168, #173..#175];
     GraphicSet: PrintSet = [#176..#223];
     HibitSet: PrintSet = [#128..#255];

     Short = 72;         { Short line length S+ }
     Long = 255;         { Long line length S- }
     SpaceReplace = '_'; { Char to replace Space on ConvSpace }
     DotReplace = '.';   { Char to replace hi-bit on ConvHiDot }
     DefMin = 4;         { Default minimum line length }
     DefInput = '';      { Default input file (console)}
     DefOutput = '';     { Default output file (console)}

     Copyright1: MsgString = 
' SCAN Version 2.1 05-Jul-88 by Kenneth Herron. Placed in the public domain.';
{=============================}
procedure DoHelp;

begin
     writeln(Copyright1);
     writeln;
     writeln('Usage: SCAN [-off] [+on] [infile [outfile]]');
     writeln('Switches are:');
     writeln('$  Translate ESC to \$    \  Translate \ to \\');
     writeln('C  Translate CR to \C     L  Translate LF to \L');
     writeln('F  Translate FF to \F     @  Translate TAB to \T');
     writeln('T  Make TAB printable     H  Make ascii 128-255 printable');
     writeln('0  Make NULL printable    ?  Make foreign chars printable');
     writeln('!  Str must end in NULL   G  Make graphic chars printable');
     writeln('%  Strip hi bit (input)   E  Str must have vowel & consonant');
     writeln('{  Strip leading spaces   }  Strip trailing spaces');
     writeln('U  Upper-case output      B  Translate space to ', SpaceReplace);
     writeln('.  Convert hi-bit to ', DotReplace, 
                                   '    #  Convert hi-bit to ASCII');
     writeln('S  Max string length is (-)', Long:3,
             ' or (+)', Short:2,' characters');
     writeln;
     halt;
end;

{=============================}
procedure SetSwitches(var Ifile, Ofile: Filename; 
                      var SW: Switchlist; var MinLen: byte);

var
     T: filename;
     H, I: byte;
     J: integer;
     Setting: boolean;

begin
{ Set default switches }
fillchar(SW, SizeOf(SW), false);
with sw do
begin
     DispTab := true;
     StripLead := true;
     ShortLine := true;
end;
for H := 1 to ParamCount do
begin
     T := paramstr(H);
     if (T[1] = '+') or (T[1] = '-') then
     begin
          Setting := T[1] = '+';
          for I := 2 to length(T) do
          with sw do
          case upcase(T[I]) of
          'B': ConvSpace := setting;
          'E': English := setting;
          '\': ConvBslash := setting;
          '$': begin
                    DispEsc := setting;
                    ConvBslash := setting or ConvBslash;
               end;
          'T': begin
                    DispTab := setting;
                    ConvBslash := setting or ConvBslash
               end;
          '@': begin
                    ConvTab := setting;
                    DispTab := setting;
                    ConvBslash := setting or ConvBslash
               end;
          '0': begin
                    DispNull := setting;
                    ConvBslash := setting or ConvBslash
               end;
          '!': ForceNull := setting;
          '{': StripLead := setting;
          '}': StripTrail := setting;
          'H': begin
                    HighOk := setting;
                    GraphicOk := (not setting) and GraphicOk;
                    ForeignOk := (not setting) and ForeignOk
               end;
          '?': begin
                    ForeignOk := setting;
                    HighOk := (not setting) and HighOk
               end;
          'G': begin
                    GraphicOk := setting;
                    HighOk := (not setting) and HighOk
               end;
          'U': UpperCase := setting;
          'C': begin
                    DispCr := setting;
                    ConvBslash := setting or ConvBslash
               end;
          'L': begin
                    DispLf := setting;
                    ConvBslash := setting or ConvBslash
               end;
          'F': begin
                    DispFF := setting;
                    ConvBslash := setting or ConvBslash
               end;
          '#': begin
                    ConvHiNum := setting;
                    ConvBslash := setting or ConvBslash
               end;
          '.': ConvHiDot := setting;
          '%': StripHiIn := setting;
          'S': ShortLine := setting;
          '1'..'9':
               MinLen := ord(T[I]) and 15;
          end
     end
     else { File name }
          if ifile = '' then
               ifile := T
          else
          if ofile = '' then
               ofile := T
end;

{perform some housekeeping}
with SW do
begin
     if StripHiIn then
     begin
          HighOk := false;
          ForeignOk := false;
          GraphicOk := false
     end;
     AllowNull := DispNull or ForceNull;
     Extended := HighOk or ForeignOk or GraphicOk
end;
end;  {procedure SetSwitches}
{=============================}
procedure Process;

type
     MaxString = string[255];

var
     Len: byte;               { Max Length of a string }
     Str: MaxString;
     ch: char;
     Printable: PrintSet;
     StopStr: boolean;
     HighValid: PrintSet;

Function Validate(var Str: maxstring): boolean;

{ check any built strings to see if they shouldn't be printed for
some reason.  Currently two options are checked--E (must contain a
consonant & vowel) and ! (must end in NULL).  Strings may also be
rejected for being too short but we don't check that here. }

var foundc, foundv: boolean;
    I: byte;

begin
     if sw.ForceNull and (str[length(str)] <> #0) then
          Validate := false
     else
     if SW.English then 
     begin
     { routine to check the string for >= one consonant
       & >= one vowel }
          foundc := false;
          foundv := false;
          I := 1;
          repeat
               foundv := foundv or (upcase(str[I]) in 
                    ['A','E','I','O','U','Y']);
               foundc := foundc or (upcase(str[I]) in 
                    ['B'..'D','F'..'H','J'..'N','P'..'T','V'..'Z']);
               inc(I)
          until (foundv and foundc) or (I > length(str));
          Validate := foundv and foundc
     end
     else Validate := true
end;

procedure massage(var str: maxstring);

{ Perform changes to the string which can be most efficiently done
  all at once.  Currently we strip leading & trailing blanks, remove
  high bits, uppercase letters, and convert spaces to '_'s.
}

var I, First, Last: byte;

begin
with sw do
begin
     First := 1;
     if StripLead then  {leading spaces}
          while str[First] = ' ' do inc(First);
     Last := length(str);
     if StripTrail then  {trailing spaces}
          while str[Last] = ' ' do dec(Last);
     if StripLead or StripTrail then
          if Last < First then 
               Str := ''
          else
               Str := copy(Str, First, (Last - first) + 1);
     if UpperCase or ConvSpace then
     for I := 1 to length(str) do
          if UpperCase then
               Str[I] := UpCase(Str[I])
          else
               if Str[I] = ' ' then Str[I] := SpaceReplace;
end
end;

procedure print(var str: maxstring);

var I: byte;

begin
for I := 1 to length(Str) do
begin
     case str[I] of
          ' '..'[',
          ']'..'`',
          '{'..'~': write(Outfile, str[I]);
          #128..#255: if sw.ConvHiDot then
                         write(Outfile, '.')
                    else if sw.ConvHiNum then
                         write(Outfile, '\', ord(str[I]):3)
                    else
                         write(Outfile, str[I]);
          #9:       if sw.ConvTab then
                         write(Outfile, '\T')
                    else
                         write(Outfile, #9);
          '\':      if sw.ConvBslash then
                         write(Outfile, '\\')
                    else
                         write(Outfile, '\');
          #27:      write(Outfile, '\$');
          #13:      write(Outfile, '\C');
          #10:      write(Outfile, '\L');
          #12:      write(Outfile, '\F');
          #0:       if sw.DispNull then write(Outfile, '\0');
          else write(Outfile, str[I])
     end;  { case }
end;
writeln(Outfile)
end;

begin
{ set up the high-character set }
if sw.Extended then
begin
     if sw.HighOk then 
          HighValid := HibitSet
     else
          HighValid := [];
     if sw.ForeignOk then
          HighValid := HighValid + ForeignSet;
     if sw.GraphicOk then
          HighValid := HighValid + GraphicSet
end;

{ Set up the max string length }
if sw.ShortLine then
     Len := Short
else
     Len := Long;

while not eof(infile) do
begin
     { set up to read one string }
     StopStr := false;
     Str := '';
     repeat
          read(infile, ch);
          if sw.StripHiIn then
               ch := char(byte(ch) and $7f);
          if 
          ((ch >= ' ') and (ch <= '~')) or             { printable chars }
          (Sw.DispTab and (ch = #9))    or             { tab }
          (Sw.DispEsc and (ch = #27))   or             { escape }
          (Sw.DispCR and (ch = #13))    or             { carriage ret. }
          (Sw.DispLF and (ch = #10))    or             { line feed }
          (Sw.DispFF and (ch = #12))    or             { form feed }
          (Sw.AllowNull and (ch = #0))  or             { null }
          (sw.Extended and (ch in HighValid)) then     { extended set }
          begin
               Str := str + ch;
               Stopstr := (length(str) = Len) or (ch = #0)
          end
          else
               StopStr := true;
     until stopstr or eof(infile);
     if (length(str) >= MinLen) and Validate(str) then
     begin
          massage(str);
          if length(str) > 0 then print(str);
     end
end { while block }
end;
{=============================}
begin {main}
if (paramcount = 1) and (paramstr(1) = '?') then DoHelp;
     { DOHELP halts when it's finished }
Ifile := DefInput;
Ofile := DefOutput;
Minlen := DefMin;
SetSwitches(Ifile, Ofile, Sw, MinLen);
assign(infile, ifile);
assign(Outfile, ofile);
FileMode := 0;   {read-only}
{$I-}
reset(infile);
if IOResult <> 0 then
begin
     if Ifile = '' then Ifile := 'standard input';
     writeln('Couldn''t open ', Ifile);
     halt(1)
end;
rewrite(Outfile);
if IOResult <> 0 then
begin
     if Ofile = '' then Ofile := 'standard output';
     writeln('Couldn''t open ', Ofile);
     halt(2)
end;
{$i+}
Process;
close(infile);
close(Outfile)
end.
