/* TDS v1.0 - Transliteration, Deletion, and Squeeze filter
 * Copyright 1990 by Edward Lee 
 * edlee@chinet.chi.il.us
 *
 * TDS is a fast superset of the UNIX Sys V(tm) TR program.
 *
 * Suggested compilation:
 *     MSDOS Turbo C v2.0:  tcc -K -O -f- -mt -lt tds.c
 *                  Sys V:  cc -O -s tds.c -o tds
 * Sys V with shared libs:  cc -O -s tds.c -o tds -lc_s
 *
 * This program uses a prefix string length indicator to allow ALL characters
 * to be handled.  Examine the MYSTR type definition if you do not know what 
 * is meant by a prefix string length indicator.  This idea is borrowed from
 * Texas Instruments by the author of this program, and it is implemented
 * here in a more general way.  As far as I know, the concept previously had
 * no name.  The advantage of using a prefix string length indicator is that
 * it requires no special character to terminate a string.
 */

/*
* PROGRAM HISTORY
*~01:00-04:20 08Jun1990, Program performs multiple char translation
* 16:15-18:33 ", Added option switch processing
* 10:00-11:11 09Jun, Added -c)omplement operation in setup_t()
* 12:20-12:33 ", Added -s)queeze operation in main()
* 12:33-13:33 ", Moved -c)omplement operation to its own routine
* 23:00-23:30 ", Added -d)elete operation in main()
* 23:30-23:52 ", Tied the various options together
* 21:31-22:46 10Jun, Changed prefixed string type into a structure
* 16:50-17:19 11Jun, Parenthesized conditional, corrected off-by-one error
* 17:19-18:56 ", Induced and incorporated undocumented length rules
* 18:56-17:42 ", Cleaned up unneeded variables, tested program
* 01:05-01:20 14Jun, Added code for -ds option combination
* 02:30-02:42 Optimized -ds code
*       01:55 17Jun, Added #ifndef ushort, compiled under Turbo C v2.0
* 15:30-16:56 23Aug, Began to incorporate esch() and range() routines which
*                    were developed and tested separately over four days 
* 01:15-01:39 24Aug, Modified range() for variable parameters
* 01:00-03:06 27Aug, Revised comments, checked code consistency & compromised
* 15:34-18:32 29Aug, Modified range() and range2() for MYSTR, added error msgs,
*                    compiled, tested, corrected off-by-one error in range2(),
* 03:17-03:25 01Sep, Corrected a change in getopts() which introduced an
*                    indefinite pointer condition.
*       02:40 25Nov, Corrected the update of s2->i[] in setup_t()
*       20:34 25Nov, Cleaned up unused variables, unneeded brackets
*       11:25 21Dec, Test
*~23:30-23:47 21Dec, Deleted unused function, minor loop optimization, test
*~12:00-12:45 22Dec, Deleted unnecessary complications in setup_t, test,
*                    optimized (delete && squeeze) section, test
*~21:45-22:07 22Dec, Optimized setup_t(), test
*~      11:20 23Dec, Cleanup, test
*       11:20 26Dec, Replaced char with unchar to prevent crashes on *nix
*                    systems; spent two days tracking this down
*                    Deleted range() and replaced function with range2()
*       14:15 26Dec, Final test before release
 */

#include <stdio.h>
#include <stdlib.h>

#ifndef __TURBOC__
#include <sys/types.h>
#endif

#ifndef unchar
#define unchar unsigned char
#endif

#ifndef ushort
#define ushort short unsigned int
#endif

#define SIZE (32768)
#define TRUE (1)
#define FALSE (0)

/* Capacity, in characters, of the s[] member of MYSTR */
#define MAXMYSLEN (256)
/* Prefix-length string: the number of characters is kept in len instead of
 * being marked by a terminating character, so any character code may be
 * stored in s[].
 */
typedef struct {
          int    len;             /* Number of characters stored in s[] */
          unchar s [MAXMYSLEN];   /* The characters, in the order appended */
          ushort i [256];         /* Map of unique characters in {string}, */
                                  /* a.k.a. the i)ntersection of {string}  */
                                  /* with the character set {0...255}:     */
                                  /* i[c] is TRUE when c is in the string  */
        } MYSTR;

/* main() fills this buffer from stdin with fread(), filters the bytes in
 * place, and writes the result to stdout from the same buffer.
 */
unchar a[SIZE];                   /* Input/Output character buffer */

/* The table starts out as the identity mapping (entry c holds c).
 * setup_t() overwrites the entries selected by the first string with the
 * corresponding characters of the second string.
 */
unchar t[] = {   /* Translation table for character codes 0-255 */
  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
 10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
 20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
 30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
 40,  41,  42,  43,  44,  45,  46,  47,  48,  49,
 50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
 60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
 70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
 80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
 90,  91,  92,  93,  94,  95,  96,  97,  98,  99,
100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
250, 251, 252, 253, 254, 255
};  /* Translation table for character codes 0-255 */

/* Boolean option flags; getopts() sets them from the command line. */
int delete  = FALSE;              /* -d: drop the characters of string1      */
int invert  = FALSE;              /* -c: complement string1 before using it  */
int squeeze = FALSE;              /* -s: collapse runs of string2 characters */


/* Report a fatal error: write the message s followed by a newline to
 * stderr, then terminate the program with exit status 1.
 * This function does not return.
 */
void error(s)
 unchar *s;
{
  /* s is declared unchar *, but fputs() takes a plain char pointer */
  (void)fputs((char *)s, stderr);
  (void)fputs("\n", stderr);
  exit(1);
} /* error */


/* Value of the character c as a digit in the given base (at most 16),
 * or -1 when c is not a digit of that base.
 */
static int esch_digit(c, base)
 int c, base;
{
int v;

  if (c >= '0' && c <= '9')
     v = c - '0';
  else
  if (c >= 'A' && c <= 'F')
     v = c - 'A' + 10;
  else
  if (c >= 'a' && c <= 'f')
     v = c - 'a' + 10;
  else
     return(-1);

  return(v < base ? v : -1);
}  /* esch_digit */


/* Read at most maxdigits digits of the given base starting at *p and
 * advance *p past the digits that were used.  Returns the value read,
 * or -1 (with *p untouched) when *p does not start with a digit.
 */
static int esch_number(p, base, maxdigits)
 unchar **p;
 int base, maxdigits;
{
int v;
int n=(-1);

  while (maxdigits-- > 0 && (v = esch_digit ((int)**p, base)) >= 0) {
        n = (n < 0 ? 0 : n*base) + v;
        ++(*p);
  }

  return(n);
}  /* esch_number */


/* Routine to decode backslash escape codes at run-time.
 *
 * Reads one (possibly escaped) character from the string at *p and
 * advances *p past it.  Returns the character code (0-255), or -1 when
 * the end of the string is reached; in that case *p is left pointing at
 * the terminating NUL.
 *
 * Escape codes recognized after a backslash (letters in either case):
 *   \ooo          1-3 octal digits, at most \377
 *   \dnnn         1-3 decimal digits, at most \d255
 *   \hnn or \xnn  1-2 hexadecimal digits
 *   \a \b \t \n \v \f \r   codes 7, 8, 9, 10, 11, 12, 13
 *   \s            space
 *   \\            backslash
 * A backslash before any other character yields that character.  This
 * includes \d, \h and \x when no digit follows: the letter itself is
 * returned instead of being dropped.
 *
 * An octal or decimal code above 255 is fatal (see error()).
 */
int esch(p)
 unchar **p;
{
int c, n;

  c = (int)**p;

  if (c != '\\') {                /* Ordinary character */
     if (c == '\000')             /* Real end-of-string? */
        return(-1);

     ++(*p);
     return(c);
  }

  ++(*p);                         /* Skip over '\' */
  c = (int)**p;

  if (c == '\000')                /* String ends right after the '\' */
     return(-1);

  /* Octal escape character */
  if (c >= '0' && c <= '7') {
     n = esch_number (p, 8, 3);

     if (n>255)
        error ("tds: the maximum octal escape code is \\377");

     return(n);
  }

  ++(*p);                         /* Consume the character after the '\' */

  switch (c) {
    case 'd': case 'D':           /* Decimal escape character */
         n = esch_number (p, 10, 3);

         if (n>255)
            error ("tds: the maximum decimal escape code is \\d255");

         return(n < 0 ? c : n);   /* No digits: the letter stands for itself */

    case 'h': case 'H':           /* Hexadecimal escape character */
    case 'x': case 'X':
         n = esch_number (p, 16, 2);
         return(n < 0 ? c : n);   /* No digits: the letter stands for itself */

    case 'a': case 'A':  return(7);     /* A)udible bell      */
    case 'b': case 'B':  return(8);     /* B)ackspace         */
    case 't': case 'T':  return(9);     /* T)ab               */
    case 'n': case 'N':  return(10);    /* N)ewline, linefeed */
    case 'v': case 'V':  return(11);    /* V)ertical tab      */
    case 'f': case 'F':  return(12);    /* F)ormfeed          */
    case 'r': case 'R':  return(13);    /* carriage R)eturn   */
    case 's': case 'S':  return(32);    /* S)pace             */
    case '\\':           return(92);    /* backslash          */
    default:             break;
  }

  return(c);                      /* non-escape code */
}  /* esch */


/* Add the character c to the end of the prefix string s: store it in the
 * next free slot, mark it in the intersection map and bump the length.
 * A string that already holds MAXMYSLEN characters is a fatal error.
 */
void pcappend(c, s)
 unchar c;
 MYSTR *s;
{
int slot = s->len;             /* Index of the first free position */

  if (slot >= MAXMYSLEN)
     error ("tds: maximum string length exceeded");

  s->s[slot] = c;
  s->i[(int)c] = TRUE;         /* c is now a member of the string */
  s->len = slot + 1;
}  /* pcappend */


/* Expand a bracketed character map and append the result to d.
 *
 * On entry *s points at the opening '[' (which is skipped when present);
 * on return *s points just past the closing ']'.  Between the brackets
 * any number of items may appear, e.g. [a-zr*5]:
 *
 *   c1-c2   every character from c1 to c2 inclusive, ascending or
 *           descending depending on which end is larger
 *   c*n     n copies of c, n being 1-3 decimal digits, at most 256
 *   c*      (or c*0) enough copies of c to fill d up to 256 characters,
 *           matching tr, where a zero or missing count pads the string
 *
 * Characters are read with esch(), so backslash escapes are allowed.
 * Malformed input is reported through error(), which does not return.
 */
void range2(s, d)
 unchar **s;
 MYSTR *d;
{
int c1, c2, n, digits;

  if (**s=='[')
     ++(*s);

  while (**s != ']') {
        c1 = esch (s);                  /* Get the 1st character */

        if (c1 < 0)
           error ("tds: was expecting a character map in string2, e.g. [a-zr*5]");

        if (**s=='-') {
           ++(*s);

           c2 = esch (s);               /* Get the 2nd character */
           if (c2 < 0)
              error ("tds: was expecting end of character range after '-', e.g. [a-zZ-A], in string2");

           if (c1 < c2)                 /* Does the range ascend or descend? */
              while (c1 <= c2)          /* It ascends */
                    pcappend (c1++, d);
           else
              while (c1 >= c2)          /* It descends (or is one character) */
                    pcappend (c1--, d);
        } else
        if (**s=='*') {
           ++(*s);

           n=(-1);                      /* -1 = no repeat count given */

           /* Up to three decimal digits of repeat count */
           for (digits=0; digits<3 && **s >= '0' && **s <= '9'; digits++) {
               n = (n < 0 ? 0 : n*10) + (**s - '0');
               ++(*s);
           }

           if (n>256)
              error ("tds: character multiplier may not exceed 256, e.g. [a*256], in string2");

           if (n<=0)                    /* Missing or zero count: pad to the end */
              n= 256 - (d -> len);

           while (n-- > 0)
                 pcappend (c1, d);

        } else
        error ("tds: was expecting a '-' or '*' in character range, e.g. [a-zr*5], in string2");
  }  /* while */

  ++(*s);  /* Skip over closing ']' */
}  /* range2 */


/* Reset the prefix string s to the empty string: no character is marked
 * in the intersection map and the length is zero.  The contents of s->s
 * are left as they were.
 */
void clr_mystr(s)
 MYSTR *s;
{
int c;

  for (c=0; c<256; c++)
      s->i[c] = FALSE;

  s->len = 0;
}  /* clr_mystr */


/* Expand one string argument into the prefix string dst.  Bracketed
 * character maps are handed to range2(); everything else is read one
 * character at a time with esch().
 */
static void getopts_string(arg, dst)
 unchar *arg;
 MYSTR *dst;
{
int ch;

  while (*arg) {
        if (*arg == '[')
           range2 (&arg, dst);
        else {
           ch = esch (&arg);

           /* esch() returns a negative value when the argument ends in
            * the middle of an escape (e.g. a trailing backslash).  Store
            * a literal backslash rather than the converted -1 (0xFF).
            */
           if (ch < 0)
              ch = '\\';

           pcappend (ch, dst);
        }
  }
}  /* getopts_string */


/* Process the command line.
 *
 * An argument that begins with '-' is a group of option letters:
 * c sets invert, d sets delete, s sets squeeze; any other letter is
 * ignored.  The first remaining argument is expanded into as1 and every
 * later one is appended to as2.
 */
void getopts(argc, argv, as1, as2)
 int argc;
 unchar *argv[];
 MYSTR *as1, *as2;
{
unchar *c;
int flag, i;

  flag = 0;                       /* Becomes non-zero once string1 is read */

  for (i=1; i<argc; i++) {
      c=argv[i];

      if (*c == '-') {
         while (*++c) {           /* Each letter after the '-' */
               switch (*c) {
                 case 'c':  invert=TRUE;   break;
                 case 'd':  delete=TRUE;   break;
                 case 's':  squeeze=TRUE;  break;
                 default:   break;
               }
         }  /* while */
      } else
      if (flag==0) {
         getopts_string (c, as1);
         ++flag;
      } else
         getopts_string (c, as2);

  }  /* for */
}  /* getopts */


/* Complement prefixed string:  s = {0...255 character set} - {s.i[]} */
void complement(s)
 MYSTR *s;
{
int i=256;
int j=0;

  do {                            /* Invert the intersection map */
      --i;
      s -> i[i] = 1 - (s -> i[i]);
  } while (i);

  s -> len = 256 - (s -> len);    /* Update the length */

  for (i=0; i<256; i++)
      if (s -> i[i] == TRUE)
         s -> s[j++] = (char)i;   /* Make string reflect new map and length */
}  /* complement */


/* Fill in the translation table t[] so that the k-th character of s1 maps
 * to the k-th character of s2.  Only as many pairs as the shorter string
 * provides are used.  An empty s2 is first replaced by a copy of s1 (for
 * compatibility with tr), which maps every s1 character to itself.
 */
void setup_t(s1, s2)
 MYSTR *s1, *s2;
{
int k, pairs;

   if (s2->len == 0)        /* Added for compatibility with tr */
      *s2 = *s1;

   /* Number of usable pairs = length of the shorter string */
   pairs = (s1->len > s2->len) ? s2->len : s1->len;

   for (k=0; k<pairs; k++)  /* Make {t} = s/{s1}/{s2}/ */
       t[ (int)(s1->s[k]) ] = s2->s[k];
}  /* setup_t */


/* Program entry point: filter stdin to stdout.
 *
 * The command line is parsed by getopts(), string1 is complemented when
 * -c was given, and the translation table is built by setup_t().  Input
 * is then processed one buffer at a time in one of four modes:
 *
 *   -d and -s  drop characters of string1, then collapse runs of a
 *              character that belongs to string2
 *   -d         drop characters of string1
 *   -s         translate, then collapse runs of a translated character
 *              that belongs to string2
 *   (neither)  translate only
 *
 * Returns 0 on success, 1 when reading stdin or writing stdout fails.
 */
int main (argc, argv)
 int argc;
 unchar *argv[];
{
int ch;
int lastch=(-1);   /* Last character written; starts outside the 0-255 set */
unchar tch;
size_t i, j, n;    /* Read index, write index, bytes in the buffer */
MYSTR s1, s2;

  (void)clr_mystr(&s1);
  (void)clr_mystr(&s2);

  (void)getopts(argc, argv, &s1, &s2);

  if (invert)
     (void)complement(&s1);

  (void)setup_t(&s1, &s2);

  while ((n=fread(a, 1, SIZE, stdin)) != 0) {
        j=0;

        if (delete && squeeze) {
           for (i=0; i<n; i++) {
               ch=(int)a[i];      /* Present, untranslated input character */

               if (s1.i[ch] != FALSE)
                  continue;       /* Deleted */

               /* lastch carries over from the previous buffer, so a run
                * that spans two reads is still squeezed.
                */
               if ( (ch != lastch) ||
                    (s2.i[ch] == FALSE) ) {
                  a[j++] = ch;
                  lastch = ch;
               }
           }  /* for */
        } else
        if (delete) {
           for (i=0; i<n; i++)
               if (s1.i[ (int)a[i] ] == FALSE)
                  a[j++] = a[i];
        } else
        if (squeeze) {
           for (i=0; i<n; i++) {
               tch=t[ (int)a[i] ];  /* Present, translated input character */

               if ( (tch != lastch) ||
                    (s2.i[tch] == FALSE) ) {
                  a[j++] = tch;
                  lastch = tch;
               }
           }  /* for */
        } else {
           /* A straight-forward translation filter when there are no
            * option flags:
            */
           for (i=0; i<n; i++)
               a[i] = t[ (int)a[i] ];

           j=n;
        }

        /* A short write means the output is lost; stop instead of
         * consuming the rest of the input and reporting success.
         */
        if (fwrite(a, 1, j, stdout) != j) {
           (void)fputs("tds: write error\n", stderr);
           return(1);
        }
  }  /* while */

  if (ferror(stdin)) {            /* fread() stopped on an error, not EOF */
     (void)fputs("tds: read error\n", stderr);
     return(1);
  }

  if (fflush(stdout) == EOF) {    /* Catch errors on still-buffered output */
     (void)fputs("tds: write error\n", stderr);
     return(1);
  }

  return(0);
}  /* main */
