unit rnrchar;

{ 
  Character set support for rnr - handles conversion between
  IBM PC codepages and character sets used on USENET and mail.

  The implementation follows the MIME standard, defined in RFC 1521.
  However, it does not implement all of the required transfer-encodings
  and document types; thus it cannot claim to be MIME compatible.

  In order for the conversion to be put into effect, the option
  '--local-charset filename' must be used.

  By Henrik Storner (storner@osiris.ping.dk)
}

{$i rnr-def.pas}

interface

uses rnrglob, genericf, rnrio, rnrproc, rnrfile;

{ These need to be global, as they are written in headers }

var 
  postingsetname: string;    { charset used for outgoing posts }
  mailingsetname: string;    { charset used for outgoing mail  }
  mailxfername  : string;    { transfer-encoding for mail      }


procedure linetolocal(var s: string);
procedure localtoline(var s: string);
procedure setreadencoding(chsetheader: string; encheader: string);
procedure setsendencoding(chsetheader: string; encheader: string);
procedure loadcharsets(fn: string);


implementation

type
  charset_ptr = ^charset_rec;
  charset_rec = record
                  name: string;
                  cnvtable : string;
                  next : charset_ptr;
                end;

{ RFC 1521 transfer encodings }

  xferenc = (
    xfer_7bit,    {* 7 bit - no translation needed  *}
    xfer_8bit,    {* 8 bit - no translation needed  *}
    xfer_binary,  {* binary - no translation needed *}
    xfer_quoted,  {* Quoted printable               *}
    xfer_base64,  {* Base64                         *}
    xfer_user     {* Userdefined - don't touch!     *}
    );

var

{ These are only used locally }

  charsets      : charset_ptr;

{ For decoding news and mail; default translation if no MIME header }

  defaultset    : charset_ptr;

{ For decoding news and mail; set up for each article }

  decodingset   : string;  { Translation table for line->local         }
  decodingxfer  : xferenc; { Current transfer encoding                 }

{ For encoding news and mail; set up after message is edited         }
{ NOTE: This has to be setup after message is edited, as user may    }
{ choose to alter the charset/transfer-encoding headers.             }

  encodingset   : string;  { Translation table for local->line         }
  encodingxfer  : xferenc; { Transfer-encoding                         }

{ Characters passed uncoded when using quoted-printable }

  mailsafechars : set of char;



function hexdigitval(c: char): integer;

begin
  case c of
  '0'..'9': hexdigitval:= ord(c) - ord('0');
  'A'..'F': hexdigitval:= ord(c) - ord('A') + 10;
  'a'..'f': hexdigitval:= ord(c) - ord('a') + 10;
  else      hexdigitval:= -1;
  end;
end;

function hexstring(v: byte): string;

const
  hx : array[0..15] of char = '0123456789ABCDEF';
begin
  hexstring:= hx[v div 16] + hx[v mod 16];
end;


procedure transferencode(var s:string; encoding: xferenc);

{ Convert a line of data to the selected transfer-encoding }

var
  r: string;
  i: integer;
  lastbreak: integer;

begin
  case (encoding) of
   xfer_7bit,
   xfer_8bit,
   xfer_binary,
   xfer_user:
     { Don't change it }
     ;

   xfer_base64:
     { NOT SUPPORTED }
     ;

   xfer_quoted:
     begin
     r := '';
     lastbreak:= 0; { Indicates last position for soft line-break }

     for i:= 1 to length(s) do
       if ( (s[i] in mailsafechars) or
            ((s[i] in [#9, #32]) and (i < length(s))) ) then
         begin
           { These are not encoded: Plain ascii except '=', plus }
           { SPACE and TAB that are not last on the line.        }
           if ((length(r) - lastbreak) > 73) then
             begin
               { Need to insert soft linebreak }
               r := r + '=' + #10;
               lastbreak:= length(r);
             end;
           r := r + s[i];
         end
       else
         begin
           { These get encoded }
           if ((length(r) - lastbreak) > 71) then
             begin
               r := r + '=' + #10;
               lastbreak:= length(r);
             end;
           r := r + '=' + hexstring(ord(s[i]));
         end;

     s := r;
     end; { xfer_quoted }
  end;
end;


procedure transferdecode(var s:string; encoding: xferenc);

{ Convert a line of data from the transferencoding form to }
{ canonical form. This must be done BEFORE applying the    }
{ conversions necessary for the charset used.              }

var 
  r: string;
  i: integer;
  hexval: integer;

begin
  case (encoding) of
    xfer_7bit,   { 7-bit is OK.                                            }
    xfer_8bit,   { 8-bit is OK.                                            }
    xfer_binary, { Binary may not work, because of linelength limitations! }
    xfer_user:   { No guarantee that this will work.                       }
     { Don't do a thing here }
     ;

    xfer_base64: 
       if s<>'' then
         s:= '<base64 decoding not supported>';

    xfer_quoted:

{ Quoted-printable encoding. }

{ This encoding passes most ASCII characters unaltered.  Exception is     }
{ control characters and 8-bit characters, and the '=', which are encoded }
{ as <char> := "="hexval So decode by scanning for '=' and rebuilding     }
{ original This encoding may add linebreaks in the middle of an input     }
{ line, or may specify that no linebreak should occur at the end of an    }
{ inputline. We are unable to obey neither of these.                      }

       begin
         r := '';
         i := 1;
         while (i <= length(s)) do
           begin
           if (s[i] = '=') then
             begin
               inc(i);
               if (i < length(s)) then
                 begin
                   { Expect 2-digit hex code here.             }
                   { i<length(s) ==> we do have two characters }
                   hexval:= 16*hexdigitval(s[i]) + hexdigitval(s[i+1]);
                   if (hexval >= 0) then
                     r := r + chr(hexval)
                   else
                     { Coding error! Not valid hex digit. }
                     { Pass thru unaltered. Warn user ??  }
                     r := r + copy(s, i-1, 3);

                   inc(i,2);
                 end
               else  { i>=length(s) }
                 if (i > length(s)) then
                   begin
                     { '=' as last character ==> soft line break }
                     { Cannot handle this.                       }
                   end
                 else
                   begin
                     { Coding error - only one hex digit. }
                     { Pass thru unaltered. Warn user ??  }
                     r := r + '=' + s[i];
                     inc(i);
                   end;
               end
            else { not `=' }
              begin
                { Ordinary, unquoted character. Could check that  }
                { it is a legal character (ASCII), but why bother }
                r := r + s[i];
                inc(i);
              end;
          end;

          s := r;
       end; { quoted printable }

  end; { case }
end;



procedure detectcharset(var chsetheader: string; encheader: string;
                        var xlate: string; var xfer: xferenc;
                        defset: charset_ptr);
var 
  p: charset_ptr;
  found: boolean;

begin

{ Process the character set header (if any) and set up translation    }
{ tables for the linetolocal routine. Takes the contents of the       }
{ 'Content-type' and 'Content-transfer-encoding' header as parameter. }

{ First, detect the character set defined by the content-type }

  p := charsets;
  found:= false;
  chsetheader := lower(chsetheader);

  if (chsetheader <> '') then
    while ((p <> nil) and (not found)) do
      begin
        found := (pos(p^.name, chsetheader) <> 0);
        if (not found) then
          p:= p^.next;
      end;  

  if found then
    xlate:= p^.cnvtable
  else
    begin
      if (defset = nil) then
        xlate := ''
      else
        xlate:= defset^.cnvtable;
    end;


{ Next, determine the transfer-encoding }

  encheader := lower(encheader);
  if (pos('7bit', encheader) <> 0) then
    xfer := xfer_7bit
  else if (pos('8bit', encheader) <> 0) then
    xfer := xfer_8bit
  else if (pos('quoted-printable', encheader) <> 0) then
    xfer := xfer_quoted
  else if (pos('base64', encheader) <> 0) then
    xfer := xfer_base64
  else if (pos('binary', encheader) <> 0) then
    xfer := xfer_binary
  else if (encheader <> '') then
    xfer := xfer_user
  else
    xfer := xfer_7bit;  { Default if no transfer-encoding specified }

end;


procedure setreadencoding(chsetheader: string; encheader: string);

begin
  detectcharset(chsetheader, encheader, decodingset, decodingxfer, defaultset);
end;


procedure setsendencoding(chsetheader: string; encheader: string);

var
  i: integer;
  reverseset: string;

begin
  detectcharset(chsetheader, encheader, reverseset, encodingxfer, nil);

{ Have to reverse the translation, as this is for local->line }

  encodingset:= '';
  for i:= 1 to 255 do
    encodingset:= encodingset + chr(i);

{ If no header in message, then reverseset is empty }

  if (reverseset <> '') then  
    for i:= 1 to 255 do
      encodingset[ord(reverseset[i])] := chr(i);
end;



procedure loadcharsets;

type
  wanted_type = (name_wanted, trans_wanted);

var
  f: text;
  l: string;
  newset: charset_ptr;
  p: charset_ptr;
  w: wanted_type;
  token: string;
  i, err, v1, v2: integer;

begin
  w := name_wanted;

  safereset(f,fn);
  if (fileresult > 0) then
    begin
      xwritelnss('Cannot open characterset file ', fn);
      shutdown(1);
    end;

  while not eof(f) do
    begin
      readln(f, l);
      token:= chopfirstw(l);
      if (length(token) > 0) then
        begin
        if (token[1] <> '#') then
          begin
            case w of
             name_wanted: 
              begin
                new(newset);
                newset^.name:= token;
                newset^.cnvtable:= '';
                for i:= 1 to 255 do
                  newset^.cnvtable:= newset^.cnvtable + chr(i);
                newset^.next:= charsets;
                charsets:= newset;
                w := trans_wanted;

                notquietlnss('Charset loaded: ', newset^.name);

                repeat
                  token := lower(chopfirstw(l));

                  if (token = '/post') then
                    begin
                      postingsetname:= newset^.name;
                      notquietlns(' - used for outgoing posts');
                    end
                  else if (copy(token, 1, 5) = '/mail') then
                    begin
                      mailingsetname := newset^.name;
                      notquietlns(' - used for outgoing mail');

                      token:= copy(token, 7, 255);
                      if (token = '7bit') then
                        mailxfername := '7bit'
                      else if (token = '8bit') then
                        mailxfername := '8bit'
                      else if (token = 'quoted') then
                        mailxfername := 'quoted-printable'
                      else if (token = 'quoted-safe') then
                        begin

                           { Extra safe quoted-printable }
                           { Will survive EBCDIC gateways unscathed }
                           mailxfername := 'quoted-printable';
                           mailsafechars := ['A'..'Z', 'a'..'z', '0'..'9',
                                          #39..#41, #43..#47, #58, #63];

                           notquietlns(
              ' - (enabling EBCDIC safe quoted-printable encoding)');

                        end
                      else 
                        begin
                          xwritelnss('unknown mailset transferencoding: ',
                            token);
                          shutdown(1);
                        end; 

                      notquietlnss(' - mail transfer-encoding: ', mailxfername);
                    end
                  else if (token = '/default') then
                    begin
                      defaultset := newset;
                      notquietlns(' - used as default for reading posts');
                    end
                  else if (token <> '') then
                    begin
                      xwritelnss('unknown charset option:', token);
                      shutdown(1);
                    end;
                until (token = '');
              end;

             trans_wanted:
               begin
               if (lower(token) = 'end') then
                 w := name_wanted
               else
                 begin
                   i:= pos('=', token);
                   if ((i = 0) or (i = length(token))) then
                     begin
                       xwritelnss('Bad charset conversion', token);
                       shutdown(1);
                     end;

                   val(copy(token, 1, i-1), v1, err);
                   if (err = 0) then
                     val(copy(token, i+1, length(token)-i), v2, err);
                   if (err <> 0) then
                     begin
                       xwritelnss('Bad charset conversion: ', token);
                       shutdown(1);
                     end;
                   if ((not (v1 in [1..255]))  and (not (v2 in [1..255]))) then
                     begin
                       xwritelnss('Bad charset conversion, must be 1..255 : ', 
                                token);
                       shutdown(1);
                     end;
                     
                  newset^.cnvtable[v1] := chr(v2);
                end;
              end;

            end; { case }
          end; { not comment }
        end; { not blank line }

    end; { while not eof }

  close(f);
end;


procedure linetolocal;

var
  i: integer;

begin

  { First, process the transferencoding }
  transferdecode(s, decodingxfer);

  { Next, apply any charactersets specified }
  if (decodingset <> '') then
    for i:= 1 to length(s) do
      s[i]:= decodingset[ord(s[i])];
end;


procedure localtoline;

var
   i: integer;

begin
  { Apply character set encoding }
  if (encodingset <> '') then
    for i:= 1 to length(s) do
      s[i]:= encodingset[ord(s[i])];
   
{ Apply transfer-encoding }
  transferencode(s, encodingxfer);

end;


begin { Module init code }

  charsets       := nil;
  defaultset     := nil;

  postingsetname := 'us-ascii';
  mailingsetname := 'us-ascii';
  mailxfername   := '7bit';

  mailsafechars  := [#33..#60, #62..#126];

end. { Module ends }
