program REGRESS_FILE;
        uses MATRIXU,CRT;

     {by       Josh Mitteldorf
               6800 Scotforth Rd
               Philadelphia, PA 19119
               70441,1305 }


   {Performs a linear regression (best line fit) on an ASCII-formatted file.
    File should be organized in columns, with only whitespace between the
    columns.  You may specify at run time which column contains the dependent
    variable and which contains the independent variables.  Columns which
    are not specified as either are read in and skipped.  They may contain
    non-numeric data without causing an error.  However, non-numeric data in
    any variable cell will cause an entire row of numbers to be skipped over,
    with a message to the user.

    If a row is found with more or less than the specified number of columns,
    the program attempts to skip that row and move on.}

const Nmax=20;  NSQ=400;    cmax=50;  maxstr=80;

type float=double;
     vector=array[1..Nmax] of float;
     matrix=array[1..NSQ] of float;

const errval:float=9.87654321987654321E37;  ef=#26;   space=' ';   tab=#9;
      cr=#13;  lf=#10;   crlf:array[1..2] of char=(cr,lf);

var  f                                   :text;
     xxsum,xxavg,xxvar,xinv              :matrix;
     xavg,xyavg,xsum,xysum,x,
     xavgyavg,aw,num                     :vector;
     y,ysum,yysum,yavg,yyavg,norm,
     yvar,dy,dyf                         :float;
     n,d,count,t1,t2,t3,cols,ycol,nmiss  :integer;
     g                                   :file of float;
     xcol                                :array[1..Nmax] of integer;
     filename                            :string[maxstr];
     vertical                            :boolean;

procedure CumRegistersToZero;
          var i,j  :integer;
          begin
          ysum:=0; yysum:=0; n:=0;  nmiss:=0;
          for i:=1 to d do begin
              xsum[i]:=0;
              xysum[i]:=0;
              for j:=1 to d do xxsum[d*pred(i)+j]:=0;
              end;
          end;

procedure Accumulate;
          var k,i,j   :integer;
              xi      :float;
          begin
          inc(n);
         {$ifdef DIAGNOSTIC} write(n:4,'  '); {$endif}
          ysum:=ysum+y;    yysum:=yysum+sqr(y);
          for i:=1 to d do begin
                  xi:=x[i];
                  xsum[i]:=xsum[i]+xi;
                  xysum[i]:=xysum[i]+y*xi;
                  for j:=i to d do xxsum[d*pred(i)+j]:=xxsum[d*pred(i)+j]+xi*x[j];
                  end;                        {Upper half only, for speed}
              end;

procedure SumsToAverages;
          var i,j  :integer;

          begin
          if (n-d-1<=0) then begin
             writeln(crlf,'Not enough points for regression.');
             writeln('You have ',n,' lines of valid data and ',d,' regression variables.');
             halt; end;
          norm:=1/n;
          yavg:=ysum*norm;    yyavg:=yysum*norm;
            {$ifdef DIAGNOSTIC} write('XSUM: '); vecprint(xsum,d); {$endif}
          ScalarMultV(addr(xsum),norm,addr(xavg),d);
            {$ifdef DIAGNOSTIC} write('XAVG: '); vecprint(xavg,d); {$endif}
          ScalarMultV(addr(xavg),yavg,addr(xavgyavg),d);
          ScalarMultV(addr(xysum),norm,addr(xyavg),d);
          ScalarMult(addr(xxsum),norm,addr(xxavg),d);
   {$ifdef DIAGNOSTIC} write('XXAVG: '); for i:=1 to d do write('[',i,']=',xxavg[{pred(1)*d+} i]:11:6,'  '); writeln; {$endif}
          for i:=1 to d do for j:=succ(i) to d do xxavg[d*pred(j)+i]:=xxavg[d*pred(i)+j];
                                {Because we saved time and computed upper half before}
          yvar:=yyavg-sqr(yavg);
          end;

procedure ReportMissing;
          begin
          inc(nmiss);
          write(crlf,'Missing data in line ',n + nmiss,'.');
          end;

procedure Regression;
          var xavg2          :matrix;
              wv             :vector;
              i,j            :integer;
              wr             :float;
              c              :array[1..cmax] of float;
              ok,warned      :boolean;
              ch             :char;

   procedure DistributeAndCheckData;
             var i :integer;
             begin
             y:=c[ycol];
             ok:=(y<>errval);
             for i:=1 to d do begin
                 x[i]:=c[xcol[i]];
                 ok:=(ok and (x[i]<>errval));
                 end;
             repeat read(f,ch) until ((eof(f) or not (ch in [space,tab])));
             if ((ch<>cr) and (ch<>ef)) then begin
                write(crlf,'Warning!  Unexpected number of columns in line ',1+n+nmiss,'.');
                ok:=false;  warned:=true; inc(nmiss);
                end
             else warned:=false;
             repeat read(f,ch) until ((ch=lf) or (eof(f)));
             if (eof(f) and (x[1]=0) and (y=0)) then ok:=false;
                {A blank line at the end of the file reads in as a row of zeros.}
             end;


          begin {Regression}
          CumRegistersToZero;
          repeat
             i:=1;
             for i:=1 to cols do begin
{$I-}            read(f,c[i]);
                 if (ioresult>0) then c[i]:=errval;
                 { write(c[i]:16); }
{$I+}            end;
             DistributeAndCheckData;
{$ifdef DIAGNOSTIC}   write(y:8:5); for i:=1 to d do write(x[i]:8:5); writeln; {$endif}
             if (ok) then Accumulate else if (warned) or (eof(f) and (x[1]=0) and (y=0)) then else ReportMissing;
          until (ioresult>0) or (eof(f));
          SumsToAverages;
          Diadic(addr(xavg),addr(xavg),addr(xavg2),d);
          MatDif(addr(xxavg),addr(xavg2),addr(xxvar),d);

          VecDif(addr(xyavg),addr(xavgyavg),addr(num),d);
            {$ifdef DIAGNOSTIC}
             writeln('Y AVG: ',yavg:11);
             write(crlf,'XY AVG:'); for i:=1 to d do write(xyavg[i]:11,' ');
             write(crlf,'XAVYAV:'); for i:=1 to d do write(xavgyavg[i]:11,' ');
             write(crlf,'NUM: '); for i:=1 to d do write(num[i]:11,' ');
             write(crlf,'DEN: '); for i:=1 to d do begin
                 for j:=1 to d do write(xxvar[i,j]:11,' '); write(crlf,'     ');
                 end; {$endif}
          Solve(addr(xxvar),addr(num),addr(aw),d);
          Prod(addr(xxvar),addr(aw),addr(wv),d);  {wv:=(x)  a}
          dy:=sqrt(yvar - dot(addr(wv),addr(aw),d));   {y= (y) - a(x)}
          dyf:=dy/sqrt(n-d-1);
{          for i:=1 to d do write(dyf/sqrt(xxavg[d*pred(i)+i]-xavg2[d*pred(i)+i]):9:5); writeln;  (This is my own measure.)  }
          Inverse(addr(xxvar),addr(xinv),d);
          end;

procedure Report;
          var i    :integer;
          begin
          clrscr;
          write('INDEPENDENT: col ',ycol,'  DEPENDENT:'); for i:=1 to d do write(' ',xcol[i]);
          writeln(crlf,n,' point regression with ',d,' variables.  ',n-d-1,' degrees of freedom.');
          writeln('R = ',Dot(addr(aw),addr(num),d)/yvar:9:5);
          writeln('Y (RMS deviance from line) = ',dy:9:5,',');
          writeln('Compare (Y) = ',sqrt(yvar):9:5);
          if (vertical) then begin
            writeln(crlf,'   COL    COEFFICIENT      STD ERROR     STD DEVIATION    PROP. OF VARIANCE');
            writeln(     '--------  -----------      ---------     -------------    -----------------');
            writeln(     ' const  ',yavg-dot(addr(xavg),addr(aw),d):13:7,dy:15:7);
            for i:=1 to d do
              writeln(xcol[i]:5,aw[i]:16:7,dyf*sqrt(xinv[d*pred(i)+i]):15:7,sqrt(xxvar[pred(i)*d+i]):18:5,
                  xxvar[pred(i)*d+i]*sqr(aw[i])/yvar:21:5);
            end
          else begin
            writeln('B = ',yavg-dot(addr(xavg),addr(aw),d):9:5,'  (Std error=Y)');
            writeln('Coefficient vector A:');
            for i:=1 to d do write(aw[i]:9:5); writeln;
            writeln('Std errors of coefficients:');
            for i:=1 to d do write(dyf*sqrt(xinv[d*pred(i)+i]):9:5); writeln;
            writeln('Std Deviations (X):');
            for i:=1 to d do write(sqrt(xxvar[pred(i)*d+i]):9:5); writeln;
            writeln('Proportion of Y''s variance attributable to:');
            for i:=1 to d do write(xxvar[pred(i)*d+i]*sqr(aw[i])/yvar:9:5); writeln;
            end;
          end;


procedure Parse(xstr :string);
          var i,j,k,ta  :integer;
              substr    :string[80];
              v         :float;
          begin
          for i:=succ(length(xstr)) to maxstr do xstr[i]:=' ';
          i:=0; k:=1;
          while (i<length(xstr)) do begin
                substr:=''; j:=1;
                repeat inc(i) until (xstr[i] in ['0'..'9']);
                repeat substr[j]:=xstr[i]; inc(i); inc(j); until not (xstr[i] in ['0'..'9']);
                substr[0]:=char(pred(j));
                val(substr,v,ta);
                xcol[k]:=round(v); inc(k);
                end;
          d:=pred(k);
          write('There are ',d,' variables in cols ');
          for k:=1 to d do write(xcol[k],' ');
          end;

procedure Input;
          var ki  :char;
             xstr :string[80];
             i    :integer;
             ok   :boolean;
          begin
          clrscr;
          writeln('ASCII FILE -> Regression',crlf);
{$I-}     repeat
             write('What is the name of the file containing the data? ');
                readln(filename); assign(f,filename); reset(f);
          until (ioresult=0);
{$I+}     write('Total number of ');
           textattr:=112; write('columns'); textattr:=7;
           write(' in file: '); readln(cols);
          repeat
            write('Which column contains the ');
             textattr:=112; write('dependent'); textattr:=7;
             write(' variable? '); readln(ycol);
             ok:=((ycol<=cols) and (ycol>0));
          until ok;
          repeat
             write('Which columns contain the ');
              textattr:=112; write('independent'); textattr:=7;
              write(' variables? '); readln(xstr);
             Parse(xstr);
             ok:=true;
             for i:=1 to d do ok:=(ok and ((xcol[i]<=cols) and (xcol[i]>0)));
             if ok then begin
                write(crlf,'  Correct? '); readln(ki);
                end
             else begin ki:='N'; writeln; end;
          until upcase(ki) in ['Y','y'];
          textattr:=112; write('H'); textattr:=7;
          write('orizontal or ');
          textattr:=112; write('V'); textattr:=7;
          write('ertical output form? ');
          repeat ki:=upcase(readkey) until ki in ['H','V']; writeln(ki);
          if (ki='V') then vertical:=true else vertical:=false;
          {window(1,8,80,25); clrscr; window(1,1,80,25); gotoxy(1,8);}
          write('Computing...');
          end;

begin
Input;
Regression;
Report;
end.