/*
 * File......: LLSS.PRG
 * Author....: Berend M. Tober
 * CIS ID....: 70541,1030
 * Date......: $Date$
 * Revision..: $Revision$
 * Log file..: $Logfile$
 * 
 * This is an original work by Berend M. Tober and is placed in the
 * public domain.
 *
 * Modification history:
 * ---------------------
 * 1994/05/01 BMT Re-submission to NANFOR.LIB librarian.
 * 1994/04/21 BMT Employed objected oriented error trapping.  This module
 *                no longer QUITs execution on errors, but instead BREAKs,
 *                passing back to the calling module an error object.
 *                Eliminated _ftListCurrent().
 * 1994/04/19 BMT Repaired _ftListCurrent() output device call.
 * 1994/04/18 BMT Cleaned-up documentation.
 * 1994/04/18 BMT Submitted file to NANFOR.LIB librarian.
 *
 * $Log$
 *
 */


/*  $DOC$
 *  $FUNCNAME$
 *     FT_LLSS()
 *  $CATEGORY$
 *     Mathematical/statistical functions
 *  $ONELINER$
 *     Compute least-squared error linear regression statistics.
 *  $SYNTAX$
 *     FT_LLSS( <aX>, <aY> ) -> { nCorrelation, nSlope, nIntercept }
 *  $ARGUMENTS$
 *     <aX> array of data representing independent variable.
 *     <aY> array of data representing dependent variable.
 *  $RETURNS$
 *     Array containing correlation coefficient, slope, and
 *     y-intercept of best-fit line.
 *  $DESCRIPTION$
 *     These routines are useful for computing the best-fit line
 *     through a collection of data point pairs.  That is,
 *
 *       given {Xi, Yi} for i := 1,...,N
 *
 *       find m and b such that the line y := m*x + b is the "best
 *       fit" line, in the linear, least-squared error sense, and
 *       also compute r the correlation coefficient between the
 *       set {Xi} and {Yi}.
 *
 *     Two approaches are possible:
 *
 *     (1) Post-processing.
 *     This method is applicable to situations in which the entire
 *     set of data point pairs is available simultaneously, as in
 *     after a test has been performed or historical data has
 *     otherwise been gathered.  A single call to function FT_LLSS()
 *     with the data arrays will return the statistical results
 *     as an array { r, m, b } (cf. example #1).
 *
 *     (2) Dynamic updating.
 *     This method is applicable to real-time systems in which the
 *     the data set to be analyzed is not completely available
 *     at the start of the analysis.  Data points are accumulated
 *     using function FT_LLSSAccumPt() which updates static variables
 *     containing statistical parameters with each call.  The
 *     current solution is available after at least two data points
 *     have been accumulated (cf. example #2).
 *  $EXAMPLES$
 *     aStats := FT_LLSS( aX, aY )
 *  $SEEALSO$
 *  $INCLUDE$
 *  $END$
 */

//*************************************************************************
// File: LLSS.PRG -- Created by Berend M. Tober 1994/03/27
//*************************************************************************
// Routines for implementing linear, least-squares error solution to fit
// a straight line through an arbitrary number of (x,y) data points.

/*
 * Functions in this module
 * ------------------------
 * T_LLSS()                   Example applications
 * FT_LLSS( aX, aY )          Computes regression statistics for data set
 * FT_LLSSInit()              Initializes static variables to zero
 * _ftAvg(nN, nX, nS)         Accumulates number to running average
 * FT_LLSSAccumPt( nX, nY )   Accumulates data point (nX, nY) to static array
 * FT_LLSS_R()                Returns correlation coefficient
 * FT_LLSS_M()                Returns best-fit slope
 * FT_LLSS_B()                Returns best-fit y-intercept
 * FT_LLSS_N()                Returns number of data points
 * FT_LLSS_X()                Returns average of X's
 * FT_LLSS_Y()                Returns average of Y's
 * _ftAvg(nN, nX, nS)         Accumulates number to running average
 * _ftErrOnePoint( cSubprogram ) Raises error due to insufficient data points
 * _ftErrDivByZero( cSubprogram )Raises error due to zero denominator
 */
/*
   The elements of array aLLS are maintained on-the-fly, i.e., they
   are updated with each call to FT_LLSSAccumPt(), so that the regression
   statistics are available at any point in the data-accumulation
   phase after more than one data point has been accumulated.
 */
#define LS_NPTS   aLLS[1]  // Number of accumulated data point pairs
#define LS_AVGX   aLLS[2]  // Average of X's
#define LS_AVGY   aLLS[3]  // Average of Y's
#define LS_AVGXX  aLLS[4]  // Average of squared X's
#define LS_AVGYY  aLLS[5]  // Average of squared Y's
#define LS_AVGXY  aLLS[6]  // Average of cross product X*Y's

STATIC aLLS    := {}

#define EOL  CHR(10)+CHR(13)

#ifdef FT_TEST
PROCEDURE T_LLSS()            // Example applications
   LOCAL aX := {20,25,30,35,40,45,50,55,60}  // Sample independent data
   LOCAL aY := {22,21,20,23,19,18,16,14,11}  // Sample dependent data
   LOCAL i                                   // Local index for looping
   LOCAL oError

   CLEAR SCREEN

   * Example 1 - Shows direct implementation for the case when all data
   *             is available and ready for computation, such as in post-
   *             processing of test data or analysis of historical
   *             financial data, for instance.

   ? "EXAMPLE 1 - Post-processing data analysis"
   // List data
   ?
   ? "Data point number "
   AEVAL( aX, {|x,i| DISPOUT( STR(i, 4, 0)) })
   ? "Abscissa          "
   AEVAL( aX, {|x,i| DISPOUT( STR(x, 4, 0)) })
   ? "Ordinate          "
   AEVAL( aY, {|x,i| DISPOUT( STR(x, 4, 0)) })
   // List results
   ?
   ? "Correl/Slope/Y-intercept"
   ?
   AEVAL(FT_LLSS( aX, aY ),{|x| DISPOUT( STR(X, 7, 3)) })

   ? //Couple of blank lines
   ?

   * Example 2 - Shows point-by-point accumulation of data and prints
   *             out data and results as accumulation takes place, as
   *             might be desirable in real-time data processing or
   *             statistical process control applications, for instance.

   ? "EXAMPLE 2 - Dynamic data acquisition"
   ?

   FT_LLSSInit()              // Initializes (i.e., set to zero) statistics

   // Simple tabular output to show dynamic updating of statistics
   ?'  N   X   Y  AVGX   AVGY   AVGXX  AVGYY  AVGXY  Corr  Slope Y-interc'
   FOR i := 1 TO LEN(aX)
      /* In here you would have whatever code was required to produce
         a data point.  It could be an interface with some instrument
         taking measurements of a process, or a DJ News retrival routine
         for stock market prices, or maybe a statistical simulation of
         some dynamical equations.
      */
      FT_LLSSAccumPt(aX[i], aY[i])  // Update statistic registers
      ? STR(LS_NPTS     , 3, 0)
      ??STR(aX[i]       , 4, 0)
      ??STR(aY[i]       , 4, 0)
      ??STR(LS_AVGX     , 7, 2)
      ??STR(LS_AVGY     , 7, 2)
      ??STR(LS_AVGXX    , 7, 1)
      ??STR(LS_AVGYY    , 7, 1)
      ??STR(LS_AVGXY    , 7, 1)
      BEGIN SEQUENCE
      IF i>1                        // Must have more than one data point
         ??STR(FT_LLSS_R(), 7, 3)   // Output correlation coefficient
         ??STR(FT_LLSS_M(), 7, 3)   // Output slope of line
         ??STR(FT_LLSS_B(), 7, 3)   // Output y-intercept
      ENDIF
      RECOVER USING oError
         ? "Error in "+oError:operation+":"+oError:description
      END SEQUENCE
   NEXT

   QUIT
RETURN
#endif

*************************************************************************
FUNCTION FT_LLSS( aX, aY )
   // Computes regression statistics for data set
   LOCAL oError

   FT_LLSSInit()              // Initializes (i.e., set to zero) statistics

   IF LEN(aX) <> LEN(aY)
      oError            := ErrorNew()
      oError:args       := { aX, aY }
      oError:description:= "Array lengths unequal."
      oError:file       := "LLSS.PRG"
      oError:operation  := "FT_LLSS()"
      BREAK( oError )
   ENDIF

   AEVAL( aX, {|x,i| FT_LLSSAccumPt(aX[i], aY[i]) })
RETURN { FT_LLSS_R(), FT_LLSS_M(), FT_LLSS_B() }

*************************************************************************
FUNCTION FT_LLSSInit()   // Initializes static variables to zero
RETURN aLLS  := {0,0,0,0,0,0}

*************************************************************************
STATIC FUNCTION _ftAvg(nN, nX, nS)
   * Accumulates number to running average
   * Parameters:
   *    nN    - Number of this data point
   *    nX    - Data value to be incorporated
   *    nS    - Existing average (prior to accumulating nX)
   LOCAL nA := (nN-1)/nN   // All data is weighted equally
RETURN (nX/nN + nA*nS)

*************************************************************************
FUNCTION FT_LLSSAccumPt( nX, nY )
   //  Accumulates data point (nX, nY) to static array
   IF (VALTYPE(nX)='N' .OR. VALTYPE(nX)='D') .AND. ;
      (VALTYPE(nY)='N' .OR. VALTYPE(nY)='D')
      LS_NPTS++
      LS_AVGX  := _ftAvg( LS_NPTS,  nX  , LS_AVGX )
      LS_AVGY  := _ftAvg( LS_NPTS,  nY  , LS_AVGY )
      LS_AVGXX := _ftAvg( LS_NPTS, nX*nX, LS_AVGXX)
      LS_AVGYY := _ftAvg( LS_NPTS, nY*nY, LS_AVGYY)
      LS_AVGXY := _ftAvg( LS_NPTS, nX*nY, LS_AVGXY)
   ENDIF

RETURN LS_NPTS

*************************************************************************
FUNCTION FT_LLSS_R()
   // Returns correlation coefficient
   LOCAL oError
   LOCAL nDemX := LS_AVGXX-LS_AVGX*LS_AVGX   // First factor in denominator
   LOCAL nDemY := LS_AVGYY-LS_AVGY*LS_AVGY   // Second factor in denominator
   LOCAL nDem  := nDemX*nDemY
   LOCAL nNum  := LS_AVGXY-(LS_AVGX*LS_AVGY) // Numerator
   IF LS_NPTS == 0
      _ftErrOnePoint("FT_LLSS_R()")
   ELSEIF nDem == 0
      _ftErrDivByZero( "FT_LLSS_R()" )
   ENDIF
RETURN nNum/SQRT(nDem)

*************************************************************************
FUNCTION FT_LLSS_M()
   // Returns best-fit slope
   LOCAL oError
   LOCAL nNum := LS_AVGXY - LS_AVGX*LS_AVGY  // Numerator
   LOCAL nDem := LS_AVGXX - LS_AVGX*LS_AVGX  // Denominator
   IF LS_NPTS == 0
      _ftErrOnePoint("FT_LLSS_M()")
   ELSEIF nDem == 0
      _ftErrDivByZero( "FT_LLSS_M()" )
   ENDIF
RETURN nNum/nDem

*************************************************************************
FUNCTION FT_LLSS_B()
   IF LS_NPTS == 0
      _ftErrOnePoint("FT_LLSS_B()")
   ENDIF
RETURN (LS_AVGY-LS_AVGX*FT_LLSS_M())// Returns best-fit y-intercept

*************************************************************************
FUNCTION FT_LLSS_N()
RETURN (LS_NPTS)                    // Returns number of data points

*************************************************************************
FUNCTION FT_LLSS_X()
RETURN (LS_AVGX)                    // Returns average of X's

*************************************************************************
FUNCTION FT_LLSS_Y()
RETURN (LS_AVGY)                    // Returns average of Y's

*************************************************************************
STATIC FUNCTION _ftErrOnePoint( cSubprogram )
   // Raises error due to insufficient data points
   LOCAL oError
   oError            := ErrorNew()
   oError:args       := aLLS
   oError:description:= "Data set includes only one data point"
   oError:file       := "LLSS.PRG"
   oError:operation  := cSubprogram
   BREAK( oError )
RETURN NIL

*************************************************************************
STATIC FUNCTION _ftErrDivByZero( cSubprogram )
   // Raises error due to zero denominator in correlation or slope
   LOCAL oError
   oError            := ErrorNew()
   oError:args       := aLLS
   oError:description:= "Denominator is zero"
   oError:file       := "LLSS.PRG"
   oError:operation  := cSubprogram
   BREAK( oError )
RETURN NIL
