/* whetstone/
 *      Whetstone benchmark in C.  This program is a translation of the
 *	original Algol version in "A Synthetic Benchmark" by H.J. Curnow
 *      and B.A. Wichman in Computer Journal, Vol  19 #1, February 1976.
 *
 *	Used to test compiler optimization and floating point performance.
 *
 *	Compile by:		cc -O -s -o whet whet.c
 *	or:			cc -O -DPOUT -s -o whet whet.c
 *	if output is desired.
 */

// modified by l.g. kahn for windows NT to do parallel version. 
 
// #define ITERATIONS	10 /* 1 Million Whetstone instructions */
//#define ITERATIONS	100 /* 10 Million Whetstone instructions */

#include "math.h"
#include <stdlib.h>
#include <stdio.h>

/* processor benchmark using multiple threads */



#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <io.h>
#include <time.h>
#include <process.h>
#include <string.h>
#include <direct.h>
#include <errno.h>

#include <fcntl.h>
#include <stdarg.h>
#include <malloc.h>

#define maxthreads 8

DWORD threadid[maxthreads];
HANDLE threadhandle[maxthreads];
DWORD ProcAff;
DWORD SysAff;
DWORD ThdAff;

DWORD thread_start_time[maxthreads];
DWORD thread_stop_time[maxthreads];
DWORD starttime, stoptime;
BOOLEAN setprocessoraffinity = FALSE;
BOOLEAN assignedpaffin[maxthreads] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE };
DWORD paffinarray[maxthreads] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000 };


/*BOOL
WINAPI
GetProcessAffinityMask(
    HANDLE hProcess,
    LPDWORD lpProcessAffinityMask,
    LPDWORD lpSystemAffinityMask
    );

DWORD
WINAPI
SetThreadAffinityMask(
    HANDLE hThread,
    DWORD dwThreadAffinityMask
    );
*/
int noprocessors = 0;
int availprocessors = 0;

double 		e1[4], t, t1, t2;
int		i, j, k, l, n1, n2, n3, n4, n6, n7, n8, n9, n10, n11;

int ITERATIONS = 100; // = 10 million whetstones

int number_of_processors(affin)
DWORD affin;

{
   
  int numprocessors = 0;

  // return the number of processors based on the system affinity dword
  // each bit represents a processor
  // ie 00000011 = 3 = 2 processors 
  if (affin == 0x0001)
    numprocessors = 1;
  else if (affin == 0x0003)
	 numprocessors = 2;
  else if (affin == 0x0007)
     numprocessors = 3;
  else if (affin == 0x000F)
     numprocessors = 4;
  else if (affin == 0x001F)
     numprocessors = 5;
  else if (affin == 0x003F)
     numprocessors = 6;
  else if (affin == 0x007F)
     numprocessors = 7;
  else if (affin == 0x00FF)
     numprocessors = 8;
  else if (affin == 0x01FF)
     numprocessors = 9;
  else if (affin = 0x03FF)
     numprocessors == 10;
  else if (affin = 0x07FF)
     numprocessors = 11;
  else if (affin == 0x0FFF)
     numprocessors = 12;
  else if (affin == 0x1FFF)
     numprocessors = 13;
  else if (affin == 0x3FFF)
     numprocessors = 14;
  else if (affin == 0x7FFF)
     numprocessors = 15;
   else if (affin == 0xFFFF)
     numprocessors = 16;

else 

  {
    printf("ERROR: Illegal processor affinity mask = %x defaulting to 1 processor. \n",affin);
 	fflush(stdout);
	numprocessors = 1;
  }

  
return numprocessors;

}

void set_thread_to_processor(thnd,processorno,threadno)
HANDLE thnd;
int processorno;
int threadno;

{

 DWORD affinmask = 0;
 DWORD ThdAff = 0;


  // override if set thread individually
  if (assignedpaffin[threadno])
    {
	 printf("Setting thread number %d with Affinity mask %ld \n",threadno,paffinarray[threadno]);
	 fflush(stdout);
	 
     ThdAff = SetThreadAffinityMask( thnd,paffinarray[threadno]);
     if ( !ThdAff )
      printf( "SetThreadAffinityMask(%ld) failed\n\n",paffinarray[threadno] );
     else printf( "Previous thread affinity %x\n\n", ThdAff );
     fflush(stdout);
	}

   else

    {

  printf("Setting thread number %d to only run on processor %d \n",threadno,processorno);
  fflush(stdout);

  switch (processorno)
   	   {
	     case 1:
		  affinmask = (DWORD)1;
		  break;
		 
		 case 2:
		  affinmask = (DWORD)1 << 1;
		  break;
         
         case 3:
		  affinmask = (DWORD)1 << 2;
		  break;

         case 4:
		  affinmask = (DWORD)1 << 3;
		  break;

         case 5:
		  affinmask = (DWORD)1 << 4;
		  break;

	     case 6:
		  affinmask = (DWORD)1 << 5;
		  break;
		 
		 case 7:
		  affinmask = (DWORD)1 << 6;
		  break;
         
         case 8:
		  affinmask = (DWORD)1 << 7;
		  break;

  

		 default: printf("Illegal processor number %d in set thread to processor\n",processorno);
		          printf("Defaulting to processor 1 \n");
				  fflush(stdout);
				  affinmask = 1 << 1;


		}

	 // now set the thing

   ThdAff = SetThreadAffinityMask( thnd,affinmask);
if ( !ThdAff )
      printf( "SetThreadAffinityMask(%ld) failed\n\n",affinmask );
   else printf( "Previous thread affinity %x\n\n", ThdAff );
   fflush(stdout);
  }
 }	


#ifdef POUT
void pout(n, j, k, x1, x2, x3, x4)
int n, j, k;
double x1, x2, x3, x4;
{
	printf("%6d%6d%6d  %5e  %5e  %5e  %5e\n",
		n, j, k, x1, x2, x3, x4);
}
#endif



void pa_1(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}


void pa_2(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}

void pa_3(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}

void pa_4(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}

void pa_5(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}

void pa_6(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}

void pa_7(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}

void pa_8(e)
double e[4];
{
	register int j;

	j = 0;
     lab:
	e[0] = (  e[0] + e[1] + e[2] - e[3] ) * t;
	e[1] = (  e[0] + e[1] - e[2] + e[3] ) * t;
	e[2] = (  e[0] - e[1] + e[2] + e[3] ) * t;
	e[3] = ( -e[0] + e[1] + e[2] + e[3] ) / t2;
	j += 1;
	if (j < 6)
		goto lab;
}


void p3_1(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}

void p3_2(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}

void p3_3(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}

void p3_4(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}

void p3_5(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}

void p3_6(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}

void p3_7(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}

void p3_8(x, y, z)
double x, y, *z;
{
	x  = t * (x + y);
	y  = t * (x + y);
	*z = (x + y) /t2;
}


void p0_1()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

void p0_2()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

void p0_3()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

void p0_4()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

void p0_5()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

void p0_6()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

void p0_7()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

void p0_8()
{
	e1[j] = e1[k];
	e1[k] = e1[l];
	e1[l] = e1[j];
}

DWORD thread0()
{

double		x1, x2, x3, x4, x, y, z;

thread_start_time[0] = GetTickCount();

	/* initialize constants */

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_1(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_1(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_1();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[0] = GetTickCount();

return 0;

}




DWORD thread1()
{

double		x1, x2, x3, x4, x, y, z;

	/* initialize constants */
thread_start_time[1] = GetTickCount();

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_2(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_2(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_2();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[1] = GetTickCount();

return 0;

}


DWORD thread2()
{

double		x1, x2, x3, x4, x, y, z;

	/* initialize constants */
thread_start_time[2] = GetTickCount();

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_3(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_3(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_3();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[2] = GetTickCount();

return 0;

}


DWORD thread3()
{

double		x1, x2, x3, x4, x, y, z;

	/* initialize constants */
thread_start_time[3] = GetTickCount();

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_4(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_4(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_4();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[3] = GetTickCount();

return 0;

}


DWORD thread4()
{

double		x1, x2, x3, x4, x, y, z;

	/* initialize constants */
thread_start_time[4] = GetTickCount();

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_5(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_5(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_5();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[4] = GetTickCount();

return 0;

}


DWORD thread5()
{

double		x1, x2, x3, x4, x, y, z;

	/* initialize constants */
thread_start_time[5] = GetTickCount();

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_6(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_6(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_6();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[5] = GetTickCount();

return 0;

}


DWORD thread6()
{

double		x1, x2, x3, x4, x, y, z;

	/* initialize constants */
thread_start_time[6] = GetTickCount();

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_7(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_7(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_7();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[6] = GetTickCount();

return 0;

}


DWORD thread7()
{

double		x1, x2, x3, x4, x, y, z;

	/* initialize constants */
thread_start_time[7] = GetTickCount();

	t   =   0.499975;
	t1  =   0.50025;
	t2  =   2.0;

	/* set values of module weights */

	n1  =   0 * ITERATIONS;
	n2  =  12 * ITERATIONS;
	n3  =  14 * ITERATIONS;
	n4  = 345 * ITERATIONS;
	n6  = 210 * ITERATIONS;
	n7  =  32 * ITERATIONS;
	n8  = 899 * ITERATIONS;
	n9  = 616 * ITERATIONS;
	n10 =   0 * ITERATIONS;
	n11 =  93 * ITERATIONS;

/* MODULE 1:  simple identifiers */

	x1 =  1.0;
	x2 = x3 = x4 = -1.0;

	for(i = 1; i <= n1; i += 1) {
		x1 = ( x1 + x2 + x3 - x4 ) * t;
		x2 = ( x1 + x2 - x3 - x4 ) * t;
		x3 = ( x1 - x2 + x3 + x4 ) * t;
		x4 = (-x1 + x2 + x3 + x4 ) * t;
	}
#ifdef POUT
	pout(n1, n1, n1, x1, x2, x3, x4);
#endif


/* MODULE 2:  array elements */

	e1[0] =  1.0;
	e1[1] = e1[2] = e1[3] = -1.0;

	for (i = 1; i <= n2; i +=1) {
		e1[0] = ( e1[0] + e1[1] + e1[2] - e1[3] ) * t;
		e1[1] = ( e1[0] + e1[1] - e1[2] + e1[3] ) * t;
		e1[2] = ( e1[0] - e1[1] + e1[2] + e1[3] ) * t;
		e1[3] = (-e1[0] + e1[1] + e1[2] + e1[3] ) * t;
	}
#ifdef POUT
	pout(n2, n3, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 3:  array as parameter */

	for (i = 1; i <= n3; i += 1)
		pa_8(e1);
#ifdef POUT
	pout(n3, n2, n2, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 4:  conditional jumps */

	j = 1;
	for (i = 1; i <= n4; i += 1) {
		if (j == 1)
			j = 2;
		else
			j = 3;

		if (j > 2)
			j = 0;
		else
			j = 1;

		if (j < 1 )
			j = 1;
		else
			j = 0;
	}
#ifdef POUT
	pout(n4, j, j, x1, x2, x3, x4);
#endif

/* MODULE 5:  omitted */

/* MODULE 6:  integer arithmetic */

	j = 1;
	k = 2;
	l = 3;

	for (i = 1; i <= n6; i += 1) {
		j = j * (k - j) * (l -k);
		k = l * k - (l - j) * k;
		l = (l - k) * (k + j);

		e1[l - 2] = j + k + l;		/* C arrays are zero based */
		e1[k - 2] = j * k * l;
	}
#ifdef POUT
	pout(n6, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE 7:  trig. functions */

	x = y = 0.5;

	for(i = 1; i <= n7; i +=1) {
		x = t * atan(t2*sin(x)*cos(x)/(cos(x+y)+cos(x-y)-1.0));
		y = t * atan(t2*sin(y)*cos(y)/(cos(x+y)+cos(x-y)-1.0));
	}
#ifdef POUT
	pout(n7, j, k, x, x, y, y);
#endif

/* MODULE 8:  procedure calls */

	x = y = z = 1.0;

	for (i = 1; i <= n8; i +=1)
		p3_8(x, y, &z);
#ifdef POUT
	pout(n8, j, k, x, y, z, z);
#endif

/* MODULE9:  array references */

	j = 1;
	k = 2;
	l = 3;

	e1[0] = 1.0;
	e1[1] = 2.0;
	e1[2] = 3.0;

	for(i = 1; i <= n9; i += 1)
		p0_8();
#ifdef POUT
	pout(n9, j, k, e1[0], e1[1], e1[2], e1[3]);
#endif

/* MODULE10:  integer arithmetic */

	j = 2;
	k = 3;

	for(i = 1; i <= n10; i +=1) {
		j = j + k;
		k = j + k;
		j = k - j;
		k = k - j - j;
	}
#ifdef POUT
	pout(n10, j, k, x1, x2, x3, x4);
#endif

/* MODULE11:  standard functions */

	x = 0.75;
	for(i = 1; i <= n11; i +=1)
		x = sqrt( exp( log(x) / t1));

#ifdef POUT
	pout(n11, j, k, x, x, x, x);
#endif

// lgk modifications to record kips and start and times

thread_stop_time[7] = GetTickCount();
return 0;

}


/* ----------------------------------------------------------- */

void report_results(threadcount)
  int threadcount;
  {

  DWORD totaltime = 0;
  DWORD localtime = 0;
  double totalcount = 0;
  double localcount = 0;
  int i;

   
     printf("Parallel Whetstone results with %d thread(s).\n",threadcount);
	 printf("______________________________________________________________\n\n");

	 for (i=0; i<threadcount; i++)
	   {
	    printf("Thread %d ---->\n",i);
		printf("                Start time = %u End time = %u \n",thread_start_time[i],
		        thread_stop_time[i]);
		
		localtime = thread_stop_time[i] - thread_start_time[i];
		localcount = ITERATIONS*100000; // 10 million if iter = 100

		printf("                Total time = %u Milliseconds, Operations = %.0lf \n",localtime,localcount);
		printf("                Whetstone Ops/Second = %10.4lf \n",(double)(localcount / (double)localtime) * (double)1000);
		fflush(stdout);

		totaltime = stoptime - starttime;
		totalcount = totalcount + localcount;

	    }

	  // now do overall totals


	  printf("Overall Total time = %u Operations = %.0lf \n",totaltime,totalcount);
	  printf("Overall Whetstone Ops/Second = %10.4lf \n",(double)(totalcount / (double)totaltime) * (double)1000);
          fflush(stdout);		
  }

/* --------------------------------------------------------------------- */


/* --------------------------------------------------------------------- */


void main(argc,argv)
int argc;
char **argv;

{
   int current_arg = 1;
   BOOLEAN more_args = TRUE;
   BOOLEAN done = FALSE;
   int i;
   int threadcount = 1;
   DWORD dwWaitStatus = 0;

    if (argc > 1)
     {
   /* process arguments */
   if (argv[current_arg][0] != '-')
     {
      printf("Illegal parms \n");
      fflush(stdout);
      exit(1);
	 }
   else
		
    do 
     {
      /* get the first one and if it doesn't start with a - we have a problem */
		
       switch (argv[current_arg][1])
           {
				 
				  // assign paffin value 
				  case 'v':
				  if (argc < (current_arg + 3))
				    {
				      printf("ERROR: Two Parameters needed for Affinity assignment Thread followed by value \n");
					  fflush(stdout);
					  exit(1);
				    }
				   else
				     {
					
						int tid = atoi(argv[current_arg + 1]);
				        DWORD affin = (DWORD)atol(argv[current_arg + 2]);
						int i = 0;

					    if ((affin < 0x0001) || (affin > 0x01FF))
						  {
						   printf("ERROR: Affinity value must be between 0x0001 and 0x01FF. \n");
						   fflush(stdout);
						   exit(1);
						  }

					    
						if (strncmp(argv[current_arg + 1],"*",1) == 0)
						   {
					  		  for (i=0; i<maxthreads; i++)
					  		    {
					  		     paffinarray[i] = affin;
								 assignedpaffin[i] = TRUE;
								}
						   }

						else if ((tid <1) || (tid > 8))
					     {
						   printf("ERROR: Thread value must be between 1 and 8. \n");
						   fflush(stdout);
						   exit(1);
						  }
						else // only affin 1 thread 
						  {
						   paffinarray[tid] = affin;
						   assignedpaffin[tid] = TRUE;
						  }
						        
					   current_arg = current_arg + 2;
					}
				   break;

                 case 'i':
				  if (argc < (current_arg + 2))
				    {
				      printf("ERROR: Parameter needed for iterations value. \n");
					  fflush(stdout);
					  exit(1);
				    }
				   else
				     {
				        ITERATIONS = atoi(argv[current_arg + 1]);
					    if ((ITERATIONS <10) || (ITERATIONS > 10000))
					     {
						   printf("ERROR: Iteration value must be between 10 and 10000 \n");
						   fflush(stdout);
						   exit(1);
						  }
						else ++current_arg;
					}
				   break;

				  case 't':
				    if (argc < (current_arg + 2))
					  {
					    printf("ERROR: Parameter needed for thread count value. \n");
						fflush(stdout);
						exit(1);
					  }
					else
					  {
					   threadcount = atoi(argv[current_arg + 1]);
					   if ((threadcount <1) || (threadcount > 8))
					     {
						   printf("ERROR: Thread count must be between 1 and 8 \n");
						   fflush(stdout);
						   exit(1);
						  }
						else ++current_arg;
					   }
					  break;

					case 'a':
					  /* set threads to only run on 1 processor this will help determine
					  if all processors are working if not certain threads will be starved */

					  setprocessoraffinity = TRUE;
					  break;
					  
					default: break;

	} /* end of switch */
						 
			   
    ++current_arg;
	if (current_arg >= argc)
	  more_args = FALSE;

    } while ((!done) && (more_args));
   
   } 
   /* initialize */

   
     printf("NT Whetstone Multiprocessor Benchmark by L. Kahn (C) 1994 \n");
     printf("__________________________________________________________\n\n");
     printf("Number of Iterations = %d, Simultaneous Thread(s) = %d.\n",ITERATIONS,threadcount);
	 
   if (!GetProcessAffinityMask( GetCurrentProcess(), &ProcAff, &SysAff) )
     {
       printf( "ERROR: GetProcessAffinityMask failed, defaulting to 1 processor\n\n" );
       noprocessors = 1;
	   availprocessors = 1;
	 }

   else
      {
        printf( "System Affinity = %x\nProcess affinity = %x\n", SysAff, ProcAff );
		noprocessors = number_of_processors(SysAff);
	    availprocessors = number_of_processors(ProcAff);
	  
	    printf("Number of processors reported = %d \n",noprocessors);
		printf("Number of available processors reported = %d \n\n",availprocessors);
	    fflush(stdout);
	  }

	 if (setprocessoraffinity)
	   {
	    int i = 0;
	    printf("Each thread will be assigned to run ONLY on each sequential processor, or\n");
		printf("on a particular set of processors if the default values have been overridden\n");
		printf("with the -v option.  If you run more threads than you have processors the \n");
		printf("additional threads will run on all the processors.\n");
		printf("By assiging threads to particular processors (that exist in your machine) you \n");
		printf("can determine if they are working properly.\n");
		printf("The following threads have the affinity mask overridden with the following values: \n");

		for (i=0; i<threadcount; i++)
		  {
		   if (assignedpaffin[i])
		     {
			  printf(" -->  Thread %d will be assigned affinity of %ld\n",i,paffinarray[i]);
			  fflush(stdout);
			 }
		   }
	  }  

	for (i=0; i<threadcount; i++)
	   {
	      thread_start_time[i] = 0;
		  thread_stop_time[i] = 0;

	  switch (i)
	    {
	    
	    case 0:
	   threadhandle[0] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread0,NULL,CREATE_SUSPENDED,&threadid[0]);
        break;

        case 1:
       threadhandle[1] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread1,NULL,CREATE_SUSPENDED,&threadid[0]);
	    break;

	    case 2:
	   threadhandle[2] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread2,NULL,CREATE_SUSPENDED,&threadid[0]);
        break;

        case 3:
       threadhandle[3] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread3,NULL,CREATE_SUSPENDED,&threadid[0]);
	    break;

	    case 4:
	   threadhandle[4] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread4,NULL,CREATE_SUSPENDED,&threadid[0]);
        break;

        case 5:
       threadhandle[5] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread5,NULL,CREATE_SUSPENDED,&threadid[0]);
	    break;

	    case 6:
	   threadhandle[6] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread6,NULL,CREATE_SUSPENDED,&threadid[0]);
        break;

        case 7:
       threadhandle[7] = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)thread7,NULL,CREATE_SUSPENDED,&threadid[0]);
		break;
	
	  default: break;

	}			// end of switch

 } // end of loop

	  /* now that the threads are created start them up */
	  starttime = GetTickCount();
	  for (i=0; i<threadcount; i++)
	    
	    {

		  if (setprocessoraffinity)
		    set_thread_to_processor(threadhandle[i],i+1,i);

	    ResumeThread(threadhandle[i]);
		
		}

     dwWaitStatus = WaitForMultipleObjects(threadcount, threadhandle,TRUE,INFINITE);
     stoptime = GetTickCount();

  if (dwWaitStatus == WAIT_FAILED) 
    {
	 printf("ERROR: Wait for multiple objects failed \n");
	 fflush(stdout);
	 exit(1);
	}
  else 
    {
	 printf("All threads have terminated \n");
	 fflush(stdout);
	}
 // now report results

 report_results(threadcount);

}


