/*
 * ksort.c
 * Copyright (c) 1989 Josh Greifer
 * sort big text files that DOS sort.com can't cope with
 * over 30 times faster than DOS sort.com
 * try it on huge files!
 *
 * compile with Microsoft C V. 6.0 like this:
 * (note that the Compact Model is used)
 *	cl /AC /Ox ksort.c
 * or use the makefile provided:
 *	make ksort.mak
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <ctype.h>

#define MAXLINELEN 1024
#define DEFARRAYSIZE 16000
#define MAXARRAYSIZE 64000
#define MAXFILES    FOPEN_MAX-5
#define EMBUFSIZE   4096
#define QSSSIZE     1024

#define DUMMY1ST    ((char *)-1)
#define DUMMYLAST   ((char *)-2)

FILE *inf, *ouf;
int _near offset = 1;
int _near skipspace;
int _near rename_;
int _near verbose;
unsigned int _near asize;
unsigned int _near insert;

char *infname, *oufname;

typedef struct {
    FILE *fp;		/* file pointer */
    char *fn;		/* file name	*/
    char *buf;		/* buffer	*/
} FS;

FS _near tmpftab[MAXFILES];

int _near tmpfiles;   /* number of temp files created */

void parseargs(int, char **);
void error(char *, char *);
int revcmpi(char *, char *);
int revcmp(char *, char *);
void merge(void);
void spool_to_tmpfile( char * _huge *array );

void	ksort( char * _huge *, unsigned int, int (*)( char *, char *));
void	isort( char * _huge *, unsigned int, int (*)( char *, char *));

int (*cmpfns[])(char *, char *) = { stricmp, strcmp };
int (*revcmpfns[])(char *, char *) = { revcmpi, revcmp };
int (**cmpfn)(char *, char *) = cmpfns;

void getnextline( FS *f );

unsigned int _near * _near qs;		     /* quicksort stack bottom */
char _near *qsss = "QuickSort internal stack";
char *emergency;			    /* freed when malloc() fails */
char * _huge *array;

main(int argc, char **argv)
{
    char buf[MAXLINELEN];
    char *p = buf;
    register unsigned int i=0, j=0;
    long l = 0L;

    tmpftab[0].fp = (FILE *)1; /* pseudo file */

    emergency = malloc(EMBUFSIZE);
    parseargs(argc, argv);

    if ((array = (char *_huge *)halloc((asize+1),sizeof(char *))) == NULL)
	error("Default buffer too big - use /B to specify size", NULL);

   if ((qs = (int _near *)_nmalloc(QSSSIZE*sizeof(int)))==0)
	error("Can't allocate %s\n", qsss);
loop:
    if (verbose) fprintf(stderr, "\nReading file...");
    if (verbose == 2) fprintf(stderr, "\nReading line %ld", l);

    for (i = 1; i < asize && fgets(buf, MAXLINELEN, inf); i++, l++) {
	if (verbose == 2) fprintf(stderr, "\rReading line %ld", l);
	if (skipspace) for (p = buf; isspace(*p); p++);
	if ((array[i] = strdup(p)) == NULL) {
	    free(emergency);
	    emergency = NULL;
	    array[i++] = strdup(p);
	    break;
	}
    }
    if (verbose) fprintf(stderr, "\nSorting...");
    array[0] = DUMMY1ST;
    array[i] = DUMMYLAST;

    ksort(array, i-1, *cmpfn);

    if (i == asize || !emergency) {

	spool_to_tmpfile(array);

	if (!emergency) emergency = malloc(EMBUFSIZE);
	goto loop;
    }
    else {
	if (tmpfiles) merge();
	else {
	    if (verbose) fprintf(stderr, "\nOutputting sorted file...");
	    for (j = 1; j < i; j++)
		fputs(array[j], ouf);
	}
    }
    fclose(inf);
    fclose(ouf);
    if (rename_) {
	unlink(infname);
	rename(oufname, infname);
    }
    return 0;
}

void	spool_to_tmpfile( char * _huge *array )
{
    char buf[FILENAME_MAX];

    FS *tmpfs = &tmpftab[++tmpfiles];

    unsigned int j;

    if (verbose)
	fprintf(stderr,"\nWriting temp file %d...",tmpfiles);

    sprintf(buf, "SRTQ_%d.$$$", tmpfiles);
    tmpfs->fn = strdup(buf);
    if ((tmpfs->fp = fopen(tmpfs->fn, "w+"))==0)
	error("Error creating temp file %s", tmpfs->fn);
    if ((tmpfs->buf = (char *)malloc(MAXLINELEN)) == 0)
	error("Can't allocate temp file buffer.\n", NULL);

    for (j = 1; array[j] != DUMMYLAST; j++) {
	fputs(array[j], tmpfs->fp);
	if (array[j] != DUMMY1ST) free(array[j]);
    }
}

void error(char *message, char *var)
{
    if (emergency) free(emergency);
    if (*message != '\n') fprintf(stderr, "\nFatal error:");
    fprintf(stderr, message, var);
    exit(1);
}


void merge()
{
    register FS *i,*j;
    FS *maxtmpf = &tmpftab[tmpfiles+1];

    if (verbose) fprintf(stderr, "\nMerging %d temp file%c..", tmpfiles,
	tmpfiles > 1 ? 's' : '.');

    for (i = tmpftab+1; i < maxtmpf; i++) {
	fflush(i->fp);
	fseek(i->fp,0L,SEEK_SET);
    }

    for (i = tmpftab; i < maxtmpf; i++)
	if (i->fp)	      /* tmpftab[m].fp set to NULL after EOF */
	    getnextline(i);
    if (verbose) fprintf(stderr, "\nOutputting sorted file...");
    for (;;) {
	for (j = tmpftab; !j->fp; j++)	   /* find a buffer */
	    if (j >= maxtmpf) return;	   /* EOF on all temp files */
	for (i = j+1; i < maxtmpf; i++)    /* compare all other bufs with j */
	    if (i->fp)
		if ((*cmpfn)(j->buf+offset, i->buf+offset) > 0)
		    j = i;		    /* j := next buffer to be output */
	fputs(j->buf, ouf);
	getnextline(j);
    }
}

void getnextline( register FS *f )
{
    static int pos;
    if (f == tmpftab) {     /* pseudo-file -- data is in array[] */
	if ((f->buf = array[++pos]) == DUMMYLAST) {
	    f->fp = NULL;
	    f->buf = "";
	}
    }
    else if (fgets(f->buf, MAXLINELEN, f->fp)==NULL) {
	fclose(f->fp);
	f->fp = NULL;
	unlink(f->fn);
    }
}

#define CLEARSTACK() stackptr = qs; stackend = stackptr+QSSSIZE-2
#define STACKEMPTY() (stackptr == qs)
#define PUSH(a,b)    if (stackptr >= stackend) error("%v overflow", qsss); \
    else *stackptr++ = (a); *stackptr++ = (b)
#define POP(a,b)     (b) = *--stackptr; (a) = *--stackptr

#define SWAP(Km, Kn)		temp = (Km); (Km) = (Kn); (Kn) = temp

#define LESSTHAN(Km, Kn)	((Km) == DUMMY1ST ? 1 : \
    (Kn) == DUMMY1ST ? 0 : (Km) == DUMMYLAST ? 0 : \
    (Kn) == DUMMYLAST ? 1 : (*comp)( (Km)+offset, (Kn)+offset) < 0)

#define GREATERTHAN(Km, Kn)	((Km) == DUMMY1ST ? 0 : \
    (Kn) == DUMMY1ST ? 1 : (Km) == DUMMYLAST ? 1 : \
    (Kn) == DUMMYLAST ? 0 : (*comp)( (Km)+offset, (Kn)+offset) > 0)

void	ksort( char * _huge *v, unsigned int N, int (*comp)( char *, char *))
{
    unsigned int l, r, i, j;
    char *temp;
    unsigned int     _near *stackptr;
    unsigned int     _near *stackend;
    unsigned int     M = insert;
    unsigned int    p = 0;

/* Q1: Initialize   */
    if (N <= M) goto Q9;
    if (verbose) fprintf(stderr, "\nQuickSort...%c",
				     verbose == 2 ? '\n' : '\0');
    CLEARSTACK();
    l = 1; r = N;

Q2: /* Begin new stage */
    if (verbose == 2) fprintf(stderr, "\r%u partitions processed",++p);
    i = l; j = r+1;

Q3: /* Compare Ki : K */
    ++i;
    if (LESSTHAN(v[i], v[l]))
	goto Q3;

Q4: /* Compare K : Kj */
    --j;
    if (LESSTHAN(v[l], v[j]))
	goto Q4;

/* Q5: Test i : j */
    if (j <= i) {
	SWAP(v[l], v[j]);
	goto Q7;
    }

/* Q6: Exchange */
    SWAP(v[i], v[j]);
    goto Q3;

Q7: /* Put on stack */
    if ((r-j >= j-l) && (j-l > M) ) {
	PUSH(j+1, r);
	r = j-1;
	goto Q2;
    }
    if ((j-l > r-j) && (r-j > M)) {
	PUSH(l, j-1);
	l = j+1;
	goto Q2;
    }
    if ((r-j > M) && (M >= j-l)) {
	l = j+1;
	goto Q2;
    }
    if ((j-l > M) && (M >= r-j) ) {
	r = j-1;
	goto Q2;
    }

/* Q8: take off stack */
    if (!STACKEMPTY()) {
	POP(l, r);
	goto Q2;
    }
Q9: /* Straight insertion sort */
    if (M > 1) isort(v, N, comp);
}

void	isort( char * _huge *v, unsigned int N, int (*comp)( char *, char *))
{
    unsigned int i, j;
    char *K;
    if (verbose) fprintf(stderr, "\nInsertion sort...%c",
				     verbose == 2 ? '\n' : '\0');
    for (j = 2; j <= N; j++) {
	if (verbose == 2) fprintf(stderr, "\r%u records processed",j-1);
	if (GREATERTHAN(v[j-1], v[j])) {
	    K = v[j];
	    i = j-1;
	    do {
		v[i+1] = v[i];
		i--;
	    } while (GREATERTHAN(v[i], K));
	    v[i+1] = K;
	}
    }
}

int revcmpi( register char *a1, register char *a2)
{
    return strcmpi(a2, a1);
}

int revcmp( register char *a1, register char *a2)
{
    return strcmp(a2, a1);
}

void parseargs(int argc, char **argv)
{
    static char usage[] = "\n\
	*** Ksort version 1.03 Copyright (c) Josh Greifer 1991 *** \n\
Usage: ksort [/R] [/+<n>] [/N] [/S] [/V|v] [/B<n>] [/I<n>] [infile] [outfile]\n\
       options: R	  - reverse sort\n\
		+<column> - start sort at column <n>\n\
		N	  - Don't ignore case\n\
		S	  - Skip and trim leading white space\n\
		v	  - verbose -- screen messages\n\
		V	  - Very verbose -- more screen messages\n\
		B<n>	  - Buffer size (number of lines) -- max %u\n\
		I<n>	  - Use Insertion Sort if fewer than <n> lines";
    register char *c;
    while (--argc) {
	c = *++argv;
	if (*c == '/')
	    switch (*++c) {
		case 's':
		case 'S':   skipspace++;          break;
		case 'v':   verbose=1;            break;
		case 'V':   verbose=2;            break;
		case 'r':
		case 'R':   cmpfn = revcmpfns;    break;
		case 'n':
		case 'N':   cmpfn++;              break;
		case '+':   offset = atoi(++c); break;
		case 'b':
		case 'B':   asize =  atoi(++c);  break;
		case 'i':
		case 'I':   insert =  atoi(++c);  break;
		default:    error(usage, (char *)MAXARRAYSIZE);
	    }
	else if (!inf) {
	    infname = c;
	    if ((inf = fopen(infname, "r")) == NULL)
		error("\nCan't open input file",infname);
	}
	else if (!ouf) {
	    oufname = c;
	    if ((ouf = fopen(oufname, "w")) == NULL)
		error("\nCan't open output file",oufname);
	}
	else error(usage, NULL);
    }
    if (!ouf)
	if (inf) {
	    ouf = fopen(oufname = strdup(tmpnam(NULL)), "w");
	    rename_ = 1;
	} else {
	    inf = stdin;
	    ouf = stdout;
	}

    offset--;
    if (asize <= 1) asize = DEFARRAYSIZE;
    if (insert <= 1) insert = 1;
    if (insert >= asize) insert = asize;
}
