/* rt-mt.c
*/
/* This software is copyrighted as detailed in the LICENSE file. */


#include "EXTERN.h"
#include "common.h"
#include "list.h"
#include "intrp.h"
#include "trn.h"
#include "hash.h"
#include "cache.h"
#include "bits.h"
#include "kfile.h"
#include "ngdata.h"
#include "nntpclient.h"
#include "datasrc.h"
#include "nntp.h"
#include "ng.h"
#include "rcln.h"
#include "util.h"
#include "util2.h"
#include "rthread.h"
#include "rt-process.h"
#include "INTERN.h"
#include "rt-mt.h"
#include "rt-mt.ih"

static FILE* fp;
static bool word_same, long_same;
static BMAP my_bmap, mt_bmap;

static char* strings = NULL;
static WORD* author_cnts = 0;
static WORD* ids = 0;

static ARTICLE** article_array = 0;
static SUBJECT** subject_array = 0;
static char** author_array = 0;

static TOTAL total;
static PACKED_ROOT p_root;
static PACKED_ARTICLE p_article;

/* Initialize our thread code by determining the byte-order of the thread
** files and our own current byte-order.  If they differ, set flags to let
** the read code know what we'll need to translate.
*/
bool
mt_init()
{
    int i;
    long size;
    bool success = TRUE;

    datasrc->flags &= ~DF_TRY_THREAD;

    word_same = long_same = TRUE;
#ifdef SUPPORT_XTHREAD
    if (!datasrc->thread_dir) {
	if (nntp_command("XTHREAD DBINIT") <= 0)
	    return FALSE;
	size = nntp_readcheck();
	if (size >= 0)
	    size = nntp_read((char*)&mt_bmap, (long)sizeof (BMAP));
    }
    else
#endif
    {
	if ((fp = fopen(filexp(DBINIT), FOPEN_RB)) != NULL)
	    size = fread((char*)&mt_bmap, 1, sizeof (BMAP), fp);
	else
	    size = 0;
    }
    if (size >= (long)(sizeof (BMAP)) - 1) {
	if (mt_bmap.version != DB_VERSION) {
	    printf("\nMthreads database is the wrong version -- ignoring it.\n")
		FLUSH;
	    return FALSE;
	}
	mybytemap(&my_bmap);
	for (i = 0; i < sizeof (LONG); i++) {
	    if (i < sizeof (WORD)) {
		if (my_bmap.w[i] != mt_bmap.w[i])
		    word_same = FALSE;
	    }
	    if (my_bmap.l[i] != mt_bmap.l[i])
		long_same = FALSE;
	}
    } else
	success = FALSE;
#ifdef SUPPORT_XTHREAD
    if (!datasrc->thread_dir) {
	while (nntp_read(ser_line, (long)sizeof ser_line))
	    ;		/* trash any extraneous bytes */
    }
    else
#endif
    if (fp != NULL)
	fclose(fp);

    if (success)
	datasrc->flags |= DF_TRY_THREAD;
    return success;
}

/* Open and process the data in the group's thread file.  Returns TRUE unless
** we discovered a bogus thread file, destroyed the cache, and re-built it.
*/
int
mt_data()
{
    int ret = 1;
#ifdef SUPPORT_XTHREAD		/* use remote thread file? */
    long size;

    if (!datasrc->thread_dir) {
	if (nntp_command("XTHREAD THREAD") <= 0)
	    return 0;
	size = nntp_readcheck();
	if (size < 0)
	    return 0;

#ifdef VERBOSE
	IF(verbose)
	    printf("\nGetting thread file."), fflush(stdout);
#endif
	if (nntp_read((char*)&total, (long)sizeof (TOTAL)) < sizeof (TOTAL))
	    goto exit;
    }
    else
#endif
    {
	if ((fp = fopen(mt_name(ngname), FOPEN_RB)) == NULL)
	    return 0;
#ifdef VERBOSE
	IF(verbose)
	    printf("\nReading thread file."), fflush(stdout);
#endif

	if (fread((char*)&total, 1, sizeof (TOTAL), fp) < sizeof (TOTAL))
	    goto exit;
    }

    lp_bmap(&total.first, 4);
    wp_bmap(&total.root, 5);
    if (!total.root) {
	tweak_data();
	goto exit;
    }
#ifdef SUPPORT_NNTP
    if (!datasrc->thread_dir && total.last > lastart)
	total.last = lastart;
#endif

    if (read_authors()
     && read_subjects()
     && read_roots()
     && read_articles()
     && read_ids())
    {
	tweak_data();
	first_cached = absfirst;
	last_cached = (total.last < absfirst ? absfirst-1: total.last);
	cached_all_in_range = TRUE;
	goto exit;
    }
    /* Something failed.  Safefree takes care of checking if some items
    ** were already freed.  Any partially-allocated structures were freed
    ** before we got here.  All other structures are cleaned up now.
    */
    close_cache();
    safefree0(strings);
    safefree0(article_array);
    safefree0(subject_array);
    safefree0(author_array);
    safefree0(ids);
    datasrc->flags &= ~DF_TRY_THREAD;
    build_cache();
    datasrc->flags |= DF_TRY_THREAD;
    ret = -1;

exit:
#ifdef SUPPORT_XTHREAD
    if (!datasrc->thread_dir) {
	while (nntp_read(ser_line, (long)sizeof ser_line))
	    ;		/* trash any extraneous bytes */
    }
    else
#endif
	fclose(fp);

    return ret;
}

/* Change a newsgroup name into the name of the thread data file.  We
** subsitute any '.'s in the group name into '/'s (unless LONG_THREAD_NAMES
** is defined), prepend the path, and append the '/.thread' or '.th' on to
** the end.
*/
static char*
mt_name(group)
char* group;
{
#ifdef LONG_THREAD_NAMES
    sprintf(buf, "%s/%s", datasrc->thread_dir, group);
#else
    register char* cp;

    cp = strcpy(buf, datasrc->thread_dir) + strlen(datasrc->thread_dir);
    *cp++ = '/';
    strcpy(cp, group);
    while ((cp = index(cp, '.')))
	*cp = '/';
    if (datasrc->thread_dir == datasrc->spool_dir)
	strcat(buf, MT_FILE_NAME);
    else
	strcat(buf, ".th");
#endif
    return buf;
}

static char* subject_strings, *string_end;

/* The author information is an array of use-counts, followed by all the
** null-terminated strings crammed together.  The subject strings are read
** in at the same time, since they are appended to the end of the author
** strings.
*/
static int
read_authors()
{
    register int count;
    register char* string_ptr;
    register char** author_ptr;

    if (!read_item((char**)&author_cnts, (MEM_SIZE)total.author*sizeof (WORD)))
	return 0;
    safefree0(author_cnts);   /* we don't need these */

    if (!read_item(&strings, (MEM_SIZE)total.string1))
	return 0;

    string_ptr = strings;
    string_end = string_ptr + total.string1;
    if (string_end[-1] != '\0') {
	/*error("first string table is invalid.\n");*/
	return 0;
    }

    /* We'll use this array to point each article at its proper author
    ** (the packed values were saved as indexes).
    */
    author_array = (char**)safemalloc(total.author * sizeof (char*));
    author_ptr = author_array;

    for (count = total.author; count; count--) {
	if (string_ptr >= string_end)
	    break;
	*author_ptr++ = string_ptr;
	string_ptr += strlen(string_ptr) + 1;
    }
    subject_strings = string_ptr;

    if (count) {
	/*error("author unpacking failed.\n");*/
	return 0;
    }
    return 1;
}

/* The subject values consist of the crammed-together null-terminated strings
** (already read in above) and the use-count array.  They were saved in the
** order that the roots require while being unpacked.
*/
static int
read_subjects()
{
    register int count;
    register char* string_ptr;
    register SUBJECT** subj_ptr;
    WORD* subject_cnts;

    if (!read_item((char**)&subject_cnts,
		   (MEM_SIZE)total.subject * sizeof (WORD))) {
	/* (Error already logged.) */
	return 0;
    }
    free((char*)subject_cnts);		/* we don't need these */

    /* Use this array when unpacking the article's subject offset. */
    subject_array = (SUBJECT**)safemalloc(total.subject * sizeof (SUBJECT*));
    subj_ptr = subject_array;

    string_ptr = subject_strings;	/* string_end is already set */

    for (count = total.subject; count; count--) {
	int len;
	ARTICLE arty;
	if (string_ptr >= string_end)
	    break;
	len = strlen(string_ptr);
	arty.subj = 0;
	set_subj_line(&arty, string_ptr, len);
	if (len == 72)
	    arty.subj->flags |= SF_SUBJTRUNCED;
	arty.subj->thread_link = NULL;
	string_ptr += len + 1;
	*subj_ptr++ = arty.subj;
    }
    if (count || string_ptr != string_end) {
	/*error("subject data is invalid.\n");*/
	return 0;
    }
    return 1;
}

/* Read in the packed root structures to set each subject's thread article
** offset.  This gets turned into a real pointer later.
*/
static int
read_roots()
{
    register SUBJECT** subj_ptr;
    register int i;
    SUBJECT* sp;
    SUBJECT* prev_sp;
    int count;
    int ret;

    subj_ptr = subject_array;

    for (count = total.root; count--; ) {
#ifdef SUPPORT_XTHREAD
	if (!datasrc->thread_dir)
	    ret = nntp_read((char*)&p_root, (long)sizeof (PACKED_ROOT));
	else
#endif
	    ret = fread((char*)&p_root, 1, sizeof (PACKED_ROOT), fp);

	if (ret != sizeof (PACKED_ROOT)) {
	    /*error("failed root read -- %d bytes instead of %d.\n",
		ret, sizeof (PACKED_ROOT));*/
	    return 0;
	}
	wp_bmap(&p_root.articles, 3);	/* converts subject_cnt too */
	if (p_root.articles < 0 || p_root.articles >= total.article) {
	    /*error("root has invalid values.\n");*/
	    return 0;
	}
	i = p_root.subject_cnt;
	if (i <= 0 || (subj_ptr - subject_array) + i > total.subject) {
	    /*error("root has invalid values.\n");*/
	    return 0;
	}
	for (prev_sp = *subj_ptr; i--; prev_sp = sp, subj_ptr++) {
	    sp = *subj_ptr;
	    if (sp->thread_link == NULL) {
		sp->thread_link = prev_sp->thread_link;
		prev_sp->thread_link = sp;
	    }
	    else {
		while (sp != prev_sp && sp->thread_link != *subj_ptr)
		    sp = sp->thread_link;
		if (sp == prev_sp)
		    continue;
		sp->thread_link = prev_sp->thread_link;
		prev_sp->thread_link = *subj_ptr;
	    }
	}
    }
    return 1;
}

static bool invalid_data;

/* A simple routine that checks the validity of the article's subject value.
** A -1 means that it is NULL, otherwise it should be an offset into the
** subject array we just unpacked.
*/
static SUBJECT*
the_subject(num)
int num;
{
    if (num == -1)
	return NULL;
    if (num < 0 || num >= total.subject) {
	/*printf("Invalid subject in thread file: %d [%ld]\n", num, art_num);*/
	invalid_data = TRUE;
	return NULL;
    }
    return subject_array[num];
}

/* Ditto for author checking. */
static char*
the_author(num)
int num;
{
    if (num == -1)
	return NULL;
    if (num < 0 || num >= total.author) {
	/*error("invalid author in thread file: %d [%ld]\n", num, art_num);*/
	invalid_data = TRUE;
	return NULL;
    }
    return savestr(author_array[num]);
}

/* Our parent/sibling information is a relative offset in the article array.
** zero for none.  Child values are always found in the very next array
** element if child_cnt is non-zero.
*/
static ARTICLE*
the_article(relative_offset, num)
int relative_offset;
int num;
{
    union { ARTICLE* ap; int num; } uni;

    if (!relative_offset)
	return NULL;
    num += relative_offset;
    if (num < 0 || num >= total.article) {
	/*error("invalid article offset in thread file.\n");*/
	invalid_data = TRUE;
	return NULL;
    }
    uni.num = num+1;
    return uni.ap;		/* slip them an offset in disguise */
}

/* Read the articles into their trees.  Point everything everywhere. */
static int
read_articles()
{
    register int count;
    register ARTICLE* article;
    register ARTICLE** art_ptr;
    int ret;

    /* Build an array to interpret interlinkages of articles. */
    article_array = (ARTICLE**)safemalloc(total.article * sizeof (ARTICLE*));
    art_ptr = article_array;

    invalid_data = FALSE;
    for (count = 0; count < total.article; count++) {
#ifdef SUPPORT_XTHREAD
	if (!datasrc->thread_dir)
	    ret = nntp_read((char*)&p_article, (long)sizeof (PACKED_ARTICLE));
	else
#endif
	    ret = fread((char*)&p_article, 1, sizeof (PACKED_ARTICLE), fp);

	if (ret != sizeof (PACKED_ARTICLE)) {
	    /*error("failed article read -- %d bytes instead of %d.\n",
		ret, sizeof (PACKED_ARTICLE));*/
	    return 0;
	}
	lp_bmap(&p_article.num, 2);
	wp_bmap(&p_article.subject, 8);

#ifdef SUPPORT_NNTP
	article = *art_ptr++ = allocate_article(p_article.num > lastart?
						0 : p_article.num);
#else
	article = *art_ptr++ = allocate_article(p_article.num);
#endif
	article->date = p_article.date;
#ifndef DBM_XREFS
	if (olden_days < 2 && !(p_article.flags & HAS_XREFS))
	    article->xrefs = nullstr;
#endif
	article->from = the_author(p_article.author);
	article->parent = the_article(p_article.parent, count);
	article->child1 = the_article((WORD)(p_article.child_cnt?1:0), count);
	article->sibling = the_article(p_article.sibling, count);
	article->subj = the_subject(p_article.subject);
	if (invalid_data) {
	    /* (Error already logged.) */
	    return 0;
	}
	/* This is OK because parent articles precede their children */
	if (article->parent) {
	    union { ARTICLE* ap; int num; } uni;
	    uni.ap = article->parent;
	    article->parent = article_array[uni.num-1];
	}
	else
	    article->sibling = NULL;
	if (article->subj) {
	    article->flags |= AF_FROMTRUNCED | AF_THREADED
		    | ((p_article.flags & ROOT_ARTICLE)? 0 : AF_HAS_RE);
	    /* Give this subject to any faked parent articles */
	    while (article->parent && !article->parent->subj) {
		article->parent->subj = article->subj;
		article = article->parent;
	    }
	} else
	    article->flags |= AF_FAKE;
    }

    /* We're done with most of the pointer arrays at this point. */
    safefree0(subject_array);
    safefree0(author_array);
    safefree0(strings);

    return 1;
}

/* Read the message-id strings and attach them to each article.  The data
** format consists of the mushed-together null-terminated strings (a domain
** name followed by all its unique-id prefixes) and then the article offsets
** to which they belong.  The first domain name was omitted, as it is a null
** domain for those truly weird message-id's without '@'s.
*/
static int
read_ids()
{
    register ARTICLE* article;
    register char* string_ptr;
    register int i, count, len, len2;

    if (!read_item(&strings, (MEM_SIZE)total.string2)
     || !read_item((char**)&ids,
		(MEM_SIZE)(total.article+total.domain+1) * sizeof (WORD))) {
	return 0;
    }
    wp_bmap(ids, total.article + total.domain + 1);

    string_ptr = strings;
    string_end = string_ptr + total.string2;

    if (string_end[-1] != '\0') {
	/*error("second string table is invalid.\n");*/
	return 0;
    }

    for (i = 0, count = total.domain + 1; count--; i++) {
	if (i) {
	    if (string_ptr >= string_end) {
		/*error("error unpacking domain strings.\n");*/
		return 0;
	    }
	    sprintf(buf, "@%s", string_ptr);
	    len = strlen(string_ptr) + 1;
	    string_ptr += len;
	} else {
	    *buf = '\0';
	    len = 0;
	}
	if (ids[i] != -1) {
	    if (ids[i] < 0 || ids[i] >= total.article) {
		/*error("error in id array.\n");*/
		return 0;
	    }
	    article = article_array[ids[i]];
	    for (;;) {
		if (string_ptr >= string_end) {
		    /*error("error unpacking domain strings.\n");*/
		    return 0;
		}
		len2 = strlen(string_ptr);
		article->msgid = safemalloc(len2 + len + 2 + 1);
		sprintf(article->msgid, "<%s%s>", string_ptr, buf);
		string_ptr += len2 + 1;
		if (msgid_hash) {
		    HASHDATUM data;
		    data = hashfetch(msgid_hash, article->msgid, len2+len+2);
		    if (data.dat_len) {
			article->autofl = data.dat_len&(AUTO_SELS|AUTO_KILLS);
			if ((data.dat_len & KF_AGE_MASK) == 0)
			    article->autofl |= AUTO_OLD;
			else
			    kf_changethd_cnt++;
			data.dat_len = 0;
			free(data.dat_ptr);
		    }
		    data.dat_ptr = (char*)article;
		    hashstorelast(data);
		}
		if (++i >= total.article + total.domain + !count) {
		    /*error("overran id array unpacking domains.\n");*/
		    return 0;
		}
		if (ids[i] != -1) {
		    if (ids[i] < 0 || ids[i] >= total.article)
			return 0;
		    article = article_array[ids[i]];
		} else
		    break;
	    }
	}
    }
    safefree0(ids);
    safefree0(strings);

    return 1;
}

/* And finally, turn all the links into real pointers and mark missing
** articles as read.
*/
static void
tweak_data()
{
    register int count;
    register ARTICLE* ap;
    register ARTICLE** art_ptr;
    union { ARTICLE* ap; int num; } uni;
    int fl;

    art_ptr = article_array;
    for (count = total.article; count--; ) {
	ap = *art_ptr++;
	if (ap->child1) {
	    uni.ap = ap->child1;
	    ap->child1 = article_array[uni.num-1];
	}
	if (ap->sibling) {
	    uni.ap = ap->sibling;
	    ap->sibling = article_array[uni.num-1];
	}
	if (!ap->parent)
	    link_child(ap);
    }

    art_ptr = article_array;
    for (count = total.article; count--; ) {
	ap = *art_ptr++;
	if (ap->subj && !(ap->flags & AF_FAKE))
	    cache_article(ap);
	else
	    onemissing(ap);
    }

    art_ptr = article_array;
    for (count = total.article; count--; ) {
	ap = *art_ptr++;
	if ((fl = ap->autofl) != 0)
	    perform_auto_flags(ap, fl, fl, fl);
    }

    safefree0(article_array);
}

/* A shorthand for reading a chunk of the file into a malloc'ed array.
*/
static int
read_item(dest, len)
char** dest;
MEM_SIZE len;
{
    long ret;

    *dest = safemalloc(len);
#ifdef SUPPORT_XTHREAD
    if (!datasrc->thread_dir)
	ret = nntp_read(*dest, (long)len);
    else
#endif
	ret = fread(*dest, 1, (int)len, fp);

    if (ret != len) {
	free(*dest);
	*dest = NULL;
	return 0;
    }
    putchar('.');
    fflush(stdout);
    return 1;
}

/* Determine this machine's byte map for WORDs and LONGs.  A byte map is an
** array of BYTEs (sizeof (WORD) or sizeof (LONG) of them) with the 0th BYTE
** being the byte number of the high-order byte in my <type>, and so forth.
*/
static void
mybytemap(map)
BMAP* map;
{
    union {
	BYTE b[sizeof (LONG)];
	WORD w;
	LONG l;
    } u;
    register BYTE *mp;
    register int i, j;

    mp = &map->w[sizeof (WORD)];
    u.w = 1;
    for (i = sizeof (WORD); i > 0; i--) {
	for (j = 0; j < sizeof (WORD); j++) {
	    if (u.b[j] != 0)
		break;
	}
	if (j == sizeof (WORD))
	    goto bad_news;
	*--mp = j;
	while (u.b[j] != 0 && u.w)
	    u.w <<= 1;
    }

    mp = &map->l[sizeof (LONG)];
    u.l = 1;
    for (i = sizeof (LONG); i > 0; i--) {
	for (j = 0; j < sizeof (LONG); j++) {
	    if (u.b[j] != 0)
		break;
	}
	if (j == sizeof (LONG)) {
	  bad_news:
	    /* trouble -- set both to *something* consistent */
	    for (j = 0; j < sizeof (WORD); j++)
		map->w[j] = j;
	    for (j = 0; j < sizeof (LONG); j++)
		map->l[j] = j;
	    return;
	}
	*--mp = j;
	while (u.b[j] != 0 && u.l)
	    u.l <<= 1;
    }
}

/* Transform each WORD's byte-ordering in a buffer of the designated length.
*/
static void
wp_bmap(buf, len)
WORD* buf;
int len;
{
    union {
	BYTE b[sizeof (WORD)];
	WORD w;
    } in, out;
    register int i;

    if (word_same)
	return;

    while (len--) {
	in.w = *buf;
	for (i = 0; i < sizeof (WORD); i++)
	    out.b[my_bmap.w[i]] = in.b[mt_bmap.w[i]];
	*buf++ = out.w;
    }
}

/* Transform each LONG's byte-ordering in a buffer of the designated length.
*/
static void
lp_bmap(buf, len)
LONG* buf;
int len;
{
    union {
	BYTE b[sizeof (LONG)];
	LONG l;
    } in, out;
    register int i;

    if (long_same)
	return;

    while (len--) {
	in.l = *buf;
	for (i = 0; i < sizeof (LONG); i++)
	    out.b[my_bmap.l[i]] = in.b[mt_bmap.l[i]];
	*buf++ = out.l;
    }
}