/* rt-mt.c
*/
/* This software is copyrighted as detailed in the LICENSE file. */
#include "EXTERN.h"
#include "common.h"
#include "list.h"
#include "intrp.h"
#include "trn.h"
#include "hash.h"
#include "cache.h"
#include "bits.h"
#include "kfile.h"
#include "ngdata.h"
#include "nntpclient.h"
#include "datasrc.h"
#include "nntp.h"
#include "ng.h"
#include "rcln.h"
#include "util.h"
#include "util2.h"
#include "rthread.h"
#include "rt-process.h"
#include "INTERN.h"
#include "rt-mt.h"
#include "rt-mt.ih"
/* Stream for the local file currently being read (the db.init file in
** mt_init(), the group's thread file in mt_data()).  Not used when the data
** is fetched from the server with XTHREAD.
*/
static FILE* fp;
/* TRUE while the thread data's WORD / LONG byte order matches ours, in which
** case wp_bmap() / lp_bmap() return without touching their buffer.
*/
static bool word_same, long_same;
/* Our own byte map (filled in by mybytemap()) and the one read by mt_init(). */
static BMAP my_bmap, mt_bmap;
/* The string table currently held in memory: read_authors() loads the first
** one, read_ids() the second.
*/
static char* strings = NULL;
/* Author use-counts; read_authors() reads them and frees them right away. */
static WORD* author_cnts = 0;
/* The WORD array read by read_ids() (article indexes, -1 as a separator). */
static WORD* ids = 0;
/* Lookup tables that turn the packed indexes into real pointers. */
static ARTICLE** article_array = 0;
static SUBJECT** subject_array = 0;
static char** author_array = 0;
/* The header record read from the start of the thread data by mt_data(). */
static TOTAL total;
/* Scratch buffers that hold one packed record at a time. */
static PACKED_ROOT p_root;
static PACKED_ARTICLE p_article;
/* Initialize our thread code by determining the byte-order of the thread
** files and our own current byte-order.  If they differ, set flags to let
** the read code know what we'll need to translate.
**
** The thread files' byte map is read from the DBINIT file, or from the
** server's reply to "XTHREAD DBINIT" when datasrc->thread_dir is not set.
** Returns TRUE and sets DF_TRY_THREAD in datasrc->flags when a byte map of
** the right version was obtained; otherwise returns FALSE and leaves
** DF_TRY_THREAD cleared.
*/
bool
mt_init()
{
    int i;
    long size;
    bool success = TRUE;

    datasrc->flags &= ~DF_TRY_THREAD;
    word_same = long_same = TRUE;
#ifdef SUPPORT_XTHREAD
    if (!datasrc->thread_dir) {
        if (nntp_command("XTHREAD DBINIT") <= 0)
            return FALSE;
        size = nntp_readcheck();
        if (size >= 0)
            size = nntp_read((char*)&mt_bmap, (long)sizeof (BMAP));
    }
    else
#endif
    {
        if ((fp = fopen(filexp(DBINIT), FOPEN_RB)) != NULL)
            size = fread((char*)&mt_bmap, 1, sizeof (BMAP), fp);
        else
            size = 0;
    }
    if (size >= (long)(sizeof (BMAP)) - 1) {
        if (mt_bmap.version != DB_VERSION) {
            printf("\nMthreads database is the wrong version -- ignoring it.\n")
              FLUSH;
            /* Do not return from here: the file opened above (or the rest
            ** of the server's reply) still has to be disposed of below.
            */
            success = FALSE;
        }
        else {
            mybytemap(&my_bmap);
            /* Compare the two maps; the WORD map is only sizeof (WORD)
            ** entries long, so it is checked on the leading iterations.
            */
            for (i = 0; i < sizeof (LONG); i++) {
                if (i < sizeof (WORD)) {
                    if (my_bmap.w[i] != mt_bmap.w[i])
                        word_same = FALSE;
                }
                if (my_bmap.l[i] != mt_bmap.l[i])
                    long_same = FALSE;
            }
        }
    } else
        success = FALSE;
#ifdef SUPPORT_XTHREAD
    if (!datasrc->thread_dir) {
        while (nntp_read(ser_line, (long)sizeof ser_line))
            ; /* trash any extraneous bytes */
    }
    else
#endif
    if (fp != NULL) {
        fclose(fp);
        fp = NULL;
    }
    if (success)
        datasrc->flags |= DF_TRY_THREAD;
    return success;
}
/* Open and process the data in the group's thread file, or fetch it from the
** server with "XTHREAD THREAD" when datasrc->thread_dir is not set.
**
** Returns 0 if the thread data could not be opened or requested, -1 if the
** data turned out to be bogus (the cache was then destroyed and re-built
** without the thread data), and 1 otherwise.  Note that a short read of the
** TOTAL header also leaves through the exit label with 1, having loaded
** nothing.
*/
int
mt_data()
{
int ret = 1;
#ifdef SUPPORT_XTHREAD /* use remote thread file? */
long size;
if (!datasrc->thread_dir) {
if (nntp_command("XTHREAD THREAD") <= 0)
return 0;
size = nntp_readcheck();
if (size < 0)
return 0;
#ifdef VERBOSE
IF(verbose)
printf("\nGetting thread file."), fflush(stdout);
#endif
/* NOTE(review): the result is compared against an unsigned sizeof --
** confirm nntp_read() cannot return a negative value here.
*/
if (nntp_read((char*)&total, (long)sizeof (TOTAL)) < sizeof (TOTAL))
goto exit;
}
else
#endif
{
if ((fp = fopen(mt_name(ngname), FOPEN_RB)) == NULL)
return 0;
#ifdef VERBOSE
IF(verbose)
printf("\nReading thread file."), fflush(stdout);
#endif
if (fread((char*)&total, 1, sizeof (TOTAL), fp) < sizeof (TOTAL))
goto exit;
}
/* Translate the header's byte order in place: 4 LONGs starting at
** total.first and 5 WORDs starting at total.root.  (This relies on the
** field layout of TOTAL, which is declared elsewhere.)
*/
lp_bmap(&total.first, 4);
wp_bmap(&total.root, 5);
/* No roots means there is nothing to unpack; tweak_data() is still run. */
if (!total.root) {
tweak_data();
goto exit;
}
#ifdef SUPPORT_NNTP
/* For remote data, don't let total.last exceed lastart. */
if (!datasrc->thread_dir && total.last > lastart)
total.last = lastart;
#endif
/* The sections must be read in exactly this order; the first reader that
** fails stops the chain.
*/
if (read_authors()
&& read_subjects()
&& read_roots()
&& read_articles()
&& read_ids())
{
tweak_data();
first_cached = absfirst;
last_cached = (total.last < absfirst ? absfirst-1: total.last);
cached_all_in_range = TRUE;
goto exit;
}
/* Something failed. Safefree takes care of checking if some items
** were already freed. Any partially-allocated structures were freed
** before we got here. All other structures are cleaned up now.
*/
close_cache();
safefree0(strings);
safefree0(article_array);
safefree0(subject_array);
safefree0(author_array);
safefree0(ids);
/* Re-build the cache with DF_TRY_THREAD turned off for the duration of the
** build_cache() call, then turn the flag back on.
*/
datasrc->flags &= ~DF_TRY_THREAD;
build_cache();
datasrc->flags |= DF_TRY_THREAD;
ret = -1;
exit:
#ifdef SUPPORT_XTHREAD
if (!datasrc->thread_dir) {
while (nntp_read(ser_line, (long)sizeof ser_line))
; /* trash any extraneous bytes */
}
else
#endif
fclose(fp);
return ret;
}
/* Change a newsgroup name into the name of the thread data file, built in
** the global buf (which is also what we return).
**
** With LONG_THREAD_NAMES the result is simply "<thread_dir>/<group>".
** Otherwise we substitute a '/' for every '.' in the group name, prepend
** the thread directory, and append MT_FILE_NAME when the thread directory
** is the spool directory, or ".th" when it is not.
*/
static char*
mt_name(group)
char* group;
{
#ifdef LONG_THREAD_NAMES
    sprintf(buf, "%s/%s", datasrc->thread_dir, group);
#else
    register char* dot;
    char* tail;

    strcpy(buf, datasrc->thread_dir);
    tail = buf + strlen(datasrc->thread_dir);
    *tail++ = '/';
    strcpy(tail, group);
    /* Only the group part is scanned, so dots in the directory survive. */
    for (dot = index(tail, '.'); dot; dot = index(dot, '.'))
        *dot = '/';
    if (datasrc->thread_dir != datasrc->spool_dir)
        strcat(buf, ".th");
    else
        strcat(buf, MT_FILE_NAME);
#endif
    return buf;
}
/* subject_strings: where the subject strings start inside the first string
** table (set by read_authors(), consumed by read_subjects()).
** string_end: one past the last byte of the string table being unpacked.
*/
static char* subject_strings, *string_end;
/* The author information is an array of use-counts, followed by all the
** null-terminated strings crammed together.  The subject strings are read
** in at the same time, since they are appended to the end of the author
** strings.
**
** On success author_array[] points at each author's string, subject_strings
** points just past the last author string, and 1 is returned.  Returns 0 if
** a read fails or the data is inconsistent; anything left allocated in
** strings or author_array is then the caller's to free.
*/
static int
read_authors()
{
    register int count;
    register char* string_ptr;
    register char** author_ptr;

    /* The sizes come straight from the file.  A negative count would turn
    ** into a huge allocation request, and an empty string table has no
    ** final byte for the terminator check below to look at.
    */
    if (total.author < 0 || total.string1 <= 0)
        return 0;

    if (!read_item((char**)&author_cnts, (MEM_SIZE)total.author*sizeof (WORD)))
        return 0;
    safefree0(author_cnts);		/* we don't need these */

    if (!read_item(&strings, (MEM_SIZE)total.string1))
        return 0;

    string_ptr = strings;
    string_end = string_ptr + total.string1;
    /* The table must end in a NUL or the strlen() calls could run off it. */
    if (string_end[-1] != '\0') {
        /*error("first string table is invalid.\n");*/
        return 0;
    }

    /* We'll use this array to point each article at its proper author
    ** (the packed values were saved as indexes).
    */
    author_array = (char**)safemalloc(total.author * sizeof (char*));
    author_ptr = author_array;

    for (count = total.author; count; count--) {
        if (string_ptr >= string_end)
            break;
        *author_ptr++ = string_ptr;
        string_ptr += strlen(string_ptr) + 1;
    }
    subject_strings = string_ptr;

    /* A non-zero count means the table ran out before the authors did. */
    if (count) {
        /*error("author unpacking failed.\n");*/
        return 0;
    }
    return 1;
}
/* The subject values consist of the crammed-together null-terminated strings
** (already read in above) and the use-count array. They were saved in the
** order that the roots require while being unpacked.
*/
static int
read_subjects()
{
register int count;
register char* string_ptr;
register SUBJECT** subj_ptr;
WORD* subject_cnts;
if (!read_item((char**)&subject_cnts,
(MEM_SIZE)total.subject * sizeof (WORD))) {
/* (Error already logged.) */
return 0;
}
free((char*)subject_cnts); /* we don't need these */
/* Use this array when unpacking the article's subject offset. */
subject_array = (SUBJECT**)safemalloc(total.subject * sizeof (SUBJECT*));
subj_ptr = subject_array;
string_ptr = subject_strings; /* string_end is already set */
for (count = total.subject; count; count--) {
int len;
ARTICLE arty;
if (string_ptr >= string_end)
break;
len = strlen(string_ptr);
arty.subj = 0;
set_subj_line(&arty, string_ptr, len);
if (len == 72)
arty.subj->flags |= SF_SUBJTRUNCED;
arty.subj->thread_link = NULL;
string_ptr += len + 1;
*subj_ptr++ = arty.subj;
}
if (count || string_ptr != string_end) {
/*error("subject data is invalid.\n");*/
return 0;
}
return 1;
}
/* Read in the total.root packed root structures.  Each root claims the next
** subject_cnt entries of subject_array[]; those subjects are joined through
** their thread_link fields into one circular list per root.
**
** Returns 1 on success, 0 on a short read or an out-of-range value.
*/
static int
read_roots()
{
register SUBJECT** subj_ptr;
register int i;
SUBJECT* sp;
SUBJECT* prev_sp;
int count;
int ret;
subj_ptr = subject_array;
for (count = total.root; count--; ) {
#ifdef SUPPORT_XTHREAD
if (!datasrc->thread_dir)
ret = nntp_read((char*)&p_root, (long)sizeof (PACKED_ROOT));
else
#endif
ret = fread((char*)&p_root, 1, sizeof (PACKED_ROOT), fp);
if (ret != sizeof (PACKED_ROOT)) {
/*error("failed root read -- %d bytes instead of %d.\n",
ret, sizeof (PACKED_ROOT));*/
return 0;
}
wp_bmap(&p_root.articles, 3); /* converts subject_cnt too */
if (p_root.articles < 0 || p_root.articles >= total.article) {
/*error("root has invalid values.\n");*/
return 0;
}
/* The root's subjects must all fit inside what is left of subject_array. */
i = p_root.subject_cnt;
if (i <= 0 || (subj_ptr - subject_array) + i > total.subject) {
/*error("root has invalid values.\n");*/
return 0;
}
/* prev_sp starts out as the root's first subject, so the first pass links
** that subject to itself (when it is not linked yet).
*/
for (prev_sp = *subj_ptr; i--; prev_sp = sp, subj_ptr++) {
sp = *subj_ptr;
if (sp->thread_link == NULL) {
/* Unlinked subject: insert it right after prev_sp. */
sp->thread_link = prev_sp->thread_link;
prev_sp->thread_link = sp;
}
else {
/* This subject is already on a list (presumably set_subj_line() handed
** back a SUBJECT that was seen before -- verify).  Walk its list until we
** either meet prev_sp (same list already, nothing to do) or reach the node
** that points back at this subject, then splice the two lists together.
*/
while (sp != prev_sp && sp->thread_link != *subj_ptr)
sp = sp->thread_link;
if (sp == prev_sp)
continue;
sp->thread_link = prev_sp->thread_link;
prev_sp->thread_link = *subj_ptr;
}
}
}
return 1;
}
/* Set to TRUE by the_subject(), the_author() and the_article() when they are
** handed an out-of-range value; read_articles() clears it before its loop
** and checks it after unpacking each article.
*/
static bool invalid_data;
/* Map an article's packed subject value onto the SUBJECT it names.  A -1
** stands for "no subject" and quietly yields NULL; any other value must be
** an index into the subject array we just unpacked, otherwise invalid_data
** is raised and NULL is returned.
*/
static SUBJECT*
the_subject(num)
int num;
{
    if (num >= 0 && num < total.subject)
        return subject_array[num];
    if (num != -1) {
        /*printf("Invalid subject in thread file: %d [%ld]\n", num, art_num);*/
        invalid_data = TRUE;
    }
    return NULL;
}
/* Map an article's packed author value onto a savestr() copy of the author
** string.  A -1 stands for "no author" and quietly yields NULL; any other
** value outside the author array raises invalid_data and yields NULL.
*/
static char*
the_author(num)
int num;
{
    if (num >= 0 && num < total.author)
        return savestr(author_array[num]);
    if (num != -1) {
        /*error("invalid author in thread file: %d [%ld]\n", num, art_num);*/
        invalid_data = TRUE;
    }
    return NULL;
}
/* Our parent/sibling information is a relative offset in the article array,
** zero for none.  Child values are always found in the very next array
** element if child_cnt is non-zero.
**
** The result is not a usable pointer yet: it is the absolute array index
** plus one, stored through the int member of a pointer/int union so that a
** valid link can never look like NULL.  The callers read the index back
** through the same kind of union.  An index outside the article array
** raises invalid_data and yields NULL.
*/
static ARTICLE*
the_article(relative_offset, num)
int relative_offset;
int num;
{
    union { ARTICLE* ap; int num; } disguise;
    int target;

    if (relative_offset == 0)
        return NULL;

    target = num + relative_offset;
    if (target < 0 || target >= total.article) {
        /*error("invalid article offset in thread file.\n");*/
        invalid_data = TRUE;
        return NULL;
    }

    disguise.num = target + 1;
    return disguise.ap;		/* slip them an offset in disguise */
}
/* Read the articles into their trees. Point everything everywhere. */
static int
read_articles()
{
register int count;
register ARTICLE* article;
register ARTICLE** art_ptr;
int ret;
/* Build an array to interpret interlinkages of articles. */
article_array = (ARTICLE**)safemalloc(total.article * sizeof (ARTICLE*));
art_ptr = article_array;
invalid_data = FALSE;
for (count = 0; count < total.article; count++) {
#ifdef SUPPORT_XTHREAD
if (!datasrc->thread_dir)
ret = nntp_read((char*)&p_article, (long)sizeof (PACKED_ARTICLE));
else
#endif
ret = fread((char*)&p_article, 1, sizeof (PACKED_ARTICLE), fp);
if (ret != sizeof (PACKED_ARTICLE)) {
/*error("failed article read -- %d bytes instead of %d.\n",
ret, sizeof (PACKED_ARTICLE));*/
return 0;
}
lp_bmap(&p_article.num, 2);
wp_bmap(&p_article.subject, 8);
#ifdef SUPPORT_NNTP
article = *art_ptr++ = allocate_article(p_article.num > lastart?
0 : p_article.num);
#else
article = *art_ptr++ = allocate_article(p_article.num);
#endif
article->date = p_article.date;
#ifndef DBM_XREFS
if (olden_days < 2 && !(p_article.flags & HAS_XREFS))
article->xrefs = nullstr;
#endif
article->from = the_author(p_article.author);
article->parent = the_article(p_article.parent, count);
article->child1 = the_article((WORD)(p_article.child_cnt?1:0), count);
article->sibling = the_article(p_article.sibling, count);
article->subj = the_subject(p_article.subject);
if (invalid_data) {
/* (Error already logged.) */
return 0;
}
/* This is OK because parent articles precede their children */
if (article->parent) {
union { ARTICLE* ap; int num; } uni;
uni.ap = article->parent;
article->parent = article_array[uni.num-1];
}
else
article->sibling = NULL;
if (article->subj) {
article->flags |= AF_FROMTRUNCED | AF_THREADED
| ((p_article.flags & ROOT_ARTICLE)? 0 : AF_HAS_RE);
/* Give this subject to any faked parent articles */
while (article->parent && !article->parent->subj) {
article->parent->subj = article->subj;
article = article->parent;
}
} else
article->flags |= AF_FAKE;
}
/* We're done with most of the pointer arrays at this point. */
safefree0(subject_array);
safefree0(author_array);
safefree0(strings);
return 1;
}
/* Read the message-id strings and attach them to each article.  The data
** format consists of the mushed-together null-terminated strings (a domain
** name followed by all its unique-id prefixes) and then the article offsets
** to which they belong.  The first domain name was omitted, as it is a null
** domain for those truly weird message-id's without '@'s.
**
** The ids[] array holds total.article + total.domain + 1 entries: runs of
** article indexes, one run per domain, each run closed by a -1.  When
** msgid_hash is set, every message-id is also looked up there and the
** article is stored in the matching entry.
**
** Returns 1 on success (ids and strings are freed), 0 on a short read or
** inconsistent data (clean-up is left to the caller).
*/
static int
read_ids()
{
    register ARTICLE* article;
    register char* string_ptr;
    register int i, count, len, len2;

    /* The size comes straight from the file.  An empty table has no final
    ** byte for the terminator check below to look at, and a negative size
    ** would turn into a huge allocation request.
    */
    if (total.string2 <= 0)
        return 0;

    if (!read_item(&strings, (MEM_SIZE)total.string2)
     || !read_item((char**)&ids,
                (MEM_SIZE)(total.article+total.domain+1) * sizeof (WORD))) {
        return 0;
    }
    wp_bmap(ids, total.article + total.domain + 1);

    string_ptr = strings;
    string_end = string_ptr + total.string2;

    /* The table must end in a NUL or the strlen() calls could run off it. */
    if (string_end[-1] != '\0') {
        /*error("second string table is invalid.\n");*/
        return 0;
    }

    for (i = 0, count = total.domain + 1; count--; i++) {
        if (i) {
            if (string_ptr >= string_end) {
                /*error("error unpacking domain strings.\n");*/
                return 0;
            }
            /* buf holds "@domain"; len counts the '@' plus the name.
            ** NOTE(review): nothing bounds the domain length against the
            ** size of buf, which is declared elsewhere -- confirm.
            */
            sprintf(buf, "@%s", string_ptr);
            len = strlen(string_ptr) + 1;
            string_ptr += len;
        } else {
            /* The first group is the null domain: ids without an '@'. */
            *buf = '\0';
            len = 0;
        }
        if (ids[i] != -1) {
            if (ids[i] < 0 || ids[i] >= total.article) {
                /*error("error in id array.\n");*/
                return 0;
            }
            article = article_array[ids[i]];
            for (;;) {
                if (string_ptr >= string_end) {
                    /*error("error unpacking domain strings.\n");*/
                    return 0;
                }
                len2 = strlen(string_ptr);
                /* '<' + prefix + "@domain" + '>' + NUL */
                article->msgid = safemalloc(len2 + len + 2 + 1);
                sprintf(article->msgid, "<%s%s>", string_ptr, buf);
                string_ptr += len2 + 1;
                if (msgid_hash) {
                    HASHDATUM data;
                    data = hashfetch(msgid_hash, article->msgid, len2+len+2);
                    if (data.dat_len) {
                        /* An existing entry carries auto-select/kill flags
                        ** in dat_len; take them over and free its data.
                        */
                        article->autofl = data.dat_len&(AUTO_SELS|AUTO_KILLS);
                        if ((data.dat_len & KF_AGE_MASK) == 0)
                            article->autofl |= AUTO_OLD;
                        else
                            kf_changethd_cnt++;
                        data.dat_len = 0;
                        free(data.dat_ptr);
                    }
                    data.dat_ptr = (char*)article;
                    hashstorelast(data);
                }
                /* Every domain but the last still needs room for the
                ** entries that follow, hence the "+ !count".
                */
                if (++i >= total.article + total.domain + !count) {
                    /*error("overran id array unpacking domains.\n");*/
                    return 0;
                }
                if (ids[i] != -1) {
                    if (ids[i] < 0 || ids[i] >= total.article)
                        return 0;
                    article = article_array[ids[i]];
                } else
                    break;
            }
        }
    }
    safefree0(ids);
    safefree0(strings);

    return 1;
}
/* And finally, turn all the links into real pointers and mark missing
** articles as read.
**
** Makes three passes over article_array[]: the first turns the disguised
** child1/sibling offsets into pointers and calls link_child() for every
** article without a parent; the second calls cache_article() for articles
** that have a subject and are not fake, and onemissing() for the rest; the
** third calls perform_auto_flags() for articles that picked up auto flags.
** The array is freed afterwards.
*/
static void
tweak_data()
{
    register int count;
    register ARTICLE* ap;
    register ARTICLE** art_ptr;
    union { ARTICLE* ap; int num; } uni;
    int fl;

    /* mt_data() also calls us for a thread file without roots, when no
    ** article array has been built.  If such a file still claims to hold
    ** articles there is nothing to walk, so don't dereference a null array.
    */
    if (!article_array)
        return;

    art_ptr = article_array;
    for (count = total.article; count--; ) {
        ap = *art_ptr++;
        if (ap->child1) {
            uni.ap = ap->child1;
            ap->child1 = article_array[uni.num-1];
        }
        if (ap->sibling) {
            uni.ap = ap->sibling;
            ap->sibling = article_array[uni.num-1];
        }
        if (!ap->parent)
            link_child(ap);
    }

    art_ptr = article_array;
    for (count = total.article; count--; ) {
        ap = *art_ptr++;
        if (ap->subj && !(ap->flags & AF_FAKE))
            cache_article(ap);
        else
            onemissing(ap);
    }

    art_ptr = article_array;
    for (count = total.article; count--; ) {
        ap = *art_ptr++;
        if ((fl = ap->autofl) != 0)
            perform_auto_flags(ap, fl, fl, fl);
    }

    safefree0(article_array);
}
/* A shorthand for reading a chunk of the file into a malloc'ed array.
**
** Allocates len bytes, stores the pointer in *dest and fills the buffer
** from the server (when datasrc->thread_dir is not set and XTHREAD support
** is compiled in) or from fp.  On a complete read a '.' is written to
** stdout and 1 is returned; otherwise the buffer is freed, *dest is set to
** NULL and 0 is returned.
*/
static int
read_item(dest, len)
char** dest;
MEM_SIZE len;
{
    long got;
    char* chunk = safemalloc(len);

    *dest = chunk;
#ifdef SUPPORT_XTHREAD
    if (!datasrc->thread_dir)
        got = nntp_read(chunk, (long)len);
    else
#endif
        got = fread(chunk, 1, (int)len, fp);

    if (got == len) {
        putchar('.');
        fflush(stdout);
        return 1;
    }

    free(chunk);
    *dest = NULL;
    return 0;
}
/* Determine this machine's byte map for WORDs and LONGs. A byte map is an
** array of BYTEs (sizeof (WORD) or sizeof (LONG) of them) with the 0th BYTE
** being the byte number of the high-order byte in my <type>, and so forth.
**
** The method: start with the value 1, note which byte of the union is
** non-zero, then shift left until that byte is clear again, which moves the
** bit into the next more significant byte.  The map is filled in from its
** last entry backwards.  If at some step no byte is non-zero, both maps are
** set to the identity mapping instead.
*/
static void
mybytemap(map)
BMAP* map;
{
union {
BYTE b[sizeof (LONG)];
WORD w;
LONG l;
} u;
register BYTE *mp;
register int i, j;
/* WORD map: mp starts one past the end and is pre-decremented. */
mp = &map->w[sizeof (WORD)];
u.w = 1;
for (i = sizeof (WORD); i > 0; i--) {
/* Find the byte that currently holds the set bit. */
for (j = 0; j < sizeof (WORD); j++) {
if (u.b[j] != 0)
break;
}
if (j == sizeof (WORD))
goto bad_news;
*--mp = j;
/* Shift the bit out of byte j (stop if the value runs to zero). */
while (u.b[j] != 0 && u.w)
u.w <<= 1;
}
/* LONG map: the same procedure over sizeof (LONG) bytes. */
mp = &map->l[sizeof (LONG)];
u.l = 1;
for (i = sizeof (LONG); i > 0; i--) {
for (j = 0; j < sizeof (LONG); j++) {
if (u.b[j] != 0)
break;
}
if (j == sizeof (LONG)) {
/* The WORD loop above jumps in here as well. */
bad_news:
/* trouble -- set both to *something* consistent */
for (j = 0; j < sizeof (WORD); j++)
map->w[j] = j;
for (j = 0; j < sizeof (LONG); j++)
map->l[j] = j;
return;
}
*--mp = j;
while (u.b[j] != 0 && u.l)
u.l <<= 1;
}
}
/* Transform each WORD's byte-ordering in a buffer of the designated length
** (len counts WORDs, not bytes).  The conversion is done in place, moving
** the thread files' i'th byte (per mt_bmap) into our i'th byte position
** (per my_bmap).  Does nothing when both byte orders agree.
*/
static void
wp_bmap(buf, len)
WORD* buf;
int len;
{
    union {
        BYTE b[sizeof (WORD)];
        WORD w;
    } src, dst;
    register int k;

    if (!word_same) {
        for ( ; len--; buf++) {
            src.w = *buf;
            for (k = 0; k < sizeof (WORD); k++)
                dst.b[my_bmap.w[k]] = src.b[mt_bmap.w[k]];
            *buf = dst.w;
        }
    }
}
/* Transform each LONG's byte-ordering in a buffer of the designated length
** (len counts LONGs, not bytes).  The conversion is done in place, moving
** the thread files' i'th byte (per mt_bmap) into our i'th byte position
** (per my_bmap).  Does nothing when both byte orders agree.
*/
static void
lp_bmap(buf, len)
LONG* buf;
int len;
{
    union {
        BYTE b[sizeof (LONG)];
        LONG l;
    } src, dst;
    register int k;

    if (!long_same) {
        for ( ; len--; buf++) {
            src.l = *buf;
            for (k = 0; k < sizeof (LONG); k++)
                dst.b[my_bmap.l[k]] = src.b[mt_bmap.l[k]];
            *buf = dst.l;
        }
    }
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */