/*
texpire -- expire old articles

Written by Arnt Gulbrandsen <agulbra@troll.no> and copyright 1995
Troll Tech AS, Postboks 6133 Etterstad, 0602 Oslo, Norway, fax +47
22646949.
Modified by Cornelius Krasel <krasel@wpxx02.toxi.uni-wuerzburg.de>
and Randolf Skerka <Randolf.Skerka@gmx.de>.
Copyright of the modifications 1997.
Modified by Kent Robotti <robotti@erols.com>. Copyright of the
modifications 1998.
Modified by Markus Enzenberger <enz@cip.physik.uni-muenchen.de>.
Copyright of the modifications 1998.
Modified by Cornelius Krasel <krasel@wpxx02.toxi.uni-wuerzburg.de>.
Copyright of the modifications 1998, 1999.
Modified by Kazushi (Jam) Marukawa <jam@pobox.com>.
Copyright of the modifications 1998, 1999.
Modified by Matthias Andree <matthias.andree@gmx.de>.
Copyright of the modifications 2000 - 2006.

See file COPYING for restrictions on the use of this software.
*/

#include "leafnode.h"
#include "ln_log.h"

#ifdef SOCKS
#include <socks.h>
#endif

#include <ctype.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "system.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include "mysigact.h"
#include "mastring.h"

/* fallback expiry cut-off used by expiregroup() when a group has no
 * setting of its own; main() copies the global "expire" into it */
static time_t default_expire;

/* verbosity level: incremented by each -v, reset to 0 by -q */
int verbose = 0;
int debug = 0;
static int repair = 0;			/* run expensive checks */

static int use_atime = 1;		/* look for atime on articles to expire */
static int quiet = 0;			/* shut up */

static int eflag;			/* set to 1 if "mids" file based expiry must not take place */

/* name of the record file that readmids() consumes and cleanmids()
 * removes; th() appends to a file of the same name */
static const char *const MIDSFILE = "mids";

/* Per-article bookkeeping used by dogroup(); the array is indexed by
 * (article number - low water mark). */
struct exp {
    char *xover;    /* full xover info */ /* points into the .overview buffer, NULL if none or illegal */
    int kill;       /* nonzero: article is to be removed */
    int exists;     /* nonzero: a directory entry with this number was seen */
};

/* jump target established by sigsetjmp() in main(); sig_int() jumps to it */
static sigjmp_buf jmpbuffer;
/* set to 1 by main() once a signal has been caught, after which sig_int()
 * returns without doing anything.
 * NOTE(review): this is read inside a signal handler - consider declaring
 * it volatile sig_atomic_t. */
static int blocksig;

/* Signal handler installed by main() for SIGINT and SIGTERM.
 * While blocksig is clear, either of these signals jumps back to the
 * sigsetjmp() point stored in jmpbuffer with value 1.  Any other signal
 * number, or any signal arriving while blocksig is set, is ignored. */
static RETSIGTYPE
sig_int(int signo)
{
    if (!blocksig) {
	switch (signo) {
	case SIGINT:
	case SIGTERM:
	    siglongjmp(jmpbuffer, 1);
	    break;
	default:
	    break;
	}
    }
}

/* hook for traverseidtree */
/* writes "mids" file for reliable expiry without counting hard links
 * to evade local hard link attack DoS
 *
 * mm: message-ID to record, or NULL to release the static record buffer.
 *
 * The record is appended to the file "mids" in the directory part of
 * the path that lookup() returns for the ID.  A record consists of the
 * ID length as a native ssize_t followed by that many bytes of the ID
 * (no terminating NUL), with every '/' replaced by '@'.
 *
 * Returns 0 on success and 1 on any error (errors are logged).
 */
static int
th(const char *mm) {
    const char *f;
    char *p, *t;
    int fd;
    ssize_t ml;
    size_t reclen;
    char *m;
    struct stat st;
    /*@only@*/ static char *b;
    static size_t b_siz;

    if (mm == NULL)
    {
	/* release the buffer and forget the pointer, so that a later
	 * call allocates afresh instead of freeing it a second time */
	b_siz = 0;
	free(b);
	b = NULL;
	return 0;
    }

    m = critstrdup(mm, "th");
    f = lookup(m);
    /* room for the path with its last component replaced by "mids" */
    p = critmalloc(strlen(f) + 6, "th");
    strcpy(p, f);
    t = strrchr(p, '/');
    if (!t) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "can't find / - internal error");
	free(m);
	free(p);
	return 1;
    }
    strcpy(++t, "mids");

    fd = open(p, O_WRONLY|O_APPEND|O_CREAT, 0600);
    if (fd < 0) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "cannot append to file %s: %m", p);
	free(p);
	free(m);
	return 1;
    }
    /* remember the current size so a failed write can be rolled back */
    if (fstat(fd, &st)) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "cannot fstat fd #%d: %m", fd);
	free(p);
	free(m);
	close(fd);
	return 1;
    }
    /* this file is not portable across endianness, why bother, we're
     * alone - the spool is locked */

    ml = strlen(m);
    reclen = (size_t)ml + sizeof(ml);

    /* resize buffer memory, generously */
    if (b_siz < reclen + 1) {
	free(b);
	b_siz = reclen + 128;
	b = critmalloc(b_siz, "th");
    }

    /* make some effort to write the whole record (size + content)
     * atomically, to avoid corruption when we're interrupted */
    memcpy(b, &ml, sizeof(ml));
    for(t = m; *t; t++)
	if (*t == '/')
	    *t = '@';
    strcpy(b + sizeof(ml), m);
    if (write(fd, b, reclen) < (ssize_t)reclen) {
	/* keep the errno of the failed write for the log message */
	int e = errno;

	/* short write -> rollback: truncate file to old size */
	if (ftruncate(fd, st.st_size) < 0)
	    ln_log(LNLOG_SERR, LNLOG_CTOP, "cannot truncate file %s: %m", p);
	(void)close(fd);
	errno = e;
	goto barf;
    }
    /* the descriptor must not be closed again if close() itself fails */
    if (close(fd) < 0) goto barf;
    free(m);
    free(p);
    return 0;
barf:
    ln_log(LNLOG_SERR, LNLOG_CTOP, "write error on file %s: %m", p);
    free(m);
    free(p);
    return 1;
}

/*
 * Expire the articles of one newsgroup.
 *
 * g:       active-file entry of the group, may be NULL; when present
 *          its first/last article numbers are updated.
 * name:    name of the group, used to enter its spool directory.
 * expdays: cut-off that is compared directly against the st_mtime and
 *          st_atime of each article file; a negative value means the
 *          group never expires.
 *          NOTE(review): the caller derives this from time_t values,
 *          so the int type may truncate - confirm.
 *
 * The function scans the group directory for the lowest and highest
 * article number, refreshes and reads ".overview", marks articles that
 * are too old, empty, duplicated or inconsistent, unlinks them, and
 * removes the directory when nothing is kept and the group is not
 * interesting.  The message-IDs of the retained articles are collected
 * in the ID tree and finally handed to th() through traverseidtree(),
 * unless eflag is already set.
 */
static void
dogroup(/*@null@*/ struct newsgroup *g, const char *name, int expdays)
{
    char *gdir = NULL;
    size_t s_gdir;
    char *p;
    char *q;
    DIR *d;
    struct dirent *de;
    struct stat st;
    unsigned long first, last, art, dupli = 0;
    struct exp *articles;
    int n;
    int fd;
    char *overview;		/* xover: read then free */

    int deleted, kept;

    deleted = kept = 0;
    clearidtree();

    /* eliminate empty groups */
    if (!chdirgroup(name, FALSE)) {
	if (g) { g->first = g->last + 1; }
	return;
    }
    if (!agetcwd(&gdir, &s_gdir)) {
	ln_log(LNLOG_SERR, LNLOG_CGROUP, "getcwd: %m");
	return;
    }

    /* find low-water and high-water marks */

    d = opendir(".");
    if (!d) {
	ln_log(LNLOG_SERR, LNLOG_CGROUP, "opendir in %s: %m", gdir);
	free(gdir);
	return;
    }

    first = ULONG_MAX;
    last = 0;
    while ((de = readdir(d)) != 0) {
	/* only regular files with an all-digit name count as articles */
	if (!isdigit((unsigned char)de->d_name[0]) ||
	    stat(de->d_name, &st) || !S_ISREG(st.st_mode))
	    continue;
	art = strtoul(de->d_name, &p, 10);
	if (p && !*p) {
	    if (art < first)
		first = art;
	    if (art > last)
		last = art;
	}
    }
    closedir(d);

    /* update overview info */
    getxover();
    freexover();

    /* first is still ULONG_MAX if no article file was found */
    if (last < first) {
	if (verbose > 1) printf("%s: empty group\n", name);
	if (g) g->first = g->last + 1;
	free(gdir);
	return;
    }

    if (verbose > 1)
	printf("%s: low water mark %lu, high water mark %lu\n",
	       name, first, last);
    if (debugmode)
	syslog(LOG_DEBUG,
	       "%s: expire %lu, low water mark %lu, high water mark %lu",
	       name, (unsigned long)expdays, first, last);

    /* allocate and clear article array */
    articles = (struct exp *)critmalloc((last - first + 1) * sizeof(struct exp),
					"Reading articles to expire");
    for (art = 0; art <= last - first; art++) {
	articles[art].xover = NULL;
	articles[art].kill = 0;
	articles[art].exists = 0;
    }

    /* read in overview info, to be purged and written back */
    overview = NULL;

    if (stat(".overview", &st) == 0) {
	overview = critmalloc(st.st_size + 1, "Reading article overview info");
	if ((fd = open(".overview", O_RDONLY)) < 0 ||
	    ((off_t) read(fd, overview, st.st_size) < st.st_size)) {
	    ln_log(LNLOG_SERR, LNLOG_CGROUP, "can't open/read %s/.overview: %m", gdir);
	    *overview = '\0';
	    if (fd > -1)
		close(fd);
	} else {
	    close(fd);
	    overview[st.st_size] = '\0';	/* 0-terminate string */
	}

	/* split the buffer into lines in place and attach each line to
	 * the article whose number it starts with */
	p = overview;
	while (p && *p) {
	    while (p && isspace((unsigned char)*p))
		p++;
	    art = strtoul(p, NULL, 10);
	    if (art >= first && art <= last && !articles[art - first].xover) {
		articles[art - first].xover = p;
		articles[art - first].kill = 1;
	    }
	    p = strchr(p, '\n');
	    if (p) {
		*p = '\0';
		if (p[-1] == '\r')
		    p[-1] = '\0';
		p++;
	    }
	}
    }

    /* check the syntax of the .overview info, and delete all illegal stuff */
    for (art = first; art <= last; art++) {
	const char *x;

	if (articles[art - first].xover &&
	    !legalxoverline(articles[art - first].xover, &x)) {
	    articles[art - first].xover = NULL;
	}
    }

    /* insert articles in tree, and clear 'kill' for new or read articles */
    d = opendir(".");
    if (!d) {
	ln_log(LNLOG_SERR, LNLOG_CGROUP, "opendir in %s: %m", gdir);
	free(gdir);
	free(articles);
	/* the overview buffer was allocated above and must go as well */
	free(overview);
	return;
    }
    while ((de = readdir(d)) != 0) {
	art = strtoul(de->d_name, &p, 10);
	if (p && !*p && art <= last && art >= first) {
	    articles[art - first].exists = 1;
	    /* mark all articles as to-be-deleted and rescue those
	     * which fulfill certain criteria */
	    articles[art - first].kill = 1;
	    /* save file if it is a regular non-empty file
	     * and has no expire time */
	    if (stat(de->d_name, &st) == 0 &&
		(S_ISREG(st.st_mode)) &&
		(st.st_size != 0) &&
		(expdays < 0
		 || (st.st_mtime > expdays)
		 || (use_atime && (st.st_atime > expdays)))) {
		articles[art - first].kill = 0;
		/* skip four tab-separated fields of the xover line to
		 * reach the message-ID field */
		p = articles[art - first].xover;
		for (n = 0; n < 4; n++)
		    if (p && (p = strchr(p + 1, '\t')))
			p++;
		q = p ? strchr(p, '\t') : NULL;
		if (p && q) {
		    /* temporarily terminate the message-ID field */
		    *q = '\0';
		    if (findmsgid(p)) {	/* another file with same msgid? */
			/* kill this article and keep the first to have
			 * that message-id */
			articles[art - first].kill = 1;
			ln_log(LNLOG_SINFO, LNLOG_CARTICLE,
				"%s: removing duplicate article %lu %s",
				name, art, p);
			dupli++;
		    } else {
			int relink = 0;
			const char *t = lookup(p);

			insertmsgid(p);

			if (repair == 0) {
			    /* fast path */
			    if (st.st_nlink < 2) {
				relink = 1;
			    }
			} else {
			    /* slow path */
			    struct stat st2;
			    if (stat(t, &st2)
				    || st2.st_dev != st.st_dev
				    || st2.st_ino != st.st_ino) {
				relink = 1;
			    }
			}

			if (relink) {	/* repair fs damage */
			    if (link(de->d_name, t)
				/* if EEXIST, link reverse
				 * rename first because it is atomic and
				 * guarantees the file de->d_name is
				 * always present. This file is precious.
				 * If we used unlink and link, a lone
				 * message.id/000 file would be deleted
				 * by expiremsgid()!
				 */
				&& (errno != EEXIST
					|| rename(t, de->d_name)
					|| link(de->d_name, t)))
			    {
				    ln_log(LNLOG_SERR, LNLOG_CGROUP,
					   "%s: relink of %s <-> %s failed: %s (%s)",
					   name, p, de->d_name, strerror(errno), t);
			    } else {
				ln_log(LNLOG_SINFO, LNLOG_CARTICLE,
					"%s: relinked message %s <-> %s", name, p, de->d_name);
			    }
			}
			*q = '\t';
		    }
		} else if (articles[art - first].xover) {
		    /* data structure inconsistency: delete and be rid of it */
		    articles[art - first].kill = 1;
		} else {
		    /* possibly read the xover line into memory? */
		}
	    }
	}
    }
    closedir(d);

    /* compute new low-water mark */

    art = first;
    while (art <= last && articles[art - first].kill)
	art++;
    if (g) g->first = art;

    /* remove old postings */

    for (art = first; art <= last; art++) {
	char artname[40]; /* must hold a decimal long + NUL */ /* RATS: ignore */
	if (articles[art - first].exists) {
	    if (articles[art - first].kill) {
		snprintf(artname, sizeof(artname), "%lu", art);
		if (0 == unlink(artname)) {
		    if (debugmode)
			syslog(LOG_DEBUG, "deleted article %s/%lu", gdir, art);
		    deleted++;
		} else if (errno != ENOENT && errno != EEXIST) {
		    /* if file was deleted already or it was not a file */
		    /* but a directory, skip error message */
		    kept++;
		    ln_log(LNLOG_SERR, LNLOG_CGROUP, "unlink %s/%lu: %m", gdir, art);
		} else {
		    /* deleted by someone else */
		}
	    } else {
		kept++;
	    }
	}
    }
    free((char *)articles);
    free(overview);

    if (g && last > g->last)		/* try to correct insane newsgroup info */
	g->last = last;

    if (!quiet)
	printf("%s: %d article%s deleted (%lu duplicate%s), %d kept\n",
		name, deleted, PLURAL(deleted), dupli, PLURAL(dupli), kept);
    syslog(LOG_INFO,
	    "%s: %d article%s deleted (%lu duplicate%s), %d kept",
	    name, deleted, PLURAL(deleted), dupli, PLURAL(dupli), kept);

    if (!kept) {
	if (unlink(".overview") < 0)
	    ln_log(LNLOG_SERR, LNLOG_CGROUP, "unlink %s/.overview: %m", gdir);
	if (!chdir("..") && (isinteresting(name) == 0)) {
	    /* delete directory and empty parent directories */
	    while (rmdir(gdir) == 0) {
		if (!agetcwd(&gdir, &s_gdir)) {
		    ln_log(LNLOG_SERR, LNLOG_CGROUP, "getcwd: %m");
		    break;
		}
		chdir("..");
	    }
	}
    }
    free(gdir); /* previous loop may have freed *gdir */

    /* write MIDSFILE */
    if (!eflag)
	eflag |= traverseidtree(th);

    clearidtree();
}

/* Run dogroup() on every group of the list returned by get_grouplist().
 * For each group the expiry value is chosen as follows:
 *   - lookup_expiredays() negative: -1 (group never expires, which is
 *     reported on stdout when verbose and always through syslog);
 *   - lookup_expiredays() zero: default_expire;
 *   - otherwise lookup_expire(), or default_expire if that yields 0.
 * The list is released afterwards. */
static void
expiregroup(void)
{
    struct newsgroup *g;
    struct stringlist *cur;
    struct stringlist *l = get_grouplist();
    int expdays;

    if (l == NULL) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "cannot obtain group list\n");
	return;
    }

    for (cur = l; cur != NULL; cur = cur->next) {
	char *grp = cur->string;
	int days;

	g = findgroup(grp);
	days = lookup_expiredays(grp);
	if (days < 0) {
	    expdays = -1;
	    if (verbose) {
		printf("%s: never expires\n", grp);
	    }
	    syslog(LOG_INFO, "%s: never expires", grp);
	} else {
	    /* only consult lookup_expire() for a positive day count */
	    expdays = (days > 0) ? lookup_expire(grp) : 0;
	    if (expdays == 0)
		expdays = default_expire;
	}
	dogroup(g, grp, expdays);
    }
    freelist(l);
}

/* Walk the newsgroup array starting at g up to the first entry without
 * a name and, for every group whose directory chdirgroup() cannot
 * enter, set first to last + 1.  A NULL g is accepted and does
 * nothing. */
static void
fixupgroup(/*@null@*/ struct newsgroup *g)
{
    if (g == NULL)
	return;

    while (g->name) {
	if (!chdirgroup(g->name, FALSE))
	    g->first = g->last + 1;
	g++;
    }
}

/* Read the MIDSFILE in the current directory and insert every
 * message-ID it holds into the ID tree via insertmsgid().
 *
 * The file is a sequence of records, each a native ssize_t length
 * followed by that many bytes.  It is unlinked right after being
 * opened.  Record lengths are validated before use: a negative length,
 * or one larger than the file itself, is treated as corruption rather
 * than being used as a buffer index or allocation size.
 *
 * Returns 0 on success or if the file does not exist, 1 if it cannot
 * be opened for another reason, and -1 if it is corrupt or cannot be
 * read.
 */
static int
readmids(void)
{
    int fd;
    ssize_t l;
    ssize_t r;
    char *buf;
    size_t bufsiz = 128;
    int rc = 0;
    int saved_errno = 0;
    struct stat st;
    off_t fsize = -1;		/* file size, -1 if unknown */

    fd = open(MIDSFILE, O_RDONLY);
    if (fd < 0) {
	if (errno != ENOENT) {
	    ln_log(LNLOG_SERR, LNLOG_CTOP, "cannot open \"%s\" file: %m",
		    MIDSFILE);
	    return 1;
	}
	return 0;
    }

    /* delete file early so we don't barf again and again if the file is
     * corrupt */
    log_unlink(MIDSFILE, 0);

    /* no record can be longer than the file that contains it */
    if (fstat(fd, &st) == 0)
	fsize = st.st_size;

    buf = critmalloc(bufsiz, "readmids");

    while((r = read(fd, &l, sizeof(l))) == (ssize_t)sizeof(l)) {
	/* length obtained, reject nonsense before using it as a size
	 * or as an index into buf */
	if (l < 0 || (fsize >= 0 && l > fsize)) {
	    rc = -1;
	    break;
	}
	if ((size_t)l >= bufsiz) {
	    free(buf);
	    bufsiz = (size_t)l + 1;
	    buf = critmalloc(bufsiz, "readmids");
	}
	if ((r = read(fd, buf, l)) < l) {
	    /* short read */
	    rc = -1;
	    break;
	}
	buf[l] = '\0';
	/* sanity check */
	if (strlen(buf) != (size_t)l) {
	    rc = -1;
	    break;
	}
	insertmsgid(buf);
    }
    /* the loop ended without a break on a partially read length header */
    if (rc == 0 && r > 0)
	rc = -1;
    /* free() and close() may change errno, keep it for the log message */
    if (r < 0)
	saved_errno = errno;
    free(buf);
    (void)close(fd);
    if (rc)
	ln_log(LNLOG_SERR, LNLOG_CTOP, "corrupt \"%s\" file", MIDSFILE);
    if (r < 0) {
	errno = saved_errno;
	ln_log(LNLOG_SERR, LNLOG_CTOP, "cannot read \"%s\" file: %m", MIDSFILE);
	rc = -1;
    }
    return rc;
}

/* Remove the MIDSFILE from each of the message.id/000 ... message.id/999
 * directories below spooldir.
 * returns 0 for success, 1 if log_unlink() reported a failure for at
 * least one of them */
static int
cleanmids(void)
{
    int rc = 0;
    int dirno = 0;
    mastr *path = mastr_new(256);

    while (dirno < 1000) {
	char num[4];		/* three digits + NUL */

	snprintf(num, sizeof(num), "%03d", dirno); /* safe */
	mastr_clear(path);
	mastr_vcat(path, spooldir, "/message.id/", num, "/", MIDSFILE, NULL);
	if (log_unlink(mastr_str(path), 1) != 0)
	    rc = 1;
	dirno++;
    }
    mastr_delete(path);
    return rc;
}

/* Expire the message.id/000 ... message.id/999 directories below
 * spooldir.
 *
 * In each directory the MIDSFILE is loaded into the ID tree with
 * readmids(), unless eflag was set on entry or an earlier readmids()
 * call failed, in which case the file is just removed.  A regular file
 * is unlinked when its link count is below 2, or, if the ID tree is
 * usable, when its name is not in the tree.  A file that is kept but
 * whose lookup() path lies in a different directory is linked to that
 * path and removed from the current directory.
 * The totals are printed unless quiet is set, and always sent to
 * syslog.
 */
static void
expiremsgid(void)
{
    int n, s_len;
    DIR *d;
    struct dirent *de;
    struct stat st;
    int deleted, kept;
    const char *t;
    int nomids = eflag;

    deleted = kept = 0;

    if (verbose)
	puts("Expiring message.id...");

    for (n = 0; n < 1000; n++) {
	char s[SIZE_s+1];

	s_len = xsnprintf(s, SIZE_s, "%s/message.id/%03d/", spooldir, n);
	if (chdir(s)) {
	    if (errno == ENOENT)
		mkdir(s, 0755);	/* file system damage? */
	    if (chdir(s)) {
		ln_log(LNLOG_SERR, LNLOG_CGROUP, "chdir %s: %m", s);
		continue;
	    }
	}

	if (nomids == 0)
	    nomids |= readmids();
	else
	    unlink(MIDSFILE); /* ignore errors */

	d = opendir(".");
	if (!d) {
	    ln_log(LNLOG_SERR, LNLOG_CGROUP, "opendir %s: %m", s);
	    /* drop the IDs readmids() may have loaded for this
	     * directory, as the regular end of the loop body does */
	    clearidtree();
	    continue;
	}
	while ((de = readdir(d)) != 0) {
	    if (stat(de->d_name, &st) == 0 && S_ISREG(st.st_mode)) {
		int ul = 0;
		const char *reason = "";

		if (st.st_nlink < 2) {
		    ul = 1;
		    reason = "link count below 2";
		}
		if (!nomids && !findmsgid(de->d_name)) {
		    ul = 1;
		    reason = "not seen in group scan";
		}
		if (ul) {
		    if (debugmode)
			ln_log(LNLOG_SDEBUG, LNLOG_CARTICLE, "unlinking %03d/%s, %s",
				n, de->d_name, reason);
		    if (0 == log_unlink(de->d_name, 1)
			    && de->d_name[0] == '<' /* only count MID files */)
			deleted++;
		} else {
		    kept++;
		    /* check hash */
		    t = lookup(de->d_name);
		    if (strncmp(t, s, s_len)) {
			/* in wrong directory, move to the right one
			 * note however that if the right file is
			 * already present, we'll leave it in place,
			 * because it may have been relinked from a
			 * group directory and we don't want to break
			 * links again
			 */
			if (link(de->d_name, t) && errno != EEXIST)
			    ln_log(LNLOG_SERR, LNLOG_CARTICLE,
				    "rehash: cannot move %s%s to %s: %m",
				    s, de->d_name, t);
			else {
			    /* the three digits of the directory in t */
			    char buf[4];
			    memcpy(buf, t + s_len - 4, 3);
			    buf[3] = '\0';

			    ln_log(LNLOG_SINFO, LNLOG_CARTICLE,
				    "rehashed %s from %03d to %s", de->d_name,
				    n, buf);
			}
			/* NOTE(review): the entry is removed here even
			 * when the link above failed - confirm intent */
			log_unlink(de->d_name, 0);
		    }
		}
	    }
	}
	closedir(d);
	clearidtree();
    }

    if (verbose)
	puts("Done.");

    if (!quiet)
	printf("message.id/: %d article%s deleted, %d kept\n", deleted, PLURAL(deleted), kept);
    syslog(LOG_INFO, "message.id/: %d article%s deleted, %d kept", deleted, PLURAL(deleted), kept);
}


/* texpire entry point.
 *
 * Options: -v (more verbose, repeatable), -q (quiet), -f (ignore access
 * time), -r (repair mode, run expensive checks), -h (print usage).
 *
 * After reading the configuration and obtaining the lock, stale
 * MIDSFILEs are removed, the active file is read (or faked), and then
 * groups and the message.id directories are expired.  SIGINT and
 * SIGTERM abort the expiry through siglongjmp(); the active file is
 * written and the lock released in either case.
 *
 * Exit status: 0 after a run (also when interrupted) and for -h,
 * 1 for an unknown option or when initialisation, locking or
 * MIDSFILE cleanup fails, 2 when the configuration cannot be read or
 * no expire time is configured.
 */
int
main(int argc, char **argv)
{
    int option;
    int rc = 1;

    myopenlog("texpire");
    if (!initvars(argv[0]))
	exit(1);

    while ((option = getopt(argc, argv, "vfqhr")) != -1) {
	switch(option) {
	    case 'v':
		verbose++;
		quiet = 0;
		break;
	    case 'f':
		use_atime = 0;
		break;
	    case 'r':
		repair = 1;
		break;
	    case 'q':
		quiet = 1;
		verbose = 0;
		break;
	    case 'h':
		rc = 0;
		/*FALLTHROUGH*/
	    default:
		if (rc)
		    fprintf(stderr, "texpire: unknown option -%c.\n", optopt);
		fprintf(stderr, "Usage: texpire {[-v[v[v[v]]]]|-q} [-f] [-r]\n"
			"  -q: be quiet (cancels -v)\n"
			"  -v: more verbose (cancels -q, may be repeated)\n"
			"  -f: force expire irrespective of access time\n"
			"  -r: repair mode, run expensive checks\n"
			"  -h: show this help\n");
		exit(rc);
	}
    }

    /* reset before readconfig() so a missing setting can be detected */
    expire = 0;
    expire_base = NULL;

    if (!readconfig(0)) {
	fprintf(stderr, "Reading configuration failed, exiting "
	       "(see syslog for more information).\n");
	exit(2);
    }
    freeservers();

    if (verbose || debugmode) {
	printf("texpire %s: verbosity level %d, debugmode %d, %s\n", version,
		verbose, debugmode,
		use_atime ? "check mtime and atime" : "check mtime only");
    }
    syslog(LOG_INFO, "texpire %s: use_atime is %d, verbosity level %d, "
	    "debugmode %d", version, use_atime, verbose, debugmode);

    if (try_lock(timeout_lock)) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "Cannot obtain lock file, aborting.\n");
	exit(1);
    }

    if (cleanmids()) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "Cannot weed out MIDS files, aborting.\n");
	unlink(lockfile);
	exit(1);
    }

    readactive();
    if (!active) {
	ln_log(LNLOG_SWARNING, LNLOG_CTOP, "Reading active file failed. Trying to build my own.");
	fakeactive();
    }

    if (expire == 0) {
	fprintf(stderr, "%s: no expire time\n", argv[0]);
	unlink(lockfile);
	exit(2);
    }

    default_expire = expire;

    if (sigsetjmp(jmpbuffer, 1) == 0) {
	/* if we can't catch either signal, don't care,
	 * it's just more work next time */
	(void)mysigact(SIGINT, 0, sig_int, 0);
	(void)mysigact(SIGTERM, 0, sig_int, 0);
	expiregroup();
	fixupgroup(active);
	expiremsgid();
    } else {
	/* reached through siglongjmp() from sig_int(): make further
	 * signals harmless while cleaning up */
	blocksig = 1;
	ln_log(LNLOG_SNOTICE, LNLOG_CTOP,
		"caught interrupt/termination signal, aborting gracefully.");
    }
    if (writeactive())
	ln_log(LNLOG_SERR, LNLOG_CTOP, "error writing groupinfo.");
    freeactive(active);
    unlink(lockfile);
    freeservers();
    freexover();
    freeconfig();
    th(NULL);	/* release the static record buffer of th() */
    return 0;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */