/* ports//news/cnews/work/libc/getindate.c */

/*
 * getindate - parse the common Internet date case (rfc 822 & 1123) *fast*
 */

#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/timeb.h>
#include "dateconv.h"
#include "datetok.h"

/*
 * STREQ is an optimised strcmp(a,b)==0: the first characters are compared
 * inline and strcmp is only called when they match.  Both arguments are
 * evaluated more than once.  It is not used by the functions in this file.
 */
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)

/*
 * PACK_TWO_CHARS builds one integer out of two characters (c1 in bits 8 and
 * up, c2 in the low bits) so that a two-letter prefix can be used as a
 * switch/case value.  Nothing is masked: both arguments are expected to be
 * non-negative character values.
 */
#define	PACK_TWO_CHARS(c1, c2)	(((c1)<<8)|(c2))
/* ISSPACE recognises only space, newline and tab as white space. */
#define ISSPACE(c) ((c) == ' ' || (c) == '\n' || (c) == '\t')
/*
 * The SKIP* macros step a char pointer along a NUL-terminated string.
 * They all assign to a variable named `ch', which the caller must declare,
 * and `s' must be a modifiable pointer.  Each expands to several
 * statements and leaves the last one unterminated, so use them only as
 * a complete statement followed by `;', never as the unbraced body of an
 * if, else or loop.
 *
 * SKIPTOSPC leaves s on the first white-space character or on the NUL.
 */
#define SKIPTOSPC(s) \
	while ((ch = *(s)++), (!ISSPACE(ch) && ch != '\0')) \
		; \
	(s)--			/* N.B.: no semi-colon */
/* SKIPSPC leaves s on the first character that is not white space. */
#define SKIPSPC(s) \
	while ((ch = *(s)++), ISSPACE(ch)) \
		; \
	(s)--			/* N.B.: no semi-colon */
/* SKIPOVER skips the current word and then the white space after it. */
#define SKIPOVER(s) \
	SKIPTOSPC(s); \
	SKIPSPC(s)		/* N.B.: no semi-colon */

/*
 * this is fast but dirty.  note the return's in the middle.
 *
 * GOBBLE_NUM reads a one- or two-digit decimal number starting at cp and
 * stores it through ip.  c is left holding the character that followed the
 * digits, which must be ':', NUL or white space; x is scratch space.
 * If there is no leading digit or the delimiter is wrong, the macro makes
 * the *enclosing function* return -1 and *ip is not written.
 * cp is left one past the delimiter -- even when the delimiter is the NUL,
 * so the caller must look at c before reading through cp again.
 * Like the SKIP* macros, this expands to several statements.
 */
#define GOBBLE_NUM(cp, c, x, ip) \
	(c) = *(cp)++; \
	if ((c) < '0' || (c) > '9') \
		return -1;		/* missing digit */ \
	(x) = (c) - '0'; \
	(c) = *(cp)++; \
	if ((c) >= '0' && (c) <= '9') { \
		(x) = 10*(x) + (c) - '0'; \
		(c) = *(cp)++; \
	} \
	if ((c) != ':' && (c) != '\0' && !ISSPACE(c)) \
		return -1;		/* missing colon */ \
	*(ip) = (x)			/* N.B.: no semi-colon here */

/*
 * getindate - the normal interface: parse the Internet date in line and
 * convert it.
 *
 * Most dates in news articles look like
 *	[Weekday,] dd Mmm [19]yy hh:mm[:ss] Timezone
 * and for those this is much quicker than getdate and quite a bit faster
 * than getabsdate.
 *
 * The text is broken out by prsindate and the result is handed to dateconv
 * together with the time zone value; the zone starts out as 0 in case
 * prsindate does not set it.  Returns -1 if prsindate rejects the text,
 * otherwise whatever dateconv returns.
 */
/* ARGSUSED */
time_t
getindate(line, now)
register char *line;			/* can be modified */
struct timeb *now;			/* unused; for getdate compatibility */
{
	struct tm broken;		/* filled in by prsindate */
	int zone = 0;

	if (prsindate(line, &broken, &zone) < 0)
		return -1;		/* not a date in the fast-path form */
	return dateconv(&broken, zone);
}

/*
 * prsindate - just parse the Internet date in line and get back a
 * broken-out date; nothing is converted to a time_t here.
 *
 * Accepted form:
 *	[Weekday,] dd Mmm [19]yy hh:mm[:ss] Timezone [(comment)]
 *
 * line	NUL-terminated date text.  The time zone token is NUL-terminated
 *	in place while it is looked up; the overwritten character is put
 *	back before returning, whether or not the parse succeeds.
 * tm	receives tm_mday, tm_mon (zero-origin), tm_year (1900-origin),
 *	tm_hour, tm_min and tm_sec; tm_isdst is set to 0.  No other member
 *	is written.
 * tzp	set to 0 for a plain "GMT", or to FROMVAL() of the zone token for
 *	TZ and DTZ tokens; left alone for IGNORE tokens.
 *
 * Returns 0 on success and -1 if the text is not in the form above.
 * After a failure *tm and *tzp may have been partly filled in.
 */
int
prsindate(line, tm, tzp)
register char *line;			/* can be modified */
register struct tm *tm;
int *tzp;
{
	register int c;
	register char ch;		/* used by SKIPTOSPC */
	register char *cp;
	register char c2;
	register int year;

	tm->tm_isdst = 0;
	SKIPSPC(line);
	if ((ch = *line) < '0' || ch > '9') {
		/* no leading digit, so a "Weekday," has to come first */
		cp = line;
		while ((ch = *cp++), (!ISSPACE(ch) && ch != ',' && ch != '\0'))
			;
		cp--;
		if (ch != ',')
			return -1;		/* missing comma after weekday */
		line = cp;
		SKIPOVER(line);			/* skip weekday */
	}

	GOBBLE_NUM(line, ch, c, &tm->tm_mday);
	/*
	 * GOBBLE_NUM leaves line one past the delimiter it stopped on.  If
	 * that delimiter was the terminating NUL, line now points beyond the
	 * string and must not be dereferenced.
	 */
	if (ch == '\0')
		return -1;		/* nothing after the day of month */

	/*
	 * we have to map to canonical case because RFC 822 requires
	 * case independence, so we pay a performance penalty for the sake
	 * of 0.1% of dates actually seen in Date: headers in news.
	 * Way to go, IETF.
	 */
	ch = *line++;
	if (ch == '\0')
		return -1;		/* no month */
	if (isascii(ch) && islower(ch))
		ch = toupper(ch);
	c2 = *line++;
	if (c2 == '\0')
		return -1;		/* month too short */
	if (isascii(c2) && isupper(c2))
		c2 = tolower(c2);
	/*
	 * the casts keep 8-bit characters non-negative: left-shifting a
	 * negative value is undefined, and plain char may be signed.
	 * line is now on the third letter, which tells Mar from May and
	 * Jul from Jun.
	 */
	switch (PACK_TWO_CHARS((unsigned char)ch, (unsigned char)c2)) {
	case PACK_TWO_CHARS('J', 'a'):
		tm->tm_mon = 1;
		break;
	case PACK_TWO_CHARS('F', 'e'):
		tm->tm_mon = 2;
		break;
	case PACK_TWO_CHARS('M', 'a'):	/* March, May */
		tm->tm_mon = ((ch = *line) == 'r' || ch == 'R'? 3: 5);
		break;
	case PACK_TWO_CHARS('A', 'p'):
		tm->tm_mon = 4;
		break;
	case PACK_TWO_CHARS('J', 'u'):
		tm->tm_mon = 6;
		if ((ch = *line) == 'l' || ch == 'L')
			tm->tm_mon++;		/* July */
		break;
	case PACK_TWO_CHARS('A', 'u'):
		tm->tm_mon = 8;
		break;
	case PACK_TWO_CHARS('S', 'e'):
		tm->tm_mon = 9;
		break;
	case PACK_TWO_CHARS('O', 'c'):
		tm->tm_mon = 10;
		break;
	case PACK_TWO_CHARS('N', 'o'):
		tm->tm_mon = 11;
		break;
	case PACK_TWO_CHARS('D', 'e'):
		tm->tm_mon = 12;
		break;
	default:
		return -1;		/* bad month name */
	}
	tm->tm_mon--;			/* convert month to zero-origin */
	SKIPOVER(line);			/* skip month */

	/*
	 * the year is a run of decimal digits.  accumulate it by hand so
	 * that an absurdly long run is rejected instead of overflowing.
	 */
	year = 0;
	for (cp = line; (ch = *cp) >= '0' && ch <= '9'; cp++) {
		if (year > (INT_MAX - 9) / 10)
			return -1;	/* year does not fit in an int */
		year = 10*year + (ch - '0');
	}
	if (cp == line)
		return -1;		/* year is missing */
	tm->tm_year = year;

	if (tm->tm_year < 70)           /* year is 2000 origin? */
		tm->tm_year += 100;     /* make 1900 origin y2k */
	else if (tm->tm_year >= 1900)   /* convert year to 1900 origin, */
		tm->tm_year -= 1900;	/* but 2-digit years need no work */
	SKIPOVER(line);			/* skip year */

	if (parsetime(line, tm) < 0)
		return -1;
	SKIPOVER(line);			/* skip time */

	/* fast path: exactly "GMT" with nothing but a newline or NUL after */
	cp = line;
	if (*cp++ == 'G' && *cp++ == 'M' && *cp++ == 'T' &&
	    (*cp == '\n' || *cp == '\0'))
		*tzp = 0;
	else {				/* weirdo time zone */
		register datetkn *tp;

		cp = line;		/* time zone start */
		SKIPTOSPC(line);
		c = *line;		/* save old delimiter */
		*line = '\0';		/* terminate time zone */

		tp = datetoktype(cp, (int *)NULL);
		switch (tp->type) {
		case DTZ:
#if 0
			tm->tm_isdst++;
#endif
			/* FALLTHROUGH */
		case TZ:
			*tzp = FROMVAL(tp);
			/* FALLTHROUGH */
		case IGNORE:
			break;
		default:
			*line = c;	/* don't leave the caller's text cut short */
			return -1;	/* bad token type */
		}

		*line = c;		/* restore old delimiter */
		SKIPSPC(line);
		if (*line != '\0') {	/* garbage after the date? */
			if (*line != '(')	/* not even an 822 comment? */
				return -1;
			/*
			 * a full 822 parse of the comment would
			 * be ridiculously complicated, so nested
			 * comments and quotes are not honoured.
			 * just look for a closing paren; it's only
			 * a time zone name.
			 */
			while ((c = *++line) != ')' && c != '\0')
				;
			if (c == ')')
				++line;
			else
				return -1;	/* comment not terminated */
			SKIPSPC(line);
			if (*line != '\0')	/* trash left? */
				return -1;
		}
	}
	return 0;
}

/*
 * parsetime - pick apart "hh:mm" or "hh:mm:ss" at the start of time.
 * Each field is one or two digits.  The hour, minute and second are stored
 * in tm; the second is 0 when it is not given.
 * Returns 0 if the time was acceptable, -1 on failure.
 */
int
parsetime(time, tm)
register char *time;
register struct tm *tm;
{
	register char delim;		/* character that ended the last field */
	register int val;		/* scratch for GOBBLE_NUM */

	tm->tm_sec = 0;			/* seconds are optional */

	/* N.B.: GOBBLE_NUM returns -1 from here by itself on a bad field */
	GOBBLE_NUM(time, delim, val, &tm->tm_hour);
	if (delim != ':')
		return -1;		/* only hour; too short */

	GOBBLE_NUM(time, delim, val, &tm->tm_min);
	if (delim == ':') {
		GOBBLE_NUM(time, delim, val, &tm->tm_sec);
		/* this may be considered too strict.  garbage at end of time? */
		if (delim != '\0' && !ISSPACE(delim))
			return -1;
	}
	return 0;			/* with or without seconds; okay */
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */