/*
* getindate - parse the common Internet date case (rfc 822 & 1123) *fast*
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/timeb.h>
#include "dateconv.h"
#include "datetok.h"
/* STREQ is an optimised strcmp(a,b)==0 */
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define PACK_TWO_CHARS(c1, c2) (((c1)<<8)|(c2))
#define ISSPACE(c) ((c) == ' ' || (c) == '\n' || (c) == '\t')
#define SKIPTOSPC(s) \
while ((ch = *(s)++), (!ISSPACE(ch) && ch != '\0')) \
; \
(s)-- /* N.B.: no semi-colon */
#define SKIPSPC(s) \
while ((ch = *(s)++), ISSPACE(ch)) \
; \
(s)-- /* N.B.: no semi-colon */
#define SKIPOVER(s) \
SKIPTOSPC(s); \
SKIPSPC(s) /* N.B.: no semi-colon */
/* this is fast but dirty. note the return's in the middle. */
#define GOBBLE_NUM(cp, c, x, ip) \
(c) = *(cp)++; \
if ((c) < '0' || (c) > '9') \
return -1; /* missing digit */ \
(x) = (c) - '0'; \
(c) = *(cp)++; \
if ((c) >= '0' && (c) <= '9') { \
(x) = 10*(x) + (c) - '0'; \
(c) = *(cp)++; \
} \
if ((c) != ':' && (c) != '\0' && !ISSPACE(c)) \
return -1; /* missing colon */ \
*(ip) = (x) /* N.B.: no semi-colon here */
/*
* If the date is in the form
* [Weekday,] dd Mmm [19]yy hh:mm[:ss] Timezone
* as most dates in news articles are, then we can parse it much quicker than
* getdate and quite a bit faster than getabsdate.
*
* parse and convert Internet date in timestr (the normal interface)
*/
/* ARGSUSED */
time_t
getindate(line, now)
register char *line; /* can be modified */
struct timeb *now; /* unused; for getdate compatibility */
{
int tz = 0;
struct tm date;
return prsindate(line, &date, &tz) < 0? -1: dateconv(&date, tz);
}
/*
* just parse the Internet date in timestr and get back a broken-out date.
*/
int
prsindate(line, tm, tzp)
register char *line; /* can be modified */
register struct tm *tm;
int *tzp;
{
register int c;
register char ch; /* used by SKIPTOSPC */
register char *cp;
register char c2;
tm->tm_isdst = 0;
SKIPSPC(line);
if ((ch = *line) < '0' || ch > '9') {
cp = line;
while ((ch = *cp++), (!ISSPACE(ch) && ch != ',' && ch != '\0'))
;
cp--;
if (ch == ',') {
line = cp;
SKIPOVER(line); /* skip weekday */
} else
return -1; /* missing comma after weekday */
}
GOBBLE_NUM(line, ch, c, &tm->tm_mday);
/*
* we have to map to canonical case because RFC 822 requires
* case independence, so we pay a performance penalty for the sake
* of 0.1% of dates actually seen in Date: headers in news.
* Way to go, IETF.
*/
ch = *line++;
if (ch == '\0')
return -1; /* no month */
if (isascii(ch) && islower(ch))
ch = toupper(ch);
c2 = *line++;
if (c2 == '\0')
return -1; /* month too short */
if (isascii(c2) && isupper(c2))
c2 = tolower(c2);
switch (PACK_TWO_CHARS(ch, c2)) {
case PACK_TWO_CHARS('J', 'a'):
tm->tm_mon = 1;
break;
case PACK_TWO_CHARS('F', 'e'):
tm->tm_mon = 2;
break;
case PACK_TWO_CHARS('M', 'a'): /* March, May */
tm->tm_mon = ((ch = *line) == 'r' || ch == 'R'? 3: 5);
break;
case PACK_TWO_CHARS('A', 'p'):
tm->tm_mon = 4;
break;
case PACK_TWO_CHARS('J', 'u'):
tm->tm_mon = 6;
if ((ch = *line) == 'l' || ch == 'L')
tm->tm_mon++; /* July */
break;
case PACK_TWO_CHARS('A', 'u'):
tm->tm_mon = 8;
break;
case PACK_TWO_CHARS('S', 'e'):
tm->tm_mon = 9;
break;
case PACK_TWO_CHARS('O', 'c'):
tm->tm_mon = 10;
break;
case PACK_TWO_CHARS('N', 'o'):
tm->tm_mon = 11;
break;
case PACK_TWO_CHARS('D', 'e'):
tm->tm_mon = 12;
break;
default:
return -1; /* bad month name */
}
tm->tm_mon--; /* convert month to zero-origin */
SKIPOVER(line); /* skip month */
tm->tm_year = atoi(line);
if (tm->tm_year < 0 || !isdigit(*line))
return -1; /* year is non-positive or missing */
if (tm->tm_year < 70) /* year is 2000 origin? */
tm->tm_year += 100; /* make 1900 origin y2k */
else if (tm->tm_year >= 1900) /* convert year to 1900 origin, */
tm->tm_year -= 1900; /* but 2-digit years need no work */
SKIPOVER(line); /* skip year */
if (parsetime(line, tm) < 0)
return -1;
SKIPOVER(line); /* skip time */
cp = line;
if (*cp++ == 'G' && *cp++ == 'M' && *cp++ == 'T' &&
(*cp == '\n' || *cp == '\0'))
*tzp = 0;
else { /* weirdo time zone */
register datetkn *tp;
cp = line; /* time zone start */
SKIPTOSPC(line);
c = *line; /* save old delimiter */
*line = '\0'; /* terminate time zone */
tp = datetoktype(cp, (int *)NULL);
switch (tp->type) {
case DTZ:
#if 0
tm->tm_isdst++;
#endif
/* FALLTHROUGH */
case TZ:
*tzp = FROMVAL(tp);
/* FALLTHROUGH */
case IGNORE:
break;
default:
return -1; /* bad token type */
}
*line = c; /* restore old delimiter */
SKIPSPC(line);
if (*line != '\0') { /* garbage after the date? */
if (*line != '(') /* not even an 822 comment? */
return -1;
/*
* a full 822 parse of the comment would
* be ridiculously complicated, so nested
* comments and quotes are not honoured.
* just look for a closing paren; it's only
* a time zone name.
*/
while ((c = *++line) != ')' && c != '\0')
;
if (c == ')')
++line;
else
return -1; /* comment not terminated */
SKIPSPC(line);
if (*line != '\0') /* trash left? */
return -1;
}
}
return 0;
}
/* return -1 on failure */
int
parsetime(time, tm)
register char *time;
register struct tm *tm;
{
register char c;
register int x;
tm->tm_sec = 0;
GOBBLE_NUM(time, c, x, &tm->tm_hour);
if (c != ':')
return -1; /* only hour; too short */
GOBBLE_NUM(time, c, x, &tm->tm_min);
if (c != ':')
return 0; /* no seconds; okay */
GOBBLE_NUM(time, c, x, &tm->tm_sec);
/* this may be considered too strict. garbage at end of time? */
return (c == '\0' || ISSPACE(c)? 0: -1);
}
syntax highlighted by Code2HTML, v. 0.9.1