/*
* datetok - date tokenisation
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <sys/types.h> /* for dateconv.h */
#include "dateconv.h"
#include "datetok.h"
/* imports */
/*
* dtok_numparsed - running count used by datetoktype() to tell a day
* number from a year.  datetoktype() pre-increments it for every token
* that starts with a digit, is not recognised as a time, and is not
* exactly 4 characters long; the token that brings the count to 1 is
* typed DAY, later ones are typed YEAR.
* NOTE(review): nothing in this file resets the counter; presumably the
* caller zeroes it before tokenising each new date -- confirm.
*/
int dtok_numparsed;
/*
* dateabstoks - table of the alphabetic tokens recognised in dates:
* time zone names, month names, day names, am/pm and throwaway words.
*
* this table is guaranteed to contain errors; alphabetic time zones
* are poorly-defined, ambiguous and a stupid idea (e.g. given a
* typical name, matching [A-Z][DS]T, the last letter is constant and
* the second letter conveys a whopping 1 bit of information, so all
* the information has to be crammed into the first letter).
* Death to alphabetic time zones!
*
* to keep this table reasonably small, we compact the lexval for TZ and DTZ
* entries (via PACK) and truncate the text field at TOKMAXLEN characters.
* the text field is not guaranteed to be NUL-terminated.
* ST = Standard Time; DT = Daylight Time.
*
* the entries MUST stay sorted in ascending order of the text field:
* datetoktype() looks tokens up with datebsearch(), a binary search.
* keys are lower-cased and cut to TOKMAXLEN characters before the
* search, which is why long names appear here in shortened form
* (e.g. "decemb", "wednes").
*
* the arguments to PACK read as minutes east of UTC (e.g. "est" is
* -300); day names are typed IGNORE and carry 0 (sunday) to 6 (saturday)
* in the lexval column.
* NOTE(review): PACK and the datetkn layout are declared in a header
* not visible here -- confirm the units and the field order there.
*/
datetkn dateabstoks[] = {
/* text token lexval */
"acsst", DTZ, PACK(630), /* Cent. Australia */
"acst", TZ, PACK(570), /* Cent. Australia */
"adt", DTZ, PACK(-180), /* Atlantic DT */
"aesst", DTZ, PACK(660), /* E. Australia */
"aest", TZ, PACK(600), /* Australia Eastern ST */
"akdt", DTZ, PACK(-480), /* Alaska DT */
"akst", TZ, PACK(-540), /* Alaska ST */
"am", AMPM, AM,
"apr", MONTH, 4,
"april", MONTH, 4,
"ast", TZ, PACK(-240), /* Atlantic ST (Canada) */
"at", IGNORE, 0, /* "at" (throwaway) */
"aug", MONTH, 8,
"august", MONTH, 8,
"awst", TZ, PACK(480), /* W. Australia */
"bst", DTZ, PACK(60), /* British Summer Time */
"cadt", DTZ, PACK(630), /* Central Australian DT */
"cast", TZ, PACK(570), /* Central Australian ST */
"cct", TZ, PACK(480), /* China Coast */
"cdt", DTZ, PACK(-300), /* Central DT */
"cest", DTZ, PACK(120), /* Central Europe Summer Time */
"cet", TZ, PACK(60), /* Central European Time */
"cetdst", DTZ, PACK(120), /* Central European DT */
"cst", TZ, PACK(-360), /* Central ST */
"dec", MONTH, 12,
"decemb", MONTH, 12,
"dnt", TZ, PACK(60), /* Dansk Normal Tid */
/*XX*/ "dst", IGNORE, 0,
"eadt", DTZ, PACK(660), /* East Australian DT */
"east", TZ, PACK(600), /* East Australian ST */
"edt", DTZ, PACK(-240), /* Eastern DT */
"eest", DTZ, PACK(180), /* Eastern Europe Summer */
"eet", TZ, PACK(120), /* Eastern Europe */
"eetdst", DTZ, PACK(180), /* Eastern Europe */
"est", TZ, PACK(-300), /* Eastern ST */
"feb", MONTH, 2,
"februa", MONTH, 2,
"fri", IGNORE, 5,
"friday", IGNORE, 5,
"fst", DTZ, PACK(120), /* French Summer Time */
"fwt", TZ, PACK(60), /* French Winter Time */
"gmt", TZ, PACK(0), /* Greenwich Mean Time */
"gst", TZ, PACK(600), /* Guam ST */
"hadt", DTZ, PACK(-540), /* Hawaii-Aleutian DT */
"hast", TZ, PACK(-600), /* Hawaii-Aleutian ST */
"hkt", TZ, PACK(480), /* Hong Kong Time */
"hst", TZ, PACK(-600), /* Hawaii ST */
"idle", TZ, PACK(720), /* Intl. Date Line, East */
"idlw", TZ, PACK(-720), /* Intl. Date Line, West */
"idt", DTZ, PACK(180), /* Israel DT */
"ist", TZ, PACK(120), /* Israel */
"jan", MONTH, 1,
"januar", MONTH, 1,
"jst", TZ, PACK(540), /* Japan ST */
"jul", MONTH, 7,
"july", MONTH, 7,
"jun", MONTH, 6,
"june", MONTH, 6,
"kdt", DTZ, PACK(600), /* Korea DT */
"kst", TZ, PACK(540), /* Korea ST */
/*XX*/ "ligt", TZ, PACK(600), /* From Melbourne, Australia */
"mar", MONTH, 3,
"march", MONTH, 3,
"may", MONTH, 5,
"mdt", DTZ, PACK(-360), /* Mountain DT */
"mest", DTZ, PACK(120), /* Middle Europe Summer Time */
"mesz", DTZ, PACK(120), /* Mittel-Europaeische Sommerzeit */
"met", TZ, PACK(60), /* Middle Europe Time */
"metdst", DTZ, PACK(120), /* Middle Europe DT */
"mewt", TZ, PACK(60), /* Middle Europe Winter Time */
"mez", TZ, PACK(60), /* Mittel-Europaeische Zeit */
"mon", IGNORE, 1,
"monday", IGNORE, 1,
"mst", TZ, PACK(-420), /* Mountain ST */
"ndt", DTZ, PACK(-150), /* Newfoundland DT */
/*XXN*/ "nft", TZ, PACK(-210), /* Newfoundland ST */
/*XX*/ "nor", TZ, PACK(60), /* Norway ST */
"nov", MONTH, 11,
"novemb", MONTH, 11,
"nst", TZ, PACK(-210), /* Newfoundland ST */
"nzdt", DTZ, PACK(780), /* New Zealand DT */
"nzst", TZ, PACK(720), /* New Zealand ST */
"nzt", TZ, PACK(720), /* New Zealand Time */
"oct", MONTH, 10,
"octobe", MONTH, 10,
"on", IGNORE, 0, /* "on" (throwaway) */
"pdt", DTZ, PACK(-420), /* Pacific DT */
"pm", AMPM, PM,
"pst", TZ, PACK(-480), /* Pacific ST */
"sadt", DTZ, PACK(630), /* S. Australian DT */
"sast", TZ, PACK(570), /* S. Australian ST */
"sat", IGNORE, 6,
"saturd", IGNORE, 6,
"sep", MONTH, 9,
"sept", MONTH, 9,
"septem", MONTH, 9,
"sst", DTZ, PACK(120), /* Swedish Summer Time */
"sun", IGNORE, 0,
"sunday", IGNORE, 0,
"swt", TZ, PACK(60), /* Swedish Winter Time */
"thu", IGNORE, 4,
"thur", IGNORE, 4,
"thurs", IGNORE, 4,
"thursd", IGNORE, 4,
"tue", IGNORE, 2,
"tues", IGNORE, 2,
"tuesda", IGNORE, 2,
"ut", TZ, PACK(0),
"utc", TZ, PACK(0),
"wast", TZ, PACK(480), /* West Australian ST */
"wat", TZ, PACK(-60), /* West Africa Time */
"wed", IGNORE, 3,
"wednes", IGNORE, 3,
"weds", IGNORE, 3,
"west", DTZ, PACK(60), /* Western Europe Summer */
"wet", TZ, PACK(0), /* Western Europe */
"wetdst", DTZ, PACK(60), /* Western Europe */
"wst", TZ, PACK(480), /* West Australian ST */
"ydt", DTZ, PACK(-480), /* Yukon DT */
"yst", TZ, PACK(-540), /* Yukon ST */
};
#if 0
/*
* these time zones are orphans, i.e. the name is also used by a more
* likely-to-appear time zone
*/
"adt", DTZ, PACK(0), /* Azores DT */
"adt", DTZ, PACK(-240), /* Acre DT */
"ast", TZ, PACK(-60), /* Azores ST */
"ast", TZ, PACK(-300), /* Acre ST */
"bst", TZ, PACK(-180), /* Brazil ST */
"cdt", DTZ, PACK(-180), /* Chile DT */
"cdt", DTZ, PACK(-240), /* Cuba DT */
"cdt", DTZ, PACK(540), /* China DT */
"cst", TZ, PACK(-240), /* Chile ST */
"cst", TZ, PACK(-300), /* Cuba ST */
"cst", TZ, PACK(480), /* China ST */
"edt", DTZ, PACK(-300), /* Easter Island DT */
"edt", DTZ, PACK(-120), /* East Brazil DT */
"edt", DTZ, PACK(660), /* Australian Eastern DT */
"est", TZ, PACK(-360), /* Easter Island ST */
"est", TZ, PACK(-180), /* East Brazil ST */
"est", TZ, PACK(600), /* Australian Eastern ST */
"fdt", DTZ, PACK(-60), /* Fernando de Noronha DT */
"fst", TZ, PACK(-120), /* Fernando de Noronha ST */
"ist", TZ, PACK(330), /* Indian ST */
"sst", TZ, PACK(-660), /* Samoa ST */
"sst", TZ, PACK(480), /* Singapore ST */
"wdt", DTZ, PACK(-180), /* Western Brazil DT */
"wet", TZ, PACK(60), /* Western European Time */
"wst", TZ, PACK(-240), /* Western Brazil ST */
/* military timezones are deprecated by RFC 1123 section 5.2.14 */
"a", TZ, PACK(60), /* UTC+1h */
"b", TZ, PACK(120), /* UTC+2h */
"c", TZ, PACK(180), /* UTC+3h */
"d", TZ, PACK(240), /* UTC+4h */
"e", TZ, PACK(300), /* UTC+5h */
"f", TZ, PACK(360), /* UTC+6h */
"g", TZ, PACK(420), /* UTC+7h */
"h", TZ, PACK(480), /* UTC+8h */
"i", TZ, PACK(540), /* UTC+9h */
"k", TZ, PACK(600), /* UTC+10h */
"l", TZ, PACK(660), /* UTC+11h */
"m", TZ, PACK(720), /* UTC+12h */
"n", TZ, PACK(-60), /* UTC-1h */
"o", TZ, PACK(-120), /* UTC-2h */
"p", TZ, PACK(-180), /* UTC-3h */
"q", TZ, PACK(-240), /* UTC-4h */
"r", TZ, PACK(-300), /* UTC-5h */
"s", TZ, PACK(-360), /* UTC-6h */
"t", TZ, PACK(-420), /* UTC-7h */
"u", TZ, PACK(-480), /* UTC-8h */
"v", TZ, PACK(-540), /* UTC-9h */
"w", TZ, PACK(-600), /* UTC-10h */
"x", TZ, PACK(-660), /* UTC-11h */
"y", TZ, PACK(-720), /* UTC-12h */
"z", TZ, PACK(0), /* UTC */
#endif
/* number of entries in dateabstoks[]; handed to datebsearch() as the table length */
static unsigned int szdateabstoks =
	sizeof(dateabstoks) / sizeof(dateabstoks[0]);
/*
 * datetoktype - classify one token of a date string.
 *
 * s is the NUL-terminated token; it is not modified.
 * bigvalp, if not NULL, receives atoi(s) for a numeric token that is not
 * a time, since that number may not fit in the token's value field.
 *
 * Classification, decided by the first character of s:
 *  - digit: TIME if the token is longer than 3 characters and has ':'
 *    as its 2nd or 3rd character; otherwise YEAR if it is exactly 4
 *    characters long; otherwise DAY if this is the first such number
 *    counted in dtok_numparsed, else YEAR.
 *  - '+' or '-': TZ, the rest being read as hhmm and stored (through
 *    TOVAL) as signed minutes.
 *  - anything else: lower-cased, cut to TOKMAXLEN characters and looked
 *    up in dateabstoks[]; unknown words are typed IGNORE.
 *
 * Returns either a pointer to the matching dateabstoks[] entry or a
 * pointer to a static scratch token that the next call overwrites.
 */
datetkn *
datetoktype(s, bigvalp)
char *s;
int *bigvalp;
{
	static datetkn scratch;
	datetkn *tok = &scratch;
	datetkn *found;
	char first = s[0];
	char lowered[TOKMAXLEN+1];
	int i;

	if (isascii(first) && isdigit(first)) {
		int ntoken = strlen(s);

		if (ntoken > 3 && (s[1] == ':' || s[2] == ':')) {
			tok->type = TIME;
			return tok;
		}
		if (bigvalp != NULL) {
			/* won't fit in tok->value */
			*bigvalp = atoi(s);
		}
		/* a 4-character number is a year and is not counted */
		if (ntoken != 4 && ++dtok_numparsed == 1)
			tok->type = DAY;
		else
			tok->type = YEAR;
		return tok;
	}

	if (first == '-' || first == '+') {
		int hhmm = atoi(s + 1);
		int offset = (hhmm / 100)*60 + hhmm % 100;

		if (first == '-')
			offset = -offset;
		tok->type = TZ;
		TOVAL(tok, offset);
		return tok;
	}

	/* work on a lower-cased, truncated copy so that s stays untouched */
	for (i = 0; i < TOKMAXLEN && s[i] != '\0'; i++) {
		char ch = s[i];

		if (isascii(ch) && isupper(ch))
			lowered[i] = tolower(ch);
		else
			lowered[i] = ch;
	}
	lowered[i] = '\0';

	found = datebsearch(lowered, dateabstoks, szdateabstoks);
	if (found != NULL)
		return found;

	tok->type = IGNORE;
	return tok;
}
/*
 * datebsearch - look key up in a table of nel tokens sorted by text.
 *
 * Binary search -- from Knuth (6.2.1) Algorithm B.  Special case like this
 * is WAY faster than the generic bsearch().
 *
 * key is a NUL-terminated string; at most TOKMAXLEN characters of it
 * are compared, because the table's text field is not necessarily
 * NUL-terminated.  base points at the first of nel entries, which must
 * be in ascending order of their text field.
 *
 * Returns a pointer to the matching entry, or NULL if there is none
 * (including when nel is 0).
 *
 * The range still under consideration is kept as a pair of indices,
 * base[lo] .. base[hi-1], rather than as a pair of pointers: stepping a
 * pointer to "one before the first element" (which happens when the key
 * sorts below everything, or when nel is 0) is undefined behaviour.
 * The elements probed, and their order, are the same as with the
 * classic inclusive-bounds formulation.
 */
datetkn *
datebsearch(key, base, nel)
register char *key;
register datetkn *base;
unsigned int nel;
{
	register unsigned int lo = 0;
	register unsigned int hi = nel;		/* exclusive upper bound */

	while (lo < hi) {
		register unsigned int mid = lo + ((hi - lo - 1) >> 1);
		register datetkn *position = base + mid;
		/* cheap first-character test before the full comparison */
		register int result = key[0] - position->token[0];

		if (result == 0) {
			result = strncmp(key, position->token, TOKMAXLEN);
			if (result == 0)
				return position;
		}
		if (result < 0)
			hi = mid;
		else
			lo = mid + 1;
	}
	return NULL;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */