/* * FISG - Logfile parser * Programmed and designed by Matti 'ccr' Hamalainen * (C) Copyright 2003-2004 Tecnic Software productions (TNSP) * * Please read file 'COPYING' for information on license and distribution. */ #include #include #include #include #include "fisg.h" #include "th_util.h" #include "th_config.h" #include "th_string.h" #include "in_formats.h" /* * Parsing functions */ int fisg_parse_int(char *inLine, size_t iLen, size_t *linePos) { int iResult = 0; while (th_isdigit(inLine[*linePos]) && (iLen--)) { iResult *= 10; iResult += (inLine[(*linePos)++] - '0'); } return iResult; } t_user_entry *fisg_parse_user(t_stats *pStats, char *newNick) { t_user_entry *tmpUser; t_str_node *tmpNick; /* Check if nick matches existing user record */ tmpNick = nickhash_search(pStats->nickList, newNick); if (tmpNick) { /* Yes, increase number of uses */ tmpNick->nUsed++; return tmpNick->pData; } else { /* No, we need to create a new one */ tmpUser = user_new(newNick); tmpNick = th_strnode_new(newNick, 1, tmpUser); /* Insert into nicklist */ if (nickhash_insert(pStats->nickList, tmpNick) != 0) { /* Failed, due to hash */ THERR("nickhash_insert() failed, hash: '%s'\n", newNick); user_free(tmpUser); th_strnode_free(tmpNick); return NULL; } /* Insert into userlist */ user_insert(pStats->usersList, tmpUser); return tmpUser; } } int fisg_parse_generic(char *inLine, char *fmt, t_lineinfo *lineInfo, t_stats *pStats) { size_t linePos, i; BOOL isOK, isEnd, tmpNick1S = FALSE, tmpNick2S = FALSE; t_user_entry *tmpUser; char tmpStr[SET_MAX_NICKLEN + 1] = "", tmpNick1[SET_MAX_NICKLEN + 1], tmpNick2[SET_MAX_NICKLEN + 1], tmpDest, c; if (!fmt) return -1; /* Initialize */ linePos = 0; tmpUser = NULL; isOK = TRUE; /* Parse the line via format-string */ while (*fmt && isOK) { if (*fmt == '%') { switch (*(++fmt)) { /* Generic matching */ case '?': /* Match anything */ fmt++; if (inLine[linePos]) linePos++; else isOK = FALSE; break; case '*': /* Match anything until next char */ fmt++; while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++; break; case '@': /* Match irssi style optional '@|+| ' */ fmt++; if (!inLine[linePos]) isOK = FALSE; if ((inLine[linePos] == '@') || (inLine[linePos] == '+') || th_isspace(inLine[linePos])) linePos++; break; /* Timestamps */ case 'H': lineInfo->ts.iHours = fisg_parse_int(inLine, 2, &linePos); fmt++; break; case 'M': lineInfo->ts.iMinutes = fisg_parse_int(inLine, 2, &linePos); fmt++; break; case 'S': lineInfo->ts.iSeconds = fisg_parse_int(inLine, 2, &linePos); fmt++; break; case 'Y': lineInfo->ts.iYear = fisg_parse_int(inLine, 4, &linePos); fmt++; break; case 'y': /* 2-digit year */ i = fisg_parse_int(inLine, 2, &linePos); if (i < 70) i += 2000; else i += 1900; lineInfo->ts.iYear = i; fmt++; break; case 'd': lineInfo->ts.iDay = fisg_parse_int(inLine, 2, &linePos); fmt++; break; case 'j': lineInfo->ts.iMonth = fisg_parse_int(inLine, 2, &linePos); fmt++; break; /* Special matches */ case 'n': case 'N': /* Nick */ tmpDest = *fmt; fmt++; /* Find the start of the nick */ th_findnext(inLine, &linePos); /* Get the nick to temp buffer */ i = 0; isEnd = FALSE; c = inLine[linePos]; if (!th_isalpha(c) && !th_isspecial(c)) isOK = FALSE; while (isOK && !isEnd) { c = inLine[linePos]; if (!c || (c == *fmt) || th_isspace(c) || (i >= SET_MAX_NICKLEN)) isEnd = TRUE; else { if (th_isalpha(c) || th_isdigit(c) || th_isspecial(c) || (c == '-')) tmpStr[i++] = inLine[linePos++]; else isOK = FALSE; } } tmpStr[i++] = 0; while (inLine[linePos] && th_isspace(inLine[linePos]) && (inLine[linePos] != *fmt)) linePos++; if (inLine[linePos] != *fmt) isOK = FALSE; /* Find user or add new */ if (isOK && (i > 0)) switch (tmpDest) { case 'n': tmpNick1S = TRUE; strcpy(tmpNick1, tmpStr); break; case 'N': tmpNick2S = TRUE; strcpy(tmpNick2, tmpStr); break; } break; case 'm': /* Mode */ fmt++; while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++; break; case 'c': /* Channel */ fmt++; while (inLine[linePos] && (inLine[linePos] != *fmt)) linePos++; break; case 't': /* Text */ fmt++; i = 0; while (inLine[linePos] && (inLine[linePos] != *fmt) && (i < SET_MAX_BUF)) lineInfo->pText[i++] = inLine[linePos++]; lineInfo->pText[i++] = 0; break; /* Error */ default: THERR("Syntax error in format-string '%s'\n", fmt); return -1; } } else { /* Check matches */ if (*fmt != inLine[linePos]) isOK = FALSE; fmt++; linePos++; } } /* while(*fmt) */ if (isOK) { if (tmpNick1S) lineInfo->pUser = fisg_parse_user(pStats, tmpNick1); if (tmpNick2S) lineInfo->pUser2 = fisg_parse_user(pStats, tmpNick2); } return !isOK; } void fisg_parse_url(char *inLine, t_user_entry *pUser, t_stats *pStats, t_fisgconfig *pCfg) { int linePos; char c, urlStr[SET_MAX_BUF + 1]; t_str_node *tmpS; assert(pUser); /* Get the text of the URL */ linePos = 0; while (*inLine && (linePos < SET_MAX_BUF) && (th_isalnum((c = *inLine)) || (c == '.') || (c == ',') || (c == '/') || (c == '-') || (c == '~') || (c == '?') || (c == '&') || (c == '%') || (c == '_') || (c == '=') || (c == ';') || (c == ':') || (c == '^') || (c == '[') || (c == ']') || (c == '-') || (c == '(') || (c == ')') || (c == '#') )) urlStr[linePos++] = *(inLine++); urlStr[linePos] = 0; /* Delete non-relevant last character */ if (th_isspace(*inLine)) { linePos--; while ((linePos > 0) && ((urlStr[linePos] == ',')|| (urlStr[linePos] == '.'))) urlStr[linePos--] = 0; } /* Add the URL in list */ tmpS = th_strhash_search(pStats->urlList, urlStr, FALSE); if (tmpS) { /* Increase number of references */ tmpS->nUsed++; } else { /* Add a new string */ tmpS = th_strnode_new(urlStr, 1, NULL); th_strhash_insert(pStats->urlList, tmpS, FALSE); } /* Increase number of URLs said by user */ pUser->nURLs++; } t_user_entry *fisg_parse_public(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg) { t_lineinfo lineInfo; t_uint nWords, nQuestions, nYelling; char *tmpStr; int linePos; BOOL isWord; if (!fmt) return NULL; /* Try to parse the line */ if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats)) return NULL; /* If the text is empty, we don't need to analyze it */ if (!lineInfo.pText[0]) return lineInfo.pUser; /* Detect HTTP-URLs */ tmpStr = strstr(lineInfo.pText, "http://"); if (tmpStr) { tmpStr += strlen("http://"); fisg_parse_url(tmpStr, lineInfo.pUser, pStats, pCfg); } /* Statisticize the actual public message-line */ linePos = 0; isWord = FALSE; nQuestions = nYelling = nWords = 0; while (lineInfo.pText[linePos]) { if (isWord && th_isspace(lineInfo.pText[linePos])) { nWords++; isWord = FALSE; } else if ((!isWord) && !th_isspace(lineInfo.pText[linePos])) { isWord = TRUE; switch (lineInfo.pText[linePos]) { case '=': case ':': case ';': switch (lineInfo.pText[linePos + 1]) { case ')': /* :) */ case 'D': /* :D */ case 'P': /* :P */ case '>': /* :> */ case ']': /* :] */ lineInfo.pUser->fHappiness++; break; case '(': /* :( */ case '[': /* :[ */ case '/': /* :/ */ case 'I': /* :I */ lineInfo.pUser->fHappiness--; break; } break; case '(': case '<': switch (lineInfo.pText[linePos + 1]) { case ':': case ';': lineInfo.pUser->fHappiness++; break; case '3': lineInfo.pUser->nLove++; break; } break; case ')': case '>': switch (lineInfo.pText[linePos + 1]) { case ':': case ';': lineInfo.pUser->fHappiness--; break; } break; } } if (th_isupper(lineInfo.pText[linePos])) lineInfo.pUser->nCaps++; switch (lineInfo.pText[linePos]) { case '!': nYelling++; break; case '?': nQuestions++; break; } lineInfo.pUser->nChars++; linePos++; } /* Add to user's stats */ if (nYelling) lineInfo.pUser->nYelling++; if (nQuestions) lineInfo.pUser->nQuestions++; lineInfo.pUser->nWords += nWords; lineInfo.pUser->nPublics++; if ((lineInfo.ts.iHours >= 0) && (lineInfo.ts.iHours < SET_HOURS_DAY)) { lineInfo.pUser->nWordsPerHour[lineInfo.ts.iHours] += nWords; lineInfo.pUser->nPublicsPerHour[lineInfo.ts.iHours]++; if (lineInfo.pUser->nWords >= (lineInfo.pUser->nWordsPerHour[lineInfo.ts.iHours] / (lineInfo.pUser->nPublicsPerHour[lineInfo.ts.iHours]+1))) { if ((!lineInfo.pUser->sComment) || (random() < (RAND_MAX / 3))) if ((strlen(lineInfo.pText) >= pCfg->commentMinLength) && (strlen(lineInfo.pText) <= pCfg->commentMaxLength)) { if (pCfg->stripCtrlChars) th_strip_ctrlchars(lineInfo.pText); th_strcpy(&lineInfo.pUser->sComment, lineInfo.pText); } } } /* Done, ok. */ return lineInfo.pUser; } int fisg_parse_nickchange(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg) { t_lineinfo lineInfo; int i; if (!fmt) return -1; /* Try to parse the line */ if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats)) return -1; /* Let's see if we can autofollow the nick-changes */ if (pCfg->autoFollowNicks && (lineInfo.pUser != lineInfo.pUser2)) { NDMSG("['%s' -> '%s'] -- ", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle); if (lineInfo.pUser->isManaged && !lineInfo.pUser2->isManaged) { NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle); th_strhash_change_pdata(pStats->nickList, lineInfo.pUser2, lineInfo.pUser); user_delete(pStats->usersList, lineInfo.pUser2); user_free(lineInfo.pUser2); lineInfo.pUser->nNickChanges++; } else if (!lineInfo.pUser->isManaged && lineInfo.pUser2->isManaged) { NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle); th_strhash_change_pdata(pStats->nickList, lineInfo.pUser, lineInfo.pUser2); user_delete(pStats->usersList, lineInfo.pUser); user_free(lineInfo.pUser); lineInfo.pUser2->nNickChanges++; } else if (pCfg->autoHeuristics) { /* * Let's try to determine the "real" user with simple heuristics */ NDPRINT("guessing... %i - ", pCfg->autoHeuristics); i = 0; if (strlen(lineInfo.pUser->userHandle) < strlen(lineInfo.pUser2->userHandle)) i--; else i++; if (th_strmatch(lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle)) i--; if (th_strmatch(lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle)) i++; if (th_strmatch(lineInfo.pUser2->userHandle, "*^*") || th_strmatch(lineInfo.pUser2->userHandle, "*_*")) i -= 2; if (th_strmatch(lineInfo.pUser->userHandle, "*^*") || th_strmatch(lineInfo.pUser->userHandle, "*_*")) i += 2; if (i <= 0) { NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser2->userHandle, lineInfo.pUser->userHandle); th_strhash_change_pdata(pStats->nickList, lineInfo.pUser2, lineInfo.pUser); user_delete(pStats->usersList, lineInfo.pUser2); user_free(lineInfo.pUser2); lineInfo.pUser->nNickChanges++; lineInfo.pUser->isManaged = TRUE; } else { NDPRINT("'%s' is alias to '%s'\n", lineInfo.pUser->userHandle, lineInfo.pUser2->userHandle); th_strhash_change_pdata(pStats->nickList, lineInfo.pUser, lineInfo.pUser2); user_delete(pStats->usersList, lineInfo.pUser); user_free(lineInfo.pUser); lineInfo.pUser2->nNickChanges++; lineInfo.pUser2->isManaged = TRUE; } } else NDPRINT("fail.\n"); } else { /* Update the stats */ lineInfo.pUser->nNickChanges++; lineInfo.pUser2->nNickChanges++; } /* Done, ok. */ return 0; } t_user_entry *fisg_parse_misc(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg) { t_lineinfo lineInfo; if (!fmt) return NULL; /* Try to parse the line */ if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats)) return NULL; /* Done, ok. */ return lineInfo.pUser; } int fisg_parse_kick(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg) { t_lineinfo lineInfo; if (!fmt) return -1; /* Try to parse the line */ if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats)) return -1; /* Add to user's stats */ lineInfo.pUser->nGotKicked++; lineInfo.pUser2->nKicks++; /* Done, ok. */ return 0; } int fisg_parse_topicchange(char *infLine, char *fmt, t_stats *pStats, t_fisgconfig *pCfg) { t_lineinfo lineInfo; t_str_node *tmpS; if (!fmt) return -1; /* Try to parse the line */ if (fisg_parse_generic(infLine, fmt, &lineInfo, pStats)) return -2; /* Add to user's stats and topic list */ lineInfo.pUser->nTopics++; tmpS = th_strnode_new(lineInfo.pText, 1, (void *) lineInfo.pUser); th_strlist_insert(&pStats->topicList, tmpS); /* Done, ok. */ return 0; } /* * A generic logfile parser */ int fisg_parse_log(FILE *inFile, t_stats *pStats, t_logformat *logFmt, t_fisgconfig *pCfg) { char inLine[SET_MAX_BUF + 1]; size_t lineNum, linePos; t_user_entry *tmpUser; /* Initial stats */ pStats->nLogFiles++; /* Read and parse the data */ lineNum = 0; while (fgets(inLine, SET_MAX_BUF, inFile) != NULL) { linePos = 0; while (inLine[linePos] && !th_iscrlf(inLine[linePos])) linePos++; inLine[linePos] = 0; pStats->nChars += linePos; pStats->nLines++; lineNum++; linePos = 0; /* Check if the line is OK and what type it is */ if (inLine[0]) { if (!fisg_parse_public(inLine, logFmt->fmtPublic, pStats, pCfg)) if (!fisg_parse_public(inLine, logFmt->fmtNotice, pStats, pCfg)) { if ((tmpUser = fisg_parse_public(inLine, logFmt->fmtAction, pStats, pCfg))) tmpUser->nActions++; else if ((tmpUser = fisg_parse_public(inLine, logFmt->fmtNotice, pStats, pCfg))) tmpUser->nNotices++; else if ((tmpUser = fisg_parse_misc(inLine, logFmt->fmtJoin, pStats, pCfg))) tmpUser->nJoins++; else if (fisg_parse_topicchange(inLine, logFmt->fmtTopicChange, pStats, pCfg)) if (fisg_parse_kick(inLine, logFmt->fmtKick, pStats, pCfg)) if (fisg_parse_nickchange(inLine, logFmt->fmtNickChange, pStats, pCfg)) { } } } } /* while */ return 0; }