/* * FISG - Fast IRC Statistic Generator * Programmed and designed by Matti 'ccr' Hamalainen * (C) Copyright 2003-2004 Tecnic Software productions (TNSP) * * Please read file 'COPYING' for information on license and distribution. */ #include #include #include #include #include #include #include #include "fisg.h" #include "parser.h" #include "th_util.h" #include "th_args.h" #include "th_config.h" #include "in_formats.h" #include "out_formats.h" /* * Misc globals */ int setOutputFormat = 0; int setCurrInputFormat = -1; t_uint nsourceFileNames = 0, nconfigFileNames = 0; int sourceFileFormats[SET_MAX_INFILES]; char *sourceFileNames[SET_MAX_INFILES], *destFileName = NULL, *userFilename = NULL, *dumpUserFileName = NULL; char *configFileNames[SET_MAX_INFILES]; char *progName = NULL; /* * Options and help */ t_opt optList[] = { { 0, '?', "help", "Show this help and exit", 0 }, { 1, 'V', "version", "Show version and exit", 0 }, { 2, 'o', "output", "Specify output file (default: stdout)", 1 }, { 3, 'u', "user-file", "Users file (default: users.cfg)", 1 }, { 4, 'f', "format", "Specify input format (no default)", 1 }, { 5, 'O', "output-format","Specify output format", 1 }, { 6, 'L', "list-formats", "Show list of predefined log- and output-formats", 0 }, { 7, 'c', "config", "Specify configuration file (no default)", 1 }, { 8, 'q', "quiet", "Use multiple times for more silence", 0 }, { 9, 'd', "dump-userfile","Dump internal user information to given file", 1 }, }; const int optListN = (sizeof(optList) / sizeof(t_opt)); /*#define NICKDEBUG*/ void NDMSG(const char *pcFormat, ...) { #ifdef NICKDEBUG va_list ap; va_start(ap, pcFormat); fprintf(stderr, "ND: "); vfprintf(stderr, pcFormat, ap); va_end(ap); #endif } void NDPRINT(const char *pcFormat, ...) { #ifdef NICKDEBUG va_list ap; va_start(ap, pcFormat); vfprintf(stderr, pcFormat, ap); va_end(ap); #endif } void showHelp() { th_showHelp(stdout, optList, optListN, progName, "[options] [source#1] [source#2...]"); } void handleOpt(const int optN, char *optArg, char *currArg) { int i; BOOL isFound; switch (optN) { case 0: showHelp(); exit(0); break; case 1: printf("%s v%s %s\n", th_prog_name, th_prog_version, th_prog_author); exit(0); break; case 2: /* Specify output filename */ if (optArg) destFileName = optArg; else { THERR("No output filename specified!\n"); exit(2); } break; case 3: /* Specify user-file filename */ if (optArg) userFilename = optArg; else { THERR("No userfile filename specified!\n"); exit(2); } break; case 4: /* Specify input format */ if (optArg) { /* Go through the list */ isFound = FALSE; i = 0; while ((i < nInputFormats) && (!isFound)) { if (strcmp(optArg, inputFormats[i].ifName) == 0) isFound = TRUE; else i++; } /* Check if found from predefined formats */ if (!isFound) { THERR("Invalid input (log-file) format '%s'\n", optArg); exit(2); } setCurrInputFormat = i; } else { THERR("No input (logfile) format specified!\n"); exit(2); } break; case 5: /* Specify output format */ if (optArg) { /* Go through the list */ isFound = FALSE; i = 0; while ((i < nOutputFormats) && (!isFound)) { if (strcmp(optArg, outputFormats[i].ofName) == 0) isFound = TRUE; else i++; } /* Check */ if (!isFound) { THERR("Invalid output format '%s'\n", optArg); exit(2); } setOutputFormat = i; } else { THERR("No output format specified!\n"); exit(2); } break; case 6: /* Show list of input and output formats */ THERR("Available pre-defined INPUT (log) formats:\n"); for (i = 0; i < nInputFormats; i++) { fprintf(stderr, " %-8s - %s %s\n", inputFormats[i].ifName, inputFormats[i].ifDescription, (i == 0) ? "(default)" : ""); } fprintf(stderr, "\n"); THERR("Available OUTPUT formats:\n"); for (i = 0; i < nOutputFormats; i++) { fprintf(stderr, " %-8s - %s %s\n", outputFormats[i].ofName, outputFormats[i].ofDescription, (i == setOutputFormat) ? "(default)" : ""); } fprintf(stderr, "\n"); exit(0); break; case 7: /* Specify configuration filename */ if (optArg) { if (nconfigFileNames < SET_MAX_INFILES) { configFileNames[nconfigFileNames] = optArg; nconfigFileNames++; } } else { THERR("No configuration filename specified!\n"); exit(2); } break; case 8: /* Quiet -- lessen verbosity */ th_verbosityLevel--; break; case 9: /* Specify dump-userfile filename */ if (optArg) { dumpUserFileName = optArg; } else { THERR("No user-dump filename specified!\n"); exit(2); } break; default: /* Error */ THERR("Unknown argument '%s'.\n", currArg); break; } } void handleFile(char *currArg) { /* Check if current input format has been set */ if (setCurrInputFormat < 0) { THERR("You need to specify an input log-format! (-f )\n"); exit(3); } /* Add a filename */ if (nsourceFileNames < SET_MAX_INFILES) { sourceFileNames[nsourceFileNames] = currArg; sourceFileFormats[nsourceFileNames] = setCurrInputFormat; nsourceFileNames++; } else { THERR("Maximum number of input files (%i) exceeded!\n", SET_MAX_INFILES); exit(3); } } /* * Allocates and initializes a new stats structure */ t_stats *fisg_stats_new(void) { t_stats *pStats; /* Allocate memory for new node */ pStats = (t_stats *) calloc(1, sizeof(t_stats)); if (pStats == NULL) return NULL; /* Initialize fields */ pStats->usersList = user_data_new(); pStats->usersIgnored = user_data_new(); pStats->usersRemoved = user_data_new(); /* Return result */ return pStats; } /* * Deallocates stats structure */ void fisg_stats_free(t_stats *pStats) { assert(pStats); /* Free user data */ user_data_free(pStats->usersList); user_data_free(pStats->usersIgnored); user_data_free(pStats->usersRemoved); /* Free hashlists */ th_strhash_free(pStats->nickList); th_strhash_free(pStats->urlList); th_strlist_free(pStats->topicList); /* Free indexes */ th_strindex_free(pStats->urlIndex); th_strindex_free(pStats->topicIndex); /* Free stats structure */ free(pStats); } /* * Cleaning pass: * - move ignored users to "ignored list" * - move users who have stats below specified limits to "removed list" */ void fisg_stats_clean(t_stats *pStats) { t_user_data *pData; t_user_entry *pCurr, *pNext; assert(pStats); /* Go through main linked list of users, moving nodes * to other respective lists according to wanted rules */ pData = user_data_new(); if (!pData) { THERR("Could not allocate temporary t_user_data structure!\n"); return; } pCurr = pStats->usersList->pList; while (pCurr) { pNext = pCurr->pNext; if (pCurr->isIgnored) { /* Move ignored user to ignored list */ user_insert(pStats->usersIgnored, pCurr); } else if ((pCurr->nChars == 0) && (pCurr->nPublics == 0)) { /* Remove user who does not pass qualifications */ user_insert(pStats->usersRemoved, pCurr); } else { /* Move to new list */ user_insert(pData, pCurr); } pCurr = pNext; } /* Move the new list to current list */ pStats->usersList->pList = pData->pList; /* We can't use user_data_free() here because that would delete all nodes */ free(pData); } /* * Compute stats from userlist */ int fisg_stats_pass1(t_stats *pStats, t_fisgconfig *pCfg) { t_user_entry *pCurr; int i; t_float iTotalActivity, iMaxActivity; assert(pStats); /* Go through the userlist */ pCurr = pStats->usersList->pList; while (pCurr) { /* Activity */ iTotalActivity = 0; for (i = 0; i < SET_HOURS_DAY; i++) { if (pCfg->usePisgScoring) pCurr->fActivityPerHour[i] = pCurr->nPublicsPerHour[i]; else pCurr->fActivityPerHour[i] = ((t_float) pCurr->nWordsPerHour[i]) * ((t_float) pCurr->nPublicsPerHour[i]); iTotalActivity += pCurr->fActivityPerHour[i]; pStats->fActivityPerHour[i] += pCurr->fActivityPerHour[i]; } /* Compute activity-% for each hour */ for (i = 0; i < SET_HOURS_DAY; i++) { if (iTotalActivity > 0) { pCurr->fActivityPerHour[i] = (pCurr->fActivityPerHour[i] / iTotalActivity) * 100.0f; } else pCurr->fActivityPerHour[i] = 0.0f; } /* Compute W/P and C/W */ if (pCurr->nPublics > 0) pCurr->fWordsPerPublic = ((t_float) pCurr->nWords / (t_float) pCurr->nPublics); else pCurr->fWordsPerPublic = 0; if (pCurr->nWords > 0) pCurr->fCharsPerWord = ((t_float) pCurr->nChars / (t_float) pCurr->nWords); else pCurr->fCharsPerWord = 0; /* Compute total score */ if (pCfg->usePisgScoring) pCurr->fTotalScore = pCurr->nPublics; else pCurr->fTotalScore = (pCurr->fWordsPerPublic + pCurr->fCharsPerWord) * pCurr->nPublics; /* Next node */ pCurr = pCurr->pNext; } /* Compute total activity percent */ pStats->activityPeak = -1; iMaxActivity = -1; iTotalActivity = 0.0f; for (i = 0; i < SET_HOURS_DAY; i++) { iTotalActivity += pStats->fActivityPerHour[i]; if (pStats->fActivityPerHour[i] > iMaxActivity) { iMaxActivity = pStats->fActivityPerHour[i]; pStats->activityPeak = i; } } if (iTotalActivity > 0) { for (i = 0; i < SET_HOURS_DAY; i++) { pStats->fActivityPerHour[i] = (pStats->fActivityPerHour[i] / iTotalActivity) * 100.0f; } } return 0; } int fisg_stats_pass2(t_stats *pStats, t_fisgconfig *pCfg) { t_user_entry *pCurr; t_ulint statMax, nUser; assert(pStats); /* Get configuration options */ if (pCfg->statOnlyListed) { statMax = pCfg->showTopUserMax; if (statMax > pStats->usersList->n) statMax = pStats->usersList->n; } else statMax = pStats->usersList->n; /* Initialize */ pStats->mostStupid = pStats->mostLoud = pStats->mostActions = pStats->mostModes = pStats->mostKicks = pStats->mostKicked = pStats->mostCaps = pStats->mostHappy = pStats->mostSad = pStats->mostURLs = pStats->mostJoins = pStats->mostTopics = pStats->usersList->ppIndex[0]; /* Go through the userlist */ for (nUser = 0; nUser < statMax; nUser++) { pCurr = pStats->usersList->ppIndex[nUser]; /* More stupid */ if (pCurr->nQuestions >= pStats->mostStupid->nQuestions) pStats->mostStupid = pCurr; /* More loud? */ if (pCurr->nYelling >= pStats->mostLoud->nYelling) pStats->mostLoud = pCurr; /* More actions? */ if (pCurr->nActions >= pStats->mostActions->nActions) pStats->mostActions = pCurr; /* More kicks? */ if (pCurr->nKicks >= pStats->mostKicks->nKicks) pStats->mostKicks = pCurr; /* More kicked? */ if (pCurr->nGotKicked >= pStats->mostKicked->nGotKicked) pStats->mostKicked = pCurr; /* More caps per chars? */ if (pCurr->nCaps > 0) pCurr->fCapsPercent = ((t_float) pCurr->nCaps / (t_float) pCurr->nChars) * 100.0f; if (pCurr->fCapsPercent > pStats->mostCaps->fCapsPercent) pStats->mostCaps = pCurr; /* More happy? */ if (pCurr->fHappiness > pStats->mostHappy->fHappiness) pStats->mostHappy = pCurr; /* More sad? */ if (pCurr->fHappiness < pStats->mostSad->fHappiness) pStats->mostSad = pCurr; /* More URLs pasted? */ if (pCurr->nURLs >= pStats->mostURLs->nURLs) pStats->mostURLs = pCurr; /* More Joins? */ if (pCurr->nJoins >= pStats->mostJoins->nJoins) pStats->mostJoins = pCurr; /* More Topics? */ if (pCurr->nTopics >= pStats->mostTopics->nTopics) pStats->mostTopics = pCurr; } return 0; } /* Compare function for qsort() that compares 2 nodes for fTotalScore */ int stats_index_cmp(const void *pNode1, const void *pNode2) { t_user_entry *pUser1, *pUser2; pUser1 = * (t_user_entry **) pNode1; pUser2 = * (t_user_entry **) pNode2; if (pUser1->fTotalScore > pUser2->fTotalScore) return -1; else if (pUser1->fTotalScore < pUser2->fTotalScore) return 1; else return 0; } /* * Output current user information to a give file */ void fisg_output_userfile_user(FILE *f, t_stats *pStats, t_user_entry *pNode) { t_str_node *pNick; int i; assert(f); assert(pNode); /* If ignored ... */ if (pNode->isIgnored) fprintf(f, "!"); /* Username/handle */ fprintf(f, "%s:", pNode->userHandle); /* Nicks */ for (i = 0; i < SET_HASH_MAXINDEX; i++) { /* Find from linked list */ pNick = pStats->nickList[i]; while (pNick) { if (pNick->pData == pNode) fprintf(f, "%s ", pNick->pcStr); pNick = pNick->pNext; } } /* Rest of information */ fprintf(f, ":"); if (pNode->picPath) fprintf(f, "%s:", pNode->picPath); else if (pNode->linkURL) fprintf(f, ":"); if (pNode->linkURL) fprintf(f, "%s", pNode->linkURL); fprintf(f, "\n"); } void fisg_output_userfile(FILE *f, t_stats *pStats) { t_user_entry *pCurr; assert(f); assert(pStats); fprintf(f, "# Userfile for %s (%s)\n", th_prog_name, th_prog_fullname); /* Output ignored users */ fprintf(f, "# Ignored users\n"); pCurr = pStats->usersIgnored->pList; while (pCurr) { fisg_output_userfile_user(f, pStats, pCurr); pCurr = pCurr->pNext; } /* Output normal users */ fprintf(f, "\n\n# Normal users\n"); pCurr = pStats->usersList->pList; while (pCurr) { fisg_output_userfile_user(f, pStats, pCurr); pCurr = pCurr->pNext; } } BOOL fisg_warn_deprecated(t_config_item *pNode) { THERR("Setting '%s' is deprecated, update your configuration file!\n", pNode->itemName); return TRUE; } /* * NOTICE: Since this utility is designed to be "one pass/shot" program, * we don't free any memory used here. If you take this code, remember it * and add possible *free()'s where appropriate. */ int main(int argc, char *argv[]) { FILE *tmpFile; t_uint i, j; int iResult, n; t_fisgconfig genSet; t_config *genConfig = NULL; t_stats *genStats = NULL; time_t myTime1, myTime2; char tmpStr[1024] = ""; /* * Initialize fundamentals */ time(&myTime1); srandom(myTime1); progName = argv[0]; th_init(FISG_NAME, FISG_FULLNAME, FISG_VERSION, FISG_COPYRIGHT, NULL); /* * Allocate stats */ genStats = fisg_stats_new(); if (genStats == NULL) { THERR("Could not allocate memory for statistics!\n"); return -11; } /* * Parse arguments */ th_processArgs(argc, argv, optList, optListN, handleOpt, handleFile); if (nsourceFileNames <= 0) { THERR("No input files specified!\n"); return 0; } /* * Initialize configuration, read and parse configuration files */ /* Initialize configuration for main program */ genConfig = th_config_new(); th_config_add_str(genConfig, "gen_channel", NULL, &genSet.ircChannel, "#????"); th_config_add_str(genConfig, "gen_ircnet", NULL, &genSet.ircNetwork, NULL); th_config_add_str(genConfig, "gen_message", NULL, &genSet.message, NULL); th_config_add_str(genConfig, "gen_dateformat", NULL, &genSet.dateFormat, "%c"); th_config_add_bool(genConfig,"gen_auto_follow_nicks", NULL, &genSet.autoFollowNicks, FALSE); th_config_add_bool(genConfig,"gen_auto_follow_heuristics",NULL,&genSet.autoHeuristics, FALSE); th_config_add_str(genConfig, "gen_user_file", NULL, &genSet.userFilename, "users.genSet"); th_config_add_bool(genConfig,"gen_use_pisg_scoring", NULL, &genSet.usePisgScoring, FALSE); th_config_add_bool(genConfig,"gen_strip_ctrlchars", NULL, &genSet.stripCtrlChars, FALSE); th_config_add_bool(genConfig,"gen_stat_active_times", NULL, &genSet.statActiveTimes, TRUE); th_config_add_bool(genConfig,"gen_stat_top_users", NULL, &genSet.statTopUsers, TRUE); th_config_add_bool(genConfig,"gen_stat_almost_top", NULL, &genSet.statAlmostTop, TRUE); th_config_add_bool(genConfig,"gen_stat_urls", NULL, &genSet.statURLs, TRUE); th_config_add_bool(genConfig,"gen_stat_topics", NULL, &genSet.statTopics, TRUE); th_config_add_bool(genConfig,"gen_stat_big_numbers", NULL, &genSet.statBigNumbers, TRUE); th_config_add_bool(genConfig,"gen_stat_only_listed", NULL, &genSet.statOnlyListed, FALSE); th_config_add_bool(genConfig,"gen_stat_show_happiness",NULL, &genSet.showHappy, TRUE); th_config_add_bool(genConfig,"gen_stat_show_comment", NULL, &genSet.showComment, TRUE); th_config_add_bool(genConfig,"gen_stat_show_picture", NULL, &genSet.showPicture, FALSE); th_config_add_bool(genConfig,"gen_stat_show_url", NULL, &genSet.showURL, FALSE); th_config_add_uint(genConfig,"gen_showmax", fisg_warn_deprecated, &genSet.showTopUserMax, 35); th_config_add_uint(genConfig,"gen_showrest", fisg_warn_deprecated, &genSet.showAlmostMax, 20); th_config_add_uint(genConfig,"gen_show_topmax", NULL, &genSet.showTopUserMax, 35); th_config_add_uint(genConfig,"gen_show_restmax", NULL, &genSet.showAlmostMax, 20); th_config_add_uint(genConfig,"gen_show_urlsmax", NULL, &genSet.showURLsMax, 10); th_config_add_uint(genConfig,"gen_show_topicsmax", NULL, &genSet.showTopicsMax, 10); th_config_add_uint(genConfig,"gen_min_comment_length", NULL, &genSet.commentMinLength, 10); th_config_add_uint(genConfig,"gen_max_comment_length", NULL, &genSet.commentMaxLength, 60); /* th_config_add_str(genConfig, "", NULL, &, ); th_config_add_bool(genConfig,"", NULL, &, ); th_config_add_int(genConfig, "", NULL, &, ); */ /* Initialize modules configuration */ for (n = 0; n < nOutputFormats; n++) if (outputFormats[n].ofInit(genConfig) != 0) { THERR("Error initializing output module #%d (%s).\n", n, outputFormats[n].ofName); } /* Read configuration files */ if (nconfigFileNames <= 0) { THERR("No configuration file(s) specified.\n"); } for (i = 0; i < nconfigFileNames; i++) { THMSG(1, "Configuration '%s'\n", configFileNames[i]); if (th_config_read(configFileNames[i], genConfig) != 0) return -12; } /* * Parse the users file */ if (userFilename) parse_userfile(userFilename, genStats, &genSet); else parse_userfile(genSet.userFilename, genStats, &genSet); /* * Read the source-file(s) */ THMSG(1, "Parsing %d sources. Please wait...\n", nsourceFileNames); THMSG(2, "Processed "); for (i = 0; i < nsourceFileNames; i++) { /* Try to open the logfile */ if ((tmpFile = fopen(sourceFileNames[i], "ra")) == NULL) { THERR("Error opening input file '%s' (%s)\n", sourceFileNames[i], strerror(errno)); return -1; } /* Parse with selected parser */ iResult = fisg_parse_log(tmpFile, genStats, &inputFormats[sourceFileFormats[i]], &genSet); if (th_verbosityLevel >= 2) { /* Show progress meter */ for (j = 0; j < strlen(tmpStr); j++) fputc('\b', stderr); snprintf(tmpStr, sizeof(tmpStr), "%i%%", (((i+1) * 100) / nsourceFileNames)); fputs(tmpStr, stderr); } /* Close file, report errors */ fclose(tmpFile); if (iResult < 0) { THERR("Error #%i reading file (%s)\n", iResult, strerror(errno)); return 2; } } THPRINT(2, "\n"); /* * Calculate rank and stats for sorting */ THMSG(2, "Computing statistics...\n"); if (fisg_stats_pass1(genStats, &genSet) < 0) { THERR("Error while computing rankings!\n"); return -10; } /* * Perform cleaning pass */ THMSG(2, "Cleaning up...\n"); fisg_stats_clean(genStats); /* * Index userlists, check number of users */ THMSG(2, "Indexing...\n"); if (user_data_makeindex(genStats->usersList) != 0 || user_data_makeindex(genStats->usersIgnored) != 0 || user_data_makeindex(genStats->usersRemoved) != 0) { THERR("Error while indexing userlists!\n"); return -9; } if (genStats->usersList->n <= 0) { THERR("No users found? Check that you really specified correct log-format!\n"); return -10; } /* * Sort the indexes by score */ THMSG(1, "%ld users (%ld total, %ld ignored, %ld removed), sorting...\n", genStats->usersList->n, (genStats->usersList->n + genStats->usersIgnored->n + genStats->usersRemoved->n), genStats->usersIgnored->n, genStats->usersRemoved->n); qsort(genStats->usersList->ppIndex, genStats->usersList->n, sizeof(t_user_entry *), stats_index_cmp); /* * Compute rest of user-related statistics */ THMSG(1, "Computing user-related stats...\n"); if (fisg_stats_pass2(genStats, &genSet) < 0) { THERR("Error while computing statistics!\n"); return -10; } /* * Create sorted indexes */ THMSG(1, "Computing rest of stats...\n"); if (genStats->urlList) { genStats->urlIndex = th_strhash_makeindex(genStats->urlList); if (genStats->urlIndex) th_strindex_sort_nused(genStats->urlIndex); } if (genStats->topicList) { genStats->topicIndex = th_strlist_makeindex(genStats->topicList); if (!genStats->topicIndex) { THERR("Could not create topicIndex!\n"); return -10; } } /* * Get current and calculate elapsed time */ time(&myTime2); genStats->nTimeElapsed = (myTime2 - myTime1); /* * Output statistics in wanted format */ if ((setOutputFormat < 0) || (setOutputFormat >= nOutputFormats)) { THERR("Invalid output format, falling back to default.\n"); setOutputFormat = 0; } THMSG(1, "Using %s\n", outputFormats[setOutputFormat].ofDescription); if (destFileName == NULL) tmpFile = stdout; else if ((tmpFile = fopen(destFileName, "wa")) == NULL) { THERR("Error opening output file '%s' (%s)\n", destFileName, strerror(errno)); return -1; } iResult = outputFormats[setOutputFormat].ofFunction(tmpFile, genStats, &genSet); fclose(tmpFile); /* * Output userfile, if wanted */ if (dumpUserFileName) { if ((tmpFile = fopen(dumpUserFileName, "wa")) == NULL) { THERR("Error opening dump-userfile '%s' (%s)\n", dumpUserFileName, strerror(errno)); } else { /* Output userfile */ fisg_output_userfile(tmpFile, genStats); fclose(tmpFile); } } /* * OK! Show final stats */ THMSG(1, "%ld lines in %ld logfile(s), total of %1.2f MB\n", genStats->nLines, genStats->nLogFiles, ((t_float) genStats->nChars) / (1024.0f*1024.0f) ); if (iResult == 0) { THMSG(1, "Done. Time elapsed %ld hours, %ld minutes and %ld seconds.\n", (genStats->nTimeElapsed / (60*60)), (genStats->nTimeElapsed % (60*60)) / 60, (genStats->nTimeElapsed % (60*60)) % 60 ); } else THERR("Error creating output file! Return code #%i.\n", iResult); /* * Free allocated memory */ if (genStats) fisg_stats_free(genStats); if (genConfig) th_config_free(genConfig); return 0; }