/* $Id: cssclean.c,v 1.7 2006/05/13 01:13:01 jonz Exp $ */ /* DSPAM COPYRIGHT (C) 2002-2006 JONATHAN A. ZDZIARSKI This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* cssclean.c - rebuild a hash database, omitting hapaxes */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TIME_WITH_SYS_TIME # include # include #else # ifdef HAVE_SYS_TIME_H # include # else # include # endif #endif #define READ_ATTRIB(A) _ds_read_attribute(agent_config, A) #define MATCH_ATTRIB(A, B) _ds_match_attribute(agent_config, A, B) int DO_DEBUG #ifdef DEBUG = 1 #else = 0 #endif ; #include "read_config.h" #include "hash_drv.h" #include "error.h" #include "language.h" #define SYNTAX "syntax: cssclean [filename]" int cssclean(const char *filename); int main(int argc, char *argv[]) { int r; if (argc<2) { fprintf(stderr, "%s\n", SYNTAX); exit(EXIT_FAILURE); } agent_config = read_config(NULL); if (!agent_config) { LOG(LOG_ERR, ERR_AGENT_READ_CONFIG); exit(EXIT_FAILURE); } r = cssclean(argv[1]); if (r) { fprintf(stderr, "cssclean failed on error %d\n", r); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); } int cssclean(const char *filename) { int i; hash_drv_header_t header; void *offset; struct _hash_drv_map old, new; hash_drv_spam_record_t rec; unsigned long filepos; char newfile[128]; unsigned long hash_rec_max = HASH_REC_MAX; unsigned long max_seek = HASH_SEEK_MAX; unsigned long max_extents = 0; unsigned long extent_size = HASH_EXTENT_MAX; int flags = 0; if (READ_ATTRIB("HashRecMax")) hash_rec_max = strtol(READ_ATTRIB("HashRecMax"), NULL, 0); if (READ_ATTRIB("HashExtentSize")) extent_size = strtol(READ_ATTRIB("HashExtentSize"), NULL, 0); if (READ_ATTRIB("HashMaxExtents")) max_extents = strtol(READ_ATTRIB("HashMaxExtents"), NULL, 0); if (MATCH_ATTRIB("HashAutoExtend", "on")) flags = HMAP_AUTOEXTEND; if (READ_ATTRIB("HashMaxSeek")) max_seek = strtol(READ_ATTRIB("HashMaxSeek"), NULL, 0); snprintf(newfile, sizeof(newfile), "/tmp/%u.css", (unsigned int) getpid()); if (_hash_drv_open(filename, &old, 0, max_seek, max_extents, extent_size, flags)) { return EFAILURE; } if (_hash_drv_open(newfile, &new, hash_rec_max, max_seek, max_extents, extent_size, flags)) { _hash_drv_close(&old); return EFAILURE; } filepos = sizeof(struct _hash_drv_header); header = old.addr; while(filepos < old.file_len) { for(i=0;ihash_rec_max;i++) { rec = old.addr+filepos; if (rec->hashcode && rec->nonspam + rec->spam > 1) { if (_hash_drv_set_spamrecord(&new, rec)) { LOG(LOG_WARNING, "aborting on error"); _hash_drv_close(&new); _hash_drv_close(&old); unlink(newfile); return EFAILURE; } } filepos += sizeof(struct _hash_drv_spam_record); } offset = old.addr + filepos; header = offset; filepos += sizeof(struct _hash_drv_header); } _hash_drv_close(&new); _hash_drv_close(&old); rename(newfile, filename); return 0; }