/* $Id: tokenizer.h,v 1.5 2006/05/13 01:12:59 jonz Exp $ */
/*
DSPAM
COPYRIGHT (C) 2002-2006 JONATHAN A. ZDZIARSKI
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2
of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _TOKENIZER_H
# define _TOKENIZER_H
#include "diction.h"
#include "nodetree.h"
#include "error.h"
#include "storage_driver.h"
#include "decode.h"
/*
 * SBPH_SIZE: compile-time constant with the value 5.
 * NOTE(review): presumably the number of tokens in the SBPH sliding window,
 * and therefore the expected length of the previous_tokens arrays passed to
 * the _ds_map_* routines and _ds_sbph_clear -- confirm against tokenizer.c.
 */
#define SBPH_SIZE 5
/*
 * _ds_tokenize: tokenize a message's headers and body into a diction.
 *
 * Shares its parameter list with _ds_tokenize_sbph and _ds_tokenize_ngram.
 * NOTE(review): presumably selects one of those two based on settings in
 * CTX -- confirm in tokenizer.c.
 *
 *   CTX      DSPAM context for this operation
 *   headers  header text; non-const, so the implementation may modify it
 *            in place -- TODO confirm before passing shared/read-only data
 *   body     body text; non-const, same caveat as headers
 *   diction  diction handle; presumably receives the generated tokens
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_tokenize(
DSPAM_CTX * CTX,
char *headers,
char *body,
ds_diction_t diction);
/*
 * _ds_tokenize_sbph: SBPH variant of the tokenizer.
 *
 * NOTE(review): SBPH presumably stands for Sparse Binary Polynomial
 * Hashing; the per-token work is presumably done by the _ds_map_* routines
 * -- confirm in tokenizer.c.
 *
 *   CTX      DSPAM context for this operation
 *   headers  header text; non-const, may be modified in place -- TODO confirm
 *   body     body text; non-const, may be modified in place -- TODO confirm
 *   diction  diction handle; presumably receives the generated tokens
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_tokenize_sbph(
DSPAM_CTX * CTX,
char *headers,
char *body,
ds_diction_t diction);
/*
 * _ds_tokenize_ngram: n-gram variant of the tokenizer.
 *
 * NOTE(review): the per-token work is presumably done by the _ds_process_*
 * routines -- confirm in tokenizer.c.
 *
 *   CTX      DSPAM context for this operation
 *   headers  header text; non-const, may be modified in place -- TODO confirm
 *   body     body text; non-const, may be modified in place -- TODO confirm
 *   diction  diction handle; presumably receives the generated tokens
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_tokenize_ngram(
DSPAM_CTX * CTX,
char *headers,
char *body,
ds_diction_t diction);
/* _ds_process: ngram token generation routines */
/*
 * _ds_process_header_token: n-gram handling for one token taken from a
 * message header.
 *
 *   CTX             DSPAM context for this operation
 *   joined_token    non-const token string; NOTE(review): the name suggests
 *                   it may already be combined with its neighbour, and the
 *                   callee may modify it -- confirm
 *   previous_token  read-only (const) token that preceded this one;
 *                   whether NULL is accepted is not visible here
 *   diction         diction handle; presumably receives the result
 *   heading         read-only (const) string; presumably the name of the
 *                   header the token came from -- confirm
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_process_header_token(
DSPAM_CTX * CTX,
char *joined_token,
const char *previous_token,
ds_diction_t diction,
const char *heading);
/*
 * _ds_process_body_token: n-gram handling for one token taken from the
 * message body. Same parameters as _ds_process_header_token minus the
 * heading argument.
 *
 *   CTX             DSPAM context for this operation
 *   joined_token    non-const token string; may be modified by the callee
 *                   -- TODO confirm
 *   previous_token  read-only (const) token that preceded this one;
 *                   whether NULL is accepted is not visible here
 *   diction         diction handle; presumably receives the result
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_process_body_token(
DSPAM_CTX * CTX,
char *joined_token,
const char *previous_token,
ds_diction_t diction);
/* _ds_map: sbph token generation routines */
/*
 * _ds_map_header_token: SBPH handling for one token taken from a message
 * header.
 *
 *   CTX              DSPAM context for this operation
 *   token            non-const token string; may be modified by the callee
 *                    -- TODO confirm
 *   previous_tokens  array of token pointers, writable by the callee;
 *                    NOTE(review): presumably a window of SBPH_SIZE
 *                    entries that this call updates -- confirm length and
 *                    ownership of the stored strings
 *   diction          diction handle; presumably receives the result
 *   heading          read-only (const) string; presumably the name of the
 *                    header the token came from -- confirm
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_map_header_token(
DSPAM_CTX * CTX,
char *token,
char **previous_tokens,
ds_diction_t diction,
const char *heading);
/*
 * _ds_map_body_token: SBPH handling for one token taken from the message
 * body. Same parameters as _ds_map_header_token minus the heading argument.
 *
 *   CTX              DSPAM context for this operation
 *   token            non-const token string; may be modified by the callee
 *                    -- TODO confirm
 *   previous_tokens  array of token pointers, writable by the callee;
 *                    NOTE(review): presumably a window of SBPH_SIZE
 *                    entries -- confirm length and ownership
 *   diction          diction handle; presumably receives the result
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_map_body_token(
DSPAM_CTX * CTX,
char *token,
char **previous_tokens,
ds_diction_t diction);
/*
 * _ds_degenerate_message: operates on the message associated with CTX and
 * two caller-supplied buffers.
 *
 *   CTX     DSPAM context for this operation
 *   header  non-const buffer; NOTE(review): presumably an output that is
 *           filled with header text for the tokenizer -- confirm
 *   body    non-const buffer; presumably the matching output for body
 *           text -- confirm
 *
 * Returns an int status; the exact values are not visible from this header.
 * NOTE(review): what "degenerate" entails (e.g. decoding or stripping
 * markup) cannot be determined from this declaration -- see the definition.
 */
int _ds_degenerate_message(
DSPAM_CTX *CTX,
buffer *header,
buffer *body);
/*
 * _ds_url_tokenize: URL-specific tokenizing of body text. Unlike the other
 * tokenizer routines in this header it takes no DSPAM_CTX.
 *
 *   diction  diction handle; presumably receives the generated tokens
 *   body     non-const body text; may be modified in place -- TODO confirm
 *   key      read-only (const) string; NOTE(review): presumably the
 *            marker or prefix used to locate URLs in body -- confirm
 *
 * Returns an int status; the exact values are not visible from this header.
 */
int _ds_url_tokenize(
ds_diction_t diction,
char *body,
const char *key);
/*
 * _ds_sbph_clear: reset an SBPH previous-token array. Returns nothing.
 *
 *   previous_tokens  array of token pointers, writable by the callee;
 *                    NOTE(review): presumably SBPH_SIZE entries long, and
 *                    whether the entries are freed or only set to NULL is
 *                    not visible here -- confirm before relying on it
 */
void _ds_sbph_clear
(char **previous_tokens);
/*
 * _ds_truncate_token: derive a truncated form of a token.
 *
 *   token  read-only (const) input string; not modified through this
 *          pointer
 *
 * Returns a non-const char pointer. NOTE(review): a writable result from a
 * const input suggests a separately allocated string the caller must free,
 * and NULL is presumably returned on failure -- confirm both against the
 * definition. The truncation rule itself is not visible from this header.
 */
char * _ds_truncate_token
(const char *token);
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */