/*
    gutenfetch - a small utility to list and fetch books available through
	project gutenberg

    Copyright (C) 2001, 2002, 2003, 2004 Russell Francis 

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the

	Free Software Foundation, Inc.
	59 Temple Place, Suite 330
	Boston, MA  02111-1307  USA

Last updated on $Date: 2004/07/07 02:41:22 $ by $Author: johntabularasa $.
*/
#include "stddefs.h"
#include "gutenfetch.h"
#include "libgutenfetch_init.h"
#include "libgutenfetch_fileinfo.h"
#include "libgutenfetch_servers.h"
#include "libgutenfetch_utility.h"
#include "libgutenfetch_filter.h"
#include "libgutenfetch_listing.h"
#include "libgutenfetch_etext.h"
#include "libgutenfetch_cache.h"
#include "rb.h"
#ifdef USE_ZLIB
#	if (HAVE_ZLIB_H == 1)
#		include <zlib.h>
#	endif
#endif
#ifdef HAVE_ASSERT_H
#	include <assert.h>
#endif
#ifdef HAVE_STDLIB_H
#	include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#	include <string.h>
#endif
#ifdef HAVE_STRINGS_H
#	include <strings.h>
#endif
#ifdef HAVE_TIME_H
#	include <time.h>
#endif
#ifdef HAVE_UNISTD_H
#	include <unistd.h>
#endif
#ifdef HAVE_PTHREAD
	#ifdef HAVE_PTHREAD_H
		#include <pthread.h>
	#endif /* HAVE_PTHREAD_H */
#endif /* HAVE_PTHREAD */	

/** Private definitions **/
/*
 * Everything between this #if 0 and its #endif is compiled out.  The
 * types are only referenced by the (equally disabled) detail-listing code
 * further down in this file.
 */
#if 0
/* No user of this type is visible in this file. */
typedef struct {
	char *directory;
	list_t *contents;
} detailed_data_t;	

/* Size of the fixed line buffers embedded in the structures below. */
#define LINE_LENGTH 4096
/* No user of this type is visible in this file. */
typedef struct {
	char *buffer;
	size_t bufsize;
	char line[LINE_LENGTH];
	size_t line_index;
	list_t *list;
} build_etext_data_t;

/* State handed from gutenfetch_detail_listing to gutenfetch_detail_etexts. */
typedef struct {
	/* Stream that gutenfetch_detail_etexts opens on top of fd. */
#ifdef USE_ZLIB
	gzFile gzf;
#else
	FILE *fp;
#endif	
	int fd;						/* descriptor from gutenfetch_cache_fetch */
	char line[LINE_LENGTH];		/* buffer the directory listing is read into */
	size_t line_index;			/* set to 0 by the caller; not read in this file */
	list_t *list;				/* set to NULL by the caller; not read in this file */
	unsigned int first_call : 1;	/* set to TRUE by the caller; not read in this file */
} detail_etext_data_t;	

/* One directory of the server listing: lookup key plus its files. */
typedef struct {
	char *directory;	/* directory name, used as the tree lookup key */
	list_t *file;		/* list of file_info_t * found in that directory */
} directory_data_t;
#endif

/** Private variables **/
/* List of gutenfetch_etext_t * built by
 * gutenfetch_add_entries_in_file_to_catalog (entries are prepended) and
 * released by gutenfetch_listing_shutdown. */
static list_t *etext_catalog = NULL;
/* Number of entries that have been prepended to etext_catalog. */
static unsigned int etext_catalog_count = 0;
/* The array most recently handed out by gutenfetch_get_listing; it is
 * freed by gutenfetch_listing_shutdown.
 * NOTE(review): unlike its neighbours this variable is not static, so it
 * has external linkage -- confirm whether another file relies on that. */
gutenfetch_etext_t **etext_catalog_block_alloc = NULL;

/** Private Functions **/
/* Forward declaration for the detail-listing worker; compiled out together
 * with its definition further down in this file. */
#if 0
gutenfetch_error_t
	gutenfetch_detail_etexts(
		detail_etext_data_t *ddata, 
		int (*)(void *, double, double, double, const char *),
		void *);
#endif

/**
 * gutenfetch_line_is_old_ebook_entry
 *
 * Run a line through the IFILTER_GUTINDEX_OLD filter and, when it matches,
 * build an etext record from the captured fields.  The match list is
 * consumed in this order: full line, month (ignored), the number used to
 * name the "etextNN" directory, title/author text, filebase (at most eight
 * characters are used), numeric id and a status tag ('*' reserved,
 * 'C' copyright, 'A' australia).
 *
 * @param line The line which may or may not be an ebook entry.
 * @return NULL if the line does not match, a required field is missing or
 *		memory could not be obtained; otherwise a gutenfetch_etext_t *
 *		which is released with gutenfetch_etext_free().
 */
gutenfetch_etext_t *
gutenfetch_line_is_old_ebook_entry(char *line)
{
	gutenfetch_etext_t *etext = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;
	char temp_filebase[9];
	size_t len;
	int i;

	match = gutenfetch_ifilter_match(IFILTER_GUTINDEX_OLD, line);
	if (match == NULL) /* not an old-style entry */
		return NULL;

	etext = gutenfetch_etext_new();
	if (etext == NULL)
		goto fail;

	/* fill in the full line. */
	lt = list_first(match);
	if ((lt == NULL) || (lt->data == NULL))
		goto fail;
	etext->full = strdup(lt->data);
	if (etext->full == NULL)
		goto fail;

	/* skip the month field. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;

	/* get the directory */
	lt = list_next(lt);
	if ((lt == NULL) || (lt->data == NULL))
		goto fail;

	i = (int)strtol((char*)lt->data, NULL, 10);
	if ((i < 90) && (i >= 60)) {
		/* Some entries list the date the text was really produced, which
		 * predates the archive; a number in [60, 90) therefore does not
		 * name a directory and those texts are filed under etext90. */
		etext->directory = strdup("etext90");
	} else {
		etext->directory = gutenfetch_util_strcat(
			"etext", lt->data, NULL);
	}
	if (etext->directory == NULL)
		goto fail;

	/* get title and author */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	etext->author = gutenfetch_util_get_author(lt->data);
	etext->title = gutenfetch_util_get_title(lt->data);

	/* get filebase */
	lt = list_next(lt);
	if ((lt == NULL) || (lt->data == NULL))
		goto fail;
	/* Copy at most eight characters and never read past the end of a
	 * shorter field; the rest of the buffer stays zeroed. */
	len = strlen((char*)lt->data);
	if (len > sizeof(temp_filebase) - 1)
		len = sizeof(temp_filebase) - 1;
	memset(temp_filebase, 0, sizeof(temp_filebase));
	memcpy(temp_filebase, lt->data, len);
	/* Strip the trailing 'x' padding but always keep the first character. */
	for (i = 7; (i > 0) && (temp_filebase[i] == 'x'); --i)
		temp_filebase[i] = '\0';
	etext->filebase = strdup(temp_filebase);
	if (etext->filebase == NULL)
		goto fail;

	/* Get the unique integer id. */
	lt = list_next(lt);
	if ((lt == NULL) || (lt->data == NULL))
		goto fail;
	etext->id = (int)strtol((char*)lt->data, NULL, 10);

	/* Get the copyright/reserved/australia tag. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	if (lt->data != NULL) {
		switch (((char*)lt->data)[0]) {
		case '*':
			etext->cflag.reserved = 1;
			break;
		case 'C':
			etext->cflag.copyright = 1;
			break;
		case 'A':
			etext->cflag.australia = 1;
			break;
		default:
			break;
		}
	}

	list_remove_all(match, free);
	return etext;

fail:
	/* Single exit for every incomplete entry: drop the partial record
	 * and the match list. */
	if (etext != NULL)
		gutenfetch_etext_free(etext);
	list_remove_all(match, free);
	return NULL;
}

/**
 * gutenfetch_line_is_new_ebook_entry
 *
 * Given a NULL terminated text string, determine
 * if it describes a new ebook listing >= 10000 in
 * the GUTINDEX.ALL file.
 *
 * @param line The line which may or may not be a new
 * 		ebook listing.
 * @return NULL if it isn't or a valid and filled out
 *		gutenfetch_etext_t * structure.
 */
gutenfetch_etext_t *
gutenfetch_line_is_new_ebook_entry(char *line)
{
	gutenfetch_etext_t *etext = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;

	match = gutenfetch_ifilter_match(IFILTER_GUTINDEX_NEW, line);

	if (match != NULL) {
		/* initialize the etext. */
		etext = gutenfetch_etext_new();
		if (etext == NULL) {
			list_remove_all(match, free);
			return NULL;
		}
		
		lt = list_first(match);
		assert (lt != NULL);

		/* fill in full line. */
		etext->full = strdup(lt->data);

		/* fill in author and title */
		lt = list_next(lt);
		if (lt != NULL) {
			etext->author = gutenfetch_util_get_author(lt->data);
			etext->title = gutenfetch_util_get_title(lt->data);;
		}

		/* fill in id, directory, filebase */
		lt = list_next(lt);
		if (lt != NULL) {
			if (strlen(lt->data) > 4) {
				etext->id = (int)strtol((char*)lt->data, NULL, 10);
				etext->filebase = strdup(lt->data);
				etext->directory = malloc(sizeof(char) * 15);
				assert(etext->directory != NULL);
				snprintf(etext->directory, 15, "%c/%c/%c/%c/%d",
					((char*)lt->data)[0],
					((char*)lt->data)[1],
					((char*)lt->data)[2],
					((char*)lt->data)[3],
					etext->id);
			}		
		}

		/* fill in condition flags */
		lt = list_next(lt);
		if (lt != NULL) {
			if (lt->data != NULL) {
				if(((char*)lt->data)[0] == 'A') { /* aussie */
					etext->cflag.australia = 1;
				} else if (((char*)lt->data)[0] == 'C') {/* copyright */
					etext->cflag.copyright = 1;
				} else if (((char*)lt->data)[0] == '*') { /* reserved */
					etext->cflag.reserved = 1;
				}
				
			}
		}
		list_remove_all(match, free);
	}

	return etext;
}

/**
 * gutenfetch_line_is_ebook_entry
 *
 * Try both index formats on a line: the old-style parser first and,
 * only if that one rejects the line, the new-style parser.
 *
 * @param line The line of text which may be an etext entry
 * 		from the GUTINDEX.ALL file.
 * @return NULL if neither parser accepts the line, otherwise the
 *		gutenfetch_etext_t * produced by whichever parser accepted it.
 */
gutenfetch_etext_t *
gutenfetch_line_is_ebook_entry(char *line)
{
	gutenfetch_etext_t *parsed = gutenfetch_line_is_old_ebook_entry(line);

	if (parsed != NULL)
		return parsed;

	/* Not an old entry, maybe a new one? */
	return gutenfetch_line_is_new_ebook_entry(line);
}

/** Semi-Global Functions */
/**
 * gutenfetch_listing_init
 *
 * Set up the resources used by this module.  The body is empty: this
 * function currently performs no work.
 */
void
gutenfetch_listing_init(void)
{
	/* Nothing to initialise. */
	return;
}

/**
 * gutenfetch_listing_shutdown
 *
 * Release resources used by this module.
 */
void
gutenfetch_listing_shutdown(void)
{
	if (etext_catalog != NULL)	
		list_remove_all(etext_catalog, (void (*)(void*))gutenfetch_etext_free);
	FREE_NULL(etext_catalog_block_alloc);
}
 
/** Exported Global Functions **/
/**
 * gutenfetch_get_raw_listing
 * 
 * This routine puts a buffer which has
 * the entire contents of the GUTINDEX.ALL or
 * GUTINDEX.AUS file or both concatenated together
 * (GUTINDEX.ALL first) into *buffer.  When neither file
 * can be fetched an empty, NUL-terminated buffer is returned.
 *
 * @param buffer Where the newly allocated text is stored.  Whatever
 *		*buffer pointed to on entry is freed first, so it must be NULL
 *		or a heap pointer.
 * @param type Which index file(s) to fetch.
 * @param pfunc A user provided progress function or NULL; it is passed
 *		on to gutenfetch_cache_fetch.
 * @param pfunc_data Passed on together with pfunc.
 * @return GUTENFETCH_OK, GUTENFETCH_BAD_PARAM when buffer is NULL, or
 *		GUTENFETCH_NOMEM when the result could not be allocated (in which
 *		case *buffer is NULL).
 */
gutenfetch_error_t
gutenfetch_get_raw_listing(
	char **buffer,
	listing_type_t type,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	int fd = -1;
	char *a = NULL;
	char *b = NULL;

	if (buffer == NULL)
		return GUTENFETCH_BAD_PARAM;

	if (*buffer != NULL)
		FREE_NULL(*buffer);

	/* NOTE(review): the descriptors returned by gutenfetch_cache_fetch are
	 * not closed here; presumably gutenfetch_util_read_file_to_buffer or
	 * the cache owns them -- confirm. */
	if ((type == LIST_ALL) || (type == LIST_NON_AUSTRALIAN)) {
		fd = gutenfetch_cache_fetch(NON_AUSTRALIAN, "GUTINDEX.ALL", pfunc, pfunc_data);
		if (fd != -1) {
			b = gutenfetch_util_read_file_to_buffer(fd);
		}
	}
	if ((type == LIST_ALL) || (type == LIST_AUSTRALIAN)) {
		fd = gutenfetch_cache_fetch(AUSTRALIAN, "GUTINDEX.AUS", pfunc, pfunc_data);
		if (fd != -1) {
			a = gutenfetch_util_read_file_to_buffer(fd);
		}
	}

	/* now our internal buffers should have all the info we need. */
	if ((a != NULL) && (b != NULL)) {
		*buffer = gutenfetch_util_strcat(b, a, NULL);
		FREE_NULL(a);
		FREE_NULL(b);
	} else if (a != NULL) {
		*buffer = a;
	} else if (b != NULL) {
		*buffer = b;
	} else { /* Unable to fetch gutindex.all or gutindex.aus. */
		/* make an empty null-terminated buffer. */
		*buffer = malloc(1);
		if (*buffer != NULL)
			(*buffer)[0] = '\0';
	}

	/* Only the concatenation and the empty-buffer allocation can leave
	 * *buffer NULL at this point. */
	if (*buffer == NULL)
		return GUTENFETCH_NOMEM;

	return GUTENFETCH_OK;
}
#if 0
/**
 * gutenfetch_detail_listing
 *
 * Detail a list of electronic texts.  The directory listing
 * ("ls-lR.gz" when built with USE_ZLIB, "ls-R" otherwise) is fetched
 * through the cache and handed to gutenfetch_detail_etexts, which walks
 * the catalog; the catalog is only populated once gutenfetch_get_listing
 * has been called.
 *
 * This function is currently compiled out (#if 0).
 *
 * @param pfunc A user provided progress function or NULL.
 * @param pfunc_data Passed as the first argument to pfunc.
 * @return Always GUTENFETCH_OK; a failed fetch and the result of
 *		gutenfetch_detail_etexts are both ignored.
 */
gutenfetch_error_t
gutenfetch_detail_listing(
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	int fd;
	detail_etext_data_t ddata;
	
	#ifdef USE_ZLIB
		fd = gutenfetch_cache_fetch(
			NON_AUSTRALIAN, "ls-lR.gz", pfunc, pfunc_data);
	#else
		fd = gutenfetch_cache_fetch(
			NON_AUSTRALIAN, "ls-R", pfunc, pfunc_data);
	#endif

	if (fd != -1) {
		/* The stream member (gzf / fp) is left unset here; it is opened
		 * on top of fd by gutenfetch_detail_etexts. */
		ddata.fd = fd;
		ddata.list = NULL;
		ddata.line_index = 0;
		ddata.first_call = TRUE;
		gutenfetch_detail_etexts(&ddata, pfunc, pfunc_data);
		close(fd);
	}	
	return GUTENFETCH_OK;
}

/**
 * gutenfetch_detail_etexts
 *
 * This function takes the detail_etext_data_t structure
 * to get the temporary file which the ls-R or ls-lR.gz file
 * has been written to.  It then scans this file for detailed
 * information regarding the etexts we obtained from GUTINDEX.ALL
 *
 * The work happens in two passes: first the listing is read line by line
 * and every file is attached to its directory (zip files are kept in a
 * separate tree), then each catalog entry looks up its directory and
 * collects the files whose name starts with the entry's filebase.
 *
 * This function is currently compiled out (#if 0).
 * NOTE(review): dir_tree, zip_dir_tree, add_directory_to_tree and the
 * ls_R_* filters are not declared anywhere in this file, so the block
 * would need them restored before it could be re-enabled.
 *
 * @param ddata The detail_etext_data_t structure.
 * @param progress_func A user provided progress function or NULL.
 * @param progress_func_data Passed as the first argument to progress_func.
 * @return GUTENFETCH_OK, or GUTENFETCH_UNABLE_TO_DETAIL_LISTING when the
 *		stream cannot be opened on ddata->fd.
 *
 */
gutenfetch_error_t
gutenfetch_detail_etexts(
	detail_etext_data_t *ddata,
	int (*progress_func)(void *, double, double, double, const char *),
	void *progress_func_data)
{
	char msg[4096];
	unsigned int loops = 0;
	unsigned int count = 0, i; 
	list_t *entry_lt = NULL;
	gutenfetch_etext_entry_t *entry = NULL;
	gutenfetch_etext_t *etext;
	file_info_t *file_info = NULL;
	directory_data_t *dir_data = NULL;
	directory_data_t *zip_dir_data = NULL;
	directory_data_t static_directory_data;
	char *line = NULL;
	list_t *ftemp = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;
	list_t *cat_lt = NULL;
	
#ifdef USE_ZLIB
	size_t size;

	/* NOTE(review): the gzFile is never passed to gzclose in this
	 * function; the caller closes the underlying descriptor. */
	ddata->gzf = gzdopen(ddata->fd, "rb");
	if (ddata->gzf == NULL) {
		return GUTENFETCH_UNABLE_TO_DETAIL_LISTING;
	}
	/* Build a red-black tree from the ls-lR file */
	while ( gzgets(ddata->gzf, ddata->line, LINE_LENGTH) != Z_NULL) {
		/* A directory header line starts a new current directory. */
		match = gutenfetch_ifilter_match(
			IFILTER_LS_LR_DETAIL_DIRECTORY,
			ddata->line);

		if (match != NULL) {
			lt = list_next(list_first(match));
			if (lt != NULL) {
				add_directory_to_tree((const char *)lt->data,
					dir_tree, &dir_data);
				add_directory_to_tree((const char *)lt->data,
					zip_dir_tree, &zip_dir_data);
			}
			list_remove_all(match, free);
		} else if (dir_data != NULL) {
			/* Otherwise try to read the line as a file entry of the
			 * current directory. */
			match = gutenfetch_ifilter_match(
				IFILTER_LS_LR_DETAIL_ENTRY,
				ddata->line);

			/* NOTE(review): in this branch match is never released with
			 * list_remove_all, size stays uninitialised when lt is NULL,
			 * and the list_next chain below is applied to lt without a
			 * NULL check. */
			if (match != NULL) {
				lt = list_next(list_first(match));
				if (lt != NULL) {
					size = strtol((char*)lt->data, NULL, 10);
				}	
				
				lt = list_next(list_next(list_next(lt)));
				if (lt != NULL) {
					file_info = file_info_new((char*)lt->data, size);
					assert(file_info != NULL);
					if (gutenfetch_util_extension_is("zip", (char *)lt->data)) {
						zip_dir_data->file = list_prepend(zip_dir_data->file,
							file_info);
					} else {
						dir_data->file = list_prepend(dir_data->file, file_info);
					}
				}
			}
		}
	}

	
#else
	/* Build a red-black tree from the ls-R file */
	/* NOTE(review): the FILE is never passed to fclose in this function;
	 * the caller closes the underlying descriptor. */
	ddata->fp = fdopen(ddata->fd, "r");
	if (ddata->fp == NULL) {
		return GUTENFETCH_UNABLE_TO_DETAIL_LISTING;
	}
	fseek(ddata->fp, 0, SEEK_SET);
	for (line = fgets(ddata->line, LINE_LENGTH, ddata->fp);
		line != NULL;
		line = fgets(ddata->line, LINE_LENGTH, ddata->fp))
	{	
		match = gutenfetch_filter_match(ls_R_detail_directory_filter,
			ddata->line);
		if (match != NULL) {
			lt = list_next(list_first(match)); /* Get the second element. */
			if (lt != NULL) {
				/* add an entry to the directory rb tree. */
				add_directory_to_tree((const char *)lt->data, dir_tree, &dir_data);

				/* add an entry to the zipfile rb tree. */
				add_directory_to_tree((const char *)lt->data, zip_dir_tree, &zip_dir_data);
			}
			list_remove_all(match, free);
		} else if (dir_data != NULL) {
			match = gutenfetch_filter_match(ls_R_detail_entry_filter,
				ddata->line);
			/* The plain ls-R listing carries no sizes, so 0 is recorded.
			 * NOTE(review): match is not released in this branch either. */
			if (match != NULL) {
				lt = list_first(match);
				if (lt != NULL) {
					file_info = file_info_new((char*)lt->data, 0);
					assert(file_info != NULL);
					if (gutenfetch_util_extension_is("zip", (char*)lt->data)) {
						zip_dir_data->file = list_prepend(zip_dir_data->file, file_info);
					} else {
						dir_data->file = list_prepend(dir_data->file, file_info);
					}
				}
			}
		}
	}

#endif /* USE_ZLIB */

	/* Iterate through the etext_catalog and look for entries in the tree.
	 * As they are found, add them to the gutenfetch_etext_entry_t ** array
	 * within the etext_catalog.
	 */
	cat_lt = list_first(etext_catalog);
	while (cat_lt != NULL) {
		etext = (gutenfetch_etext_t*)cat_lt->data;
		if (etext != NULL) {
			if ((etext->directory != NULL) && (etext->filebase != NULL)) {
				if (progress_func != NULL) {
					if (etext->title != NULL) {
						snprintf(msg, 4096, "Detailing etext '%s'.", etext->title);
					} else {
						snprintf(msg, 4096, "Detailing etext #:%d.", etext->id);
					}
					progress_func(
						progress_func_data,
						(double)loops / (double)etext_catalog_count,
						etext_catalog_count,
						loops,
						msg);
				}		
					
				/* Build a stack lookup key from the etext's directory; a
				 * trailing '/' is removed in place from etext->directory. */
				static_directory_data.directory = etext->directory;
				if (static_directory_data.directory[strlen(static_directory_data.directory)-1] == '/')
					static_directory_data.directory[strlen(static_directory_data.directory)-1] = '\0';
				static_directory_data.file = NULL;
				dir_data = rb_find(dir_tree, &static_directory_data);
				zip_dir_data = rb_find(zip_dir_tree, &static_directory_data);
				/* NOTE(review): zip_dir_data is dereferenced below without
				 * checking the result of rb_find. */
				if (dir_data != NULL) {
					lt = list_first(dir_data->file);
					while (lt != NULL) {
						file_info = (file_info_t*)lt->data;
						/* This is the most common case. */
						if (strncmp(file_info->filename, etext->filebase, strlen(etext->filebase)) == 0) {
							entry = gutenfetch_etext_entry_build_new(
								etext->directory,
								file_info->filename,
								file_info->filesize,
								zip_dir_data->file);
								
							/* Add entry to our entry list and increment the counter. */
							if (entry != NULL) {
								entry_lt = list_prepend(entry_lt, entry);	
								count++;
							}	
						} else if ((etext->filebase[0] == '?') && 
						((file_info->filename[0] == '7') || (file_info->filename[0] == '8'))) { 
							/* A filebase starting with '?' matches files
							 * starting with '7' or '8'; the remainder of
							 * the name is compared as usual. */
							if (strncmp(&file_info->filename[1], &etext->filebase[1], strlen(etext->filebase) - 1) == 0) {
								entry = gutenfetch_etext_entry_build_new(
									etext->directory,
									file_info->filename,
									file_info->filesize,
									zip_dir_data->file);

								if (entry != NULL) {
									entry_lt = list_prepend(entry_lt, entry);
									++count;
								}	
							}
						}
						lt = list_next(lt);
					}
					/* Free any etext_entries which may be around. */
					/* NOTE(review): only the elements are freed; the old
					 * etext->entry array itself is overwritten below. */
					if (etext->entry != NULL) {
						i = 0;
						while(etext->entry[i] != NULL) {
							gutenfetch_etext_entry_free(etext->entry[i]);
							++i;
						}
					}
					/* allocate new ones. */
					/* One extra slot for the terminating NULL. */
					count++;
					etext->entry = malloc(sizeof(gutenfetch_etext_entry_t*) * count);
					assert(etext->entry != NULL);
					ftemp = list_first(entry_lt);
					i = 0;
					while (ftemp != NULL) {
						etext->entry[i++] = (gutenfetch_etext_entry_t*)ftemp->data;
						ftemp = list_next(ftemp);
					}
					etext->entry[i] = NULL;
				}
			}
		}
		/* Reset the per-etext state; the list nodes are dropped but the
		 * entries themselves now belong to etext->entry. */
		cat_lt = list_next(cat_lt);
		count = 0;
		list_remove_all(entry_lt, NULL);
		entry_lt = NULL;
		entry = NULL;
		loops++;
	}

	/* return success. */
	return GUTENFETCH_OK;
}
#endif

/**
 * tagged_entries_compare
 *
 * Ordering callback handed to rb_create: both items are treated as
 * NUL-terminated strings and ordered with strcmp.
 *
 * @param a The first string.
 * @param b The second string.
 * @param data Unused.
 * @return The result of strcmp(a, b).
 */
int
tagged_entries_compare(const void *a, const void *b, void *data)
{
	const char *lhs = (const char *)a;
	const char *rhs = (const char *)b;

	(void)data;
	return strcmp(lhs, rhs);
}

/**
 * gutenfetch_add_entries_in_file_to_catalog
 *
 * Read in a file in the format of GUTINDEX.???
 * and add the valid entries to the etext_catalog.
 *
 * @param file The GUTINDEX.??? file.
 * @param pfunc the User supplied progress function.
 * @param pfunc_data the user supplied data for the progress function.
 */
void
gutenfetch_add_entries_in_file_to_catalog(	
	const char *file,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	struct rb_table *tagged_entries;
	char msg[4096];
	char *str;
	char *line;
	char **probe_ret;
	gutenfetch_etext_t *text = NULL;
	int fd;
	FILE *fp;

	fd = gutenfetch_cache_fetch(NON_AUSTRALIAN, file, pfunc, pfunc_data);
	if (fd != -1) {
		fp = fdopen(fd, "r");
		if (fp != NULL) {
			tagged_entries = rb_create(tagged_entries_compare, NULL, NULL);
			while( (line = gutenfetch_util_getline(fp)) != NULL) {
				text = gutenfetch_line_is_ebook_entry(line);
				if (text != NULL) {
					/* check that it isn't a duplicate. */
					str = gutenfetch_util_strcat(
						text->directory, "+", text->filebase, NULL);
					probe_ret = (char**)rb_probe(tagged_entries, str);	
					if (*(char**)probe_ret == str) {	
						etext_catalog = list_prepend(etext_catalog, text);
						etext_catalog_count++;
						if (pfunc != NULL) {
							snprintf(msg, 4096, 
								"Found %d etexts in %s.",
								etext_catalog_count, file);
							pfunc(
								pfunc_data,
								0.0,
								etext_catalog_count,
								etext_catalog_count,
								msg);
						}		
					} else {
						FREE_NULL(str);
						gutenfetch_etext_free(text);
					}
				}
				FREE_NULL(line);
			}
			rb_destroy(tagged_entries, (void(*)(void*, void*))free);
			fclose(fp);
		}	
	}
}


/**
 * gutenfetch_get_listing
 *
 * Retrieve a listing of all available
 * electronic texts from the active PG server.
 *
 * The requested index files are parsed into the module's catalog list and
 * the catalog is then copied into a NULL-terminated array.  Entries are
 * prepended to the catalog while parsing, so the array is filled from the
 * back to restore the order in which the entries were read.
 *
 * The array is also remembered by this module and freed by
 * gutenfetch_listing_shutdown; the etexts it points to stay owned by the
 * catalog.
 *
 * NOTE(review): the catalog is never cleared here, so calling this
 * function again parses the index files again on top of the existing
 * catalog -- confirm whether repeated calls are expected.
 *
 * @param etext A pointer to return the list.  Whatever *etext pointed to
 *		on entry is freed first, so it must be NULL or an array
 *		previously returned by this function.
 * @param type Which ebook listings we should concern
 * 		ourselves with.
 * @param pfunc A user provided function which is called
 *		to update the progress of the operation or NULL.
 * @param pfunc_data A user provided pointer which is 
 *		passed as the first argument to pfunc.
 * @return GUTENFETCH_OK, GUTENFETCH_BAD_PARAM or GUTENFETCH_NOMEM.
 */
gutenfetch_error_t
gutenfetch_get_listing(
	gutenfetch_etext_t ***etext,
	listing_type_t type,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	list_t *lt = NULL;
	size_t remaining;	/* slots still to be filled, counted from the back */
	size_t filled = 0;	/* entries copied so far, for progress reporting */

	if ((	(type != LIST_NON_AUSTRALIAN) && 
			(type != LIST_AUSTRALIAN) &&
			(type != LIST_ALL) )  || (etext == NULL))
	{
		return GUTENFETCH_BAD_PARAM;
	}

	if ((type == LIST_ALL) || (type == LIST_NON_AUSTRALIAN))
		gutenfetch_add_entries_in_file_to_catalog("GUTINDEX.ALL", pfunc, pfunc_data);

	if ((type == LIST_ALL) || (type == LIST_AUSTRALIAN))
		gutenfetch_add_entries_in_file_to_catalog("GUTINDEX.AUS", pfunc, pfunc_data);

	/* convert the etext catalog into a more standard array. */
	if (*etext != NULL)
		FREE_NULL(*etext);
	*etext = malloc(sizeof(**etext) * ((size_t)etext_catalog_count + 1));
	/* Recorded before the NULL check on purpose: after a failed allocation
	 * the module must not keep pointing at the array freed just above. */
	etext_catalog_block_alloc = *etext;
	if (*etext == NULL)
		return GUTENFETCH_NOMEM;

	remaining = etext_catalog_count;
	(*etext)[remaining] = NULL;

	/* fill up the array with our etexts; the bound on remaining keeps the
	 * write inside the array and the index from wrapping below zero. */
	for (lt = list_first(etext_catalog);
		(lt != NULL) && (remaining > 0);
		lt = list_next(lt))
	{
		(*etext)[--remaining] = (gutenfetch_etext_t*)lt->data;
		++filled;
		if (pfunc != NULL) {
			/* Report the number of entries actually copied, so the
			 * fraction ends at exactly 1.0. */
			pfunc(
				pfunc_data,
				(double)filled / (double)etext_catalog_count,
				(double)etext_catalog_count,
				(double)filled,
				"Building etexts");
		}
	}

	return GUTENFETCH_OK;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */