/*
    gutenfetch - a small utility to list and fetch books available through
	project gutenberg

    Copyright (C) 2001, 2002, 2003, 2004 Russell Francis 

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the

	Free Software Foundation, Inc.
	59 Temple Place, Suite 330
	Boston, MA  02111-1307  USA

Last updated on $Date: 2004/07/03 19:06:50 $ by $Author: johntabularasa $.
*/

#include "stddefs.h"
#include "libgutenfetch_detail.h"
#include "libgutenfetch_fileinfo.h"
#include "libgutenfetch_filter.h"
#include "libgutenfetch_etext.h"
#include "libgutenfetch_utility.h"
#include "libgutenfetch_cache.h"
#include "rb.h"
#include "list.h"
#ifdef HAVE_STDIO_H
#	include <stdio.h>
#endif /* HAVE_STDIO_H */
#ifdef HAVE_STDLIB_H
#	include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#ifdef HAVE_STRING_H
#	include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_STRINGS_H
#	include <strings.h>
#endif /* HAVE_STRINGS_H */
#ifdef HAVE_UNISTD_H
#	include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_ASSERT_H
#	include <assert.h>
#endif /* HAVE_ASSERT_H */
#ifdef HAVE_PTHREAD
	#ifdef HAVE_PTHREAD_H
		#include <pthread.h>
	#endif /* HAVE_PTHREAD_H */
#endif /* HAVE_PTHREAD */

/** DATA-TYPE DEFINITIONS */
/* One directory named in the ls-R listing together with the files
 * recorded for it.  Instances are the items stored in the red-black
 * trees below and are ordered by their directory name.
 */
typedef struct {
	/* Heap-allocated copy of the directory name (the tree key). */
	char *directory;
	/* List of file_info_t entries; NULL until the first file is added. */
	list_t *files;
} directory_data_t;

/** PRIVATE VARIABLES **/
#ifdef HAVE_PTHREAD
/* Held by gutenfetch_prime_directory_trees() while it checks and sets
 * directory_trees_done.
 */
static pthread_mutex_t directory_trees_done_mutex;
#endif /* HAVE_PTHREAD */

/* Set to TRUE by gutenfetch_prime_directory_trees() after it has run
 * the tree building step.
 */
static int directory_trees_done = FALSE;
/* Directories from ls-R, each holding the files without a "zip" extension. */
static struct rb_table *detail_dir_tree = NULL;
/* The same directories, each holding only the files with a "zip" extension. */
static struct rb_table *detail_zip_dir_tree = NULL;

/* NULL terminated array of gutenfetch_etext_t's,
 * 	defined in libgutenfetch_listing.c.
 */
extern gutenfetch_etext_t **etext_catalog_block_alloc;

/** PRIVATE FUNCTIONS **/


/**
 * directory_data_compare
 *
 * Ordering callback given to the red-black tree implementation.
 * Two directory_data_t items are ordered by comparing their
 * directory names with strcmp().
 *
 * @param a The item stored in one node of the tree.
 * @param b The item stored in another node of the tree.
 * @param param The user provided data (not used).
 * @return The strcmp() result for the two directory names: zero
 *		when they are equal, negative when a sorts before b and
 *		positive when a sorts after b.
 */
int
directory_data_compare(const void *a, const void *b, void *param)
{
	const directory_data_t *lhs = a;
	const directory_data_t *rhs = b;

	assert( a != NULL );
	assert( b != NULL );

	return strcmp(lhs->directory, rhs->directory);
}


/**
 * directory_data_free
 *
 * Item destructor for the directory trees.  Releases every
 * file_info_t held in the file list, then the directory name
 * and finally the structure itself.  A NULL item is ignored.
 *
 * @param d The directory_data_t to free.
 * @param p The user provided data (not used).
 */
void
directory_data_free(directory_data_t *d, void *p) 
{
	if (d == NULL) {
		return;
	}

	list_remove_all(d->files, (void(*)(void*))file_info_free);
	FREE_NULL(d->directory);
	FREE_NULL(d);
}


/**
 * directory_data_new
 *
 * Given a directory name, return a newly allocated
 * directory_data_t structure holding a private copy of that
 * name and an empty (NULL) file list.
 *
 * Allocation failures are reported to the caller instead of
 * being left to assert(), which is compiled out when NDEBUG is
 * defined and would otherwise leave a NULL pointer to be
 * dereferenced here or a NULL directory name to be compared later.
 *
 * @param dir The directory name.
 * @return A valid directory_data_t structure, or NULL if dir is
 *		NULL or memory could not be allocated.  The caller owns the
 *		result and releases it with directory_data_free().
 */
directory_data_t *
directory_data_new(char *dir)
{
	directory_data_t *dir_data;

	if (dir == NULL) {
		return NULL;
	}

	dir_data = malloc(sizeof *dir_data);
	if (dir_data == NULL) {
		return NULL;
	}

	dir_data->directory = strdup(dir);
	if (dir_data->directory == NULL) {
		/* Never hand back a node without a name: the tree
		 * comparison function passes it straight to strcmp().
		 */
		free(dir_data);
		return NULL;
	}

	dir_data->files = NULL;
	return dir_data;
}


/**
 * gutenfetch_line_is_detail_file_entry
 *
 * Runs one line of ls-R through the IFILTER_LS_R_DETAIL_ENTRY
 * filter and, when it matches, wraps the first captured string
 * in a new file_info_t.
 *
 * The size passed to file_info_new() is always 0.  An earlier
 * variant (guarded by USE_ZLIB) matched the line with
 * IFILTER_LS_LR_DETAIL_ENTRY and parsed the file size with
 * strtol(); it was removed over concerns about processing time
 * and may be revived if that cost can be brought down.
 *
 * @param line The line of text which may or may not match.
 * @return NULL on failure, A valid file_info_t structure
 *  	on success.
 */
file_info_t *
gutenfetch_line_is_detail_file_entry(char *line)
{
	file_info_t *result = NULL;
	list_t *captures;
	list_t *head;

	if (line == NULL) {
		return NULL;
	}

	captures = gutenfetch_ifilter_match(IFILTER_LS_R_DETAIL_ENTRY, line);
	if (captures == NULL) {
		return NULL;
	}

	head = list_first(captures);
	if ((head != NULL) && (head->data != NULL)) {
		result = file_info_new((char*)head->data, 0);
	}

	/* The captured strings are no longer needed once the
	 * file_info_t has been built.
	 */
	list_remove_all(captures, free);

	return result;
}


/**
 * gutenfetch_line_is_detail_directory_entry
 *
 * Tests a line from the ls-R file against the
 * IFILTER_LS_LR_DETAIL_DIRECTORY filter.  When the line names a
 * directory, an entry for it is placed in both detail_dir_tree
 * and detail_zip_dir_tree and the detail_dir_tree entry is returned.
 *
 * If the directory is already present in a tree, the freshly
 * built duplicate is released and the existing entry is used
 * instead.  The returned pointer therefore always refers to the
 * node that lives in detail_dir_tree, so files attached to it
 * afterwards are not lost and nothing is leaked.
 *
 * @param line The line we wish to check.
 * @return NULL if the line is NULL, is not a directory entry or
 *		memory could not be allocated; otherwise the
 *		directory_data_t for the directory.  The entry is owned by
 *		the tree and must not be freed by the caller.
 */
directory_data_t *
gutenfetch_line_is_detail_directory_entry(char *line)
{
	list_t *lt = NULL;
	list_t *match = NULL;
	directory_data_t *dir_data = NULL;
	directory_data_t *zip_dir_data = NULL;
	directory_data_t *existing = NULL;

	if (line == NULL) {
		return NULL;
	}

	match = gutenfetch_ifilter_match(
		IFILTER_LS_LR_DETAIL_DIRECTORY,
		line);
	if (match == NULL) {
		return NULL;
	}

	/* The directory name is the second captured string. */
	lt = list_next(list_first(match));
	if ((lt != NULL) && (lt->data != NULL)) {
		/* Create two new directory data structures. */
		dir_data = directory_data_new((char*)lt->data);
		zip_dir_data = directory_data_new((char*)lt->data);

		if ((dir_data == NULL) || (zip_dir_data == NULL)) {
			/* Out of memory: drop whatever was built.
			 * directory_data_free() ignores NULL.
			 */
			directory_data_free(dir_data, NULL);
			directory_data_free(zip_dir_data, NULL);
			dir_data = NULL;
		} else {
			/* rb_insert() hands back the item already stored under
			 * this name, if any, and leaves the tree untouched in
			 * that case.  Keep the stored item and discard ours.
			 */
			existing = rb_insert(detail_dir_tree, dir_data);
			if (existing != NULL) {
				directory_data_free(dir_data, NULL);
				dir_data = existing;
			}

			existing = rb_insert(detail_zip_dir_tree, zip_dir_data);
			if (existing != NULL) {
				directory_data_free(zip_dir_data, NULL);
			}
		}
	}
	list_remove_all(match, (void(*)(void*))free);

	return dir_data;
}


/**
 * gutenfetch_build_directory_trees
 *
 * Given a file descriptor to ls-R, build directory trees
 * to use in the detailing of electronic texts later.
 *
 * Every line is first tried as a directory entry; if it is one
 * it becomes the current directory.  Otherwise, once a current
 * directory is known, the line is tried as a file entry and the
 * resulting file is added to the current directory's zip list
 * (extension "zip") or to its regular list (anything else).
 *
 * The descriptor is duplicated before it is wrapped in a stdio
 * stream so that the stream can be closed here without closing
 * the caller's descriptor; fd itself remains open and owned by
 * the caller.
 *
 * ** NOTE **
 *
 *    	A zlib based reader (gzdopen/gzgets on ls-lR.gz) was
 *		removed over concerns of efficiency, it is hoped that it
 *		may be used in the future.
 *
 * @param fd The file descriptor to ls-R.
 * @return GUTENFETCH_OK on success, GUTENFETCH_BAD_PARAM if fd is
 *		-1, GUTENFETCH_SEE_ERRNO if the descriptor could not be
 *		duplicated or opened as a stream, or if reading failed.
 */
gutenfetch_error_t
gutenfetch_build_directory_trees(int fd)
{
#define LINE_LENGTH 4096	
	char line[LINE_LENGTH];
	directory_data_t *last_dir_data = NULL;
	directory_data_t *last_zip_dir_data = NULL;
	directory_data_t *dir_data = NULL;
	file_info_t *file_info = NULL;
	gutenfetch_error_t result = GUTENFETCH_OK;
	FILE *fp = NULL;
	int stream_fd;

	if (fd == -1) {
		return GUTENFETCH_BAD_PARAM;
	}	

	/* fclose() closes the descriptor underneath the stream, so
	 * give the stream its own copy.
	 */
	stream_fd = dup(fd);
	if (stream_fd == -1) {
		return GUTENFETCH_SEE_ERRNO;
	}

	fp = fdopen(stream_fd, "rb");
	if (fp == NULL) {
		close(stream_fd);
		return GUTENFETCH_SEE_ERRNO;
	}	

	while (fgets(line, LINE_LENGTH, fp) != NULL) {
		dir_data = gutenfetch_line_is_detail_directory_entry(line);

		if (dir_data != NULL) {
			/* A new directory starts here.  Remember its entry in
			 * both trees; the file lines that follow are filed
			 * under these two entries.
			 */
			last_dir_data = dir_data;
			last_zip_dir_data = rb_find(detail_zip_dir_tree, dir_data);
			continue;
		}

		if (last_dir_data == NULL) {
			/* No directory has been seen yet. */
			continue;
		}

		/* We are scanning a directory and looking for files to
		 * insert into its record.
		 */
		file_info = gutenfetch_line_is_detail_file_entry(line);
		if (file_info == NULL) {
			continue;
		}

		if (gutenfetch_util_extension_is("zip", file_info->filename)) {
			if (last_zip_dir_data != NULL) {
				/* If it is a zip file, add it to the zip_dir_data list. */
				last_zip_dir_data->files = list_prepend(
					last_zip_dir_data->files, file_info);
			} else {
				/* The zip tree has no entry for this directory
				 * (its insertion must have failed); drop the file
				 * rather than dereference NULL.
				 */
				file_info_free(file_info);
			}
		} else {
			/* If it is a regular file, add it to this list. */
			last_dir_data->files = list_prepend(
				last_dir_data->files, file_info);
		}
	}

	if (ferror(fp)) {
		result = GUTENFETCH_SEE_ERRNO;
	}
	fclose(fp);

	return result;
}


/**
 * gutenfetch_prime_directory_trees
 *
 * This must be called once initially before any
 * etexts can be detailed.  It is automatically
 * called the first time a book is detailed and
 * shouldn't need to be called again.
 *
 * The first call fetches "ls-R" through the cache and builds the
 * directory trees from it; later calls return immediately.  When
 * pthreads are available the whole check-and-build sequence runs
 * with directory_trees_done_mutex held, and the mutex is released
 * on every path out of the function, including the failure path.
 *
 * It takes no parameters.
 *
 * @return GUTENFETCH_OK on success, GUTENFETCH_UNABLE_TO_DETAIL_LISTING
 * 		if ls-R could not be fetched.  In the failure case the
 * 		trees are left unprimed so that a later call tries again.
 */
gutenfetch_error_t
gutenfetch_prime_directory_trees(void)
{
	gutenfetch_error_t result = GUTENFETCH_OK;
	int fd;

	/* This assures us that only one thread will 
	 * prime the directory_data trees.
	 */
	#ifdef HAVE_PTHREAD
	pthread_mutex_lock(&directory_trees_done_mutex);
	#endif /* HAVE_PTHREAD */

	if (directory_trees_done == FALSE) {
		fd = gutenfetch_cache_fetch(
			NON_AUSTRALIAN, "ls-R", NULL, NULL);

		if (fd == -1) {
			/* Do not return from here: the mutex taken above
			 * must be released first.
			 */
			result = GUTENFETCH_UNABLE_TO_DETAIL_LISTING;
		} else {
			/* The build result is deliberately not propagated;
			 * whatever could be read from ls-R is used as is.
			 */
			gutenfetch_build_directory_trees(fd);
			close( fd );

			/* This flag indicates that we have finished building the
			 * directory trees.
			 */
			directory_trees_done = TRUE;
		}
	}

	#ifdef HAVE_PTHREAD
	pthread_mutex_unlock(&directory_trees_done_mutex);
	#endif /* HAVE_PTHREAD */

	return result;
}

/**
 * gutenfetch_detail_init
 *
 * This initializes the private data used by the etext detailing
 * routines and must be called once at startup.
 *
 * It creates the two (initially empty) directory trees and, when
 * pthread support is compiled in, the mutex which guards their
 * one-time construction.  The mutex only exists when HAVE_PTHREAD
 * is defined, so it is only touched in that configuration.
 */
void
gutenfetch_detail_init(void)
{
#ifdef HAVE_PTHREAD
	pthread_mutex_init(&directory_trees_done_mutex, NULL);
#endif /* HAVE_PTHREAD */
	detail_dir_tree = rb_create(directory_data_compare, NULL, NULL);
	detail_zip_dir_tree = rb_create(directory_data_compare, NULL, NULL);
}

/**
 * gutenfetch_detail_shutdown
 *
 * This is called before the application exits to release any resources
 * held by the detailing code.
 */
void
gutenfetch_detail_shutdown(void)
{
	rb_destroy(detail_zip_dir_tree, (void(*)(void*, void*))directory_data_free);
	rb_destroy(detail_dir_tree, (void(*)(void*, void*))directory_data_free);
	pthread_mutex_destroy(&directory_trees_done_mutex);
}


/** PUBLIC FUNCTION **/

/**
 * gutenfetch_detail_all_etexts
 *
 * This function, iterates through all electronic texts in the 
 * and details any whose entry value is NULL.
 */
gutenfetch_error_t
gutenfetch_detail_all_etexts(
	int (*progress_func)(void *, double, double, double, const char *),
	void *progress_func_data)
{
#define LINE_LENGTH 4096
#define UPDATE_PROGRESS 10
	char str[LINE_LENGTH];
	unsigned int i;
	gutenfetch_error_t err;
	unsigned int pcount = 0;
	
	if (etext_catalog_block_alloc == NULL) {
		return GUTENFETCH_OK;
	}
	
	for( i = 0; etext_catalog_block_alloc[i] != NULL; ++i) {
		if (etext_catalog_block_alloc[i]->entry == NULL) {
			/* Display Progress */
			if ((progress_func != NULL) && ((pcount % UPDATE_PROGRESS) == 0)) {
				assert(etext_catalog_block_alloc[i]->filebase != NULL);
				snprintf(str, LINE_LENGTH,
					"Detailing: %s", etext_catalog_block_alloc[i]->filebase);
				progress_func(progress_func_data, 
					0.0, (double)pcount, (double)pcount, str);
			}

			/* Detail etext */
			err = gutenfetch_detail_etext(etext_catalog_block_alloc[i]);
			if (err != GUTENFETCH_OK) {
				return err;
			}
		}
		pcount++;
	}

	/* Return SUCCESS */
	return GUTENFETCH_OK;
}

/**
 * etext_filename_matches
 *
 * Decides whether a file from the directory listing belongs to
 * the etext with the given filebase.  A file belongs to it when
 * its name starts with the filebase.  Older texts found in both
 * 7bit and 8bit formats are signified by a filebase which starts
 * with a '?' character; for those, a file name starting with '7'
 * or '8' also matches when the rest of the filebase is a prefix
 * of the rest of the file name.
 *
 * @param filename The file name from the listing, not NULL.
 * @param filebase The filebase of the etext, not NULL.
 * @param filebase_len strlen(filebase), computed once by the caller.
 * @return Non-zero when the file belongs to the etext, 0 otherwise.
 */
static int
etext_filename_matches(
	const char *filename,
	const char *filebase,
	size_t filebase_len)
{
	if (strncmp(filename, filebase, filebase_len) == 0) {
		return 1;
	}

	/* filebase[0] == '?' guarantees filebase_len >= 1, and a
	 * leading '7' or '8' guarantees filename[1] is readable.
	 */
	if ((filebase[0] == '?') &&
		((filename[0] == '7') || (filename[0] == '8')))
	{
		return strncmp(&filename[1], &filebase[1], filebase_len - 1) == 0;
	}

	return 0;
}

/**
 * gutenfetch_detail_etext
 *
 * This function details a specific electronic text
 * and fills in its entry field with valid etext_entry_t's
 *
 * The etext's directory (with one trailing '/' removed) is looked
 * up in the directory trees, which are primed first if necessary.
 * An entry is built for every file in that directory which
 * belongs to the etext.  etext->entry is then replaced by a newly
 * allocated, NULL terminated array of those entries; the array
 * holds only the terminator when nothing matched.  Entries
 * previously held in etext->entry are freed.  On any failure
 * etext->entry is left exactly as it was.
 *
 * @param etext The etext to detail.
 * @return GUTENFETCH_OK on success, GUTENFETCH_BAD_PARAM if etext
 *		or its directory or filebase is NULL, GUTENFETCH_NOMEM if
 *		memory could not be allocated, or the error reported by
 *		gutenfetch_prime_directory_trees().
 */
gutenfetch_error_t
gutenfetch_detail_etext(
	gutenfetch_etext_t *etext)
{
	gutenfetch_etext_entry_t **new_entries = NULL;
	gutenfetch_etext_entry_t *entry = NULL;
	directory_data_t *zip_dir_data = NULL;
	directory_data_t *dir_data = NULL;
	directory_data_t key;
	gutenfetch_error_t err;

	file_info_t *file_info = NULL;
	list_t *zip_files = NULL;
	list_t *lt = NULL;
	list_t *entry_lt = NULL;

	size_t length;
	size_t filebase_len;
	size_t count = 0;
	size_t i;
	
	if( etext == NULL ) {
		return GUTENFETCH_BAD_PARAM;
	}
		
	if( (etext->directory == NULL) || (etext->filebase == NULL) ) {
		return GUTENFETCH_BAD_PARAM;
	}

	/* Build the lookup key: the directory name without a trailing
	 * slash.  The length is tested first so that an empty
	 * directory string does not index before the buffer.
	 */
	key.directory = strdup(etext->directory);
	if (key.directory == NULL) {
		return GUTENFETCH_NOMEM;
	}
	key.files = NULL;

	length = strlen(key.directory);
	if ((length > 0) && (key.directory[length - 1] == '/')) {
		key.directory[length - 1] = '\0';
	}	

	/* Prime the directory trees if it hasn't been done already. */
	err = gutenfetch_prime_directory_trees();
	if (err != GUTENFETCH_OK) {
		FREE_NULL(key.directory);
		return err;
	}

	/* Find the directory which this etext should be located in,
	 * and the list of zipfiles in that same directory.
	 */
	dir_data = rb_find(detail_dir_tree, &key);
	if (dir_data != NULL) {
		zip_dir_data = rb_find(detail_zip_dir_tree, &key);
	}
	FREE_NULL(key.directory);

	if (dir_data != NULL) {
		/* A directory without a zip tree entry is treated like a
		 * directory without zip files.
		 */
		if (zip_dir_data != NULL) {
			zip_files = zip_dir_data->files;
		}
		filebase_len = strlen(etext->filebase);

		for (lt = list_first(dir_data->files);
			lt != NULL;
			lt = list_next(lt))
		{
			file_info = (file_info_t*)lt->data;
			if ((file_info == NULL) || (file_info->filename == NULL)) {
				continue;
			}

			if (!etext_filename_matches(
				file_info->filename, etext->filebase, filebase_len))
			{
				continue;
			}

			/* Build a new entry and attach it to the list. */
			entry = gutenfetch_etext_entry_build_new(
				etext->directory,
				file_info->filename,
				file_info->filesize,
				zip_files);
			if (entry != NULL) {
				entry_lt = list_prepend(entry_lt, entry);
				++count;
			}
		}
	}

	/* Allocate the replacement array before touching the old one,
	 * so that a failure leaves the etext unchanged.
	 */
	new_entries = malloc(sizeof *new_entries * (count + 1));
	if (new_entries == NULL) {
		if (entry_lt != NULL) {
			list_remove_all(entry_lt,
				(void(*)(void*))gutenfetch_etext_entry_free);
		}
		return GUTENFETCH_NOMEM;
	}

	i = 0;
	if (entry_lt != NULL) {
		for (lt = list_first(entry_lt);
			(lt != NULL) && (i < count);
			lt = list_next(lt))
		{
			new_entries[i++] = lt->data;
		}

		/* Free memory for the list, don't free the data as it
		 * is now held by the new_entries array.
		 */
		list_remove_all(entry_lt, NULL);
	}

	/* NULL-terminate our entry array. */
	new_entries[i] = NULL;

	/* Free any old electronic text entries, whether or not new
	 * ones were found.
	 */
	if( etext->entry != NULL ) {
		for(i = 0; etext->entry[i] != NULL; ++i) {
			gutenfetch_etext_entry_free(etext->entry[i]);
		}
		FREE_NULL(etext->entry);
	}

	etext->entry = new_entries;
	return GUTENFETCH_OK;
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */