ports//devel/libgutenfetch/work/libgutenfetch-1.2/src/libgutenfetch

/*
    gutenfetch - a small utility to list and fetch books available through
	project gutenberg

    Copyright (C) 2001, 2002, 2003, 2004 Russell Francis 

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the

	Free Software Foundation, Inc.
	59 Temple Place, Suite 330
	Boston, MA  02111-1307  USA

Last updated on $Date: 2004/07/20 00:23:59 $ by $Author: johntabularasa $.
*/
#include "stddefs.h"
#ifdef HAVE_STDIO_H
#	include <stdio.h>
#endif
#ifdef HAVE_STDLIB_H
#	include <stdlib.h>
#endif
#ifdef HAVE_UNISTD_H
#	include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#	include <sys/types.h>
#endif
#ifdef HAVE_SYS_UIO_H
#	include <sys/uio.h>
#endif
#ifdef HAVE_SYS_STAT_H
#	include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H 
#	include <fcntl.h>
#endif
#ifdef HAVE_STRING_H
#	include <string.h>
#endif
#ifdef HAVE_STRINGS_H
#	include <strings.h>
#endif
#ifdef HAVE_STDARG_H
#	include <stdarg.h>
#endif
#ifdef HAVE_ASSERT_H
#	include <assert.h>
#endif
#ifdef HAVE_DIRENT_H
#	include <dirent.h>
#endif
#ifdef HAVE_ERRNO_H
#	include <errno.h>
#endif
#include "list.h"
#include "libgutenfetch_filter.h"
#include "libgutenfetch_utility.h"
#include "gutenfetch.h"
#ifndef HAVE_STRSEP
/**
 *   The strsep() function locates, in the string referenced by *stringp, the
 *   first occurrence of any character in the string delim (or the terminating
 *   `\0' character) and replaces it with a `\0'.  The location of the next
 *   character after the delimiter character (or NULL, if the end of the
 *   string was reached) is stored in *stringp.  The original value of
 *   *stringp is returned.
 * 
 *   An ``empty'' field (i.e., a character in the string delim occurs as the
 *   first character of *stringp) can be detected by comparing the location
 *   referenced by the returned pointer to `\0'.
 *                                                                              
 *   If *stringp is initially NULL, strsep() returns NULL.
 *
 * @param stringp The string to separate.
 * @param delim The character to separate the string stringp on.
 * @return NULL or the next character after the first found delim.
 */
char *
strsep(char **stringp, const char *delim)
{
	char *res;

	if (!stringp || !*stringp || !**stringp)
		return NULL;
	
	res = *stringp;
	while (**stringp && !strchr(delim, **stringp))
		++(*stringp);
	
	if (**stringp) {
		**stringp = '\0';
		++(*stringp);
	}

	return res;
}
#endif /* HAVE_STRSEP */


#ifndef HAVE_MERGESORT
/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The following mergesort function and supporting functions were taken
 * from FreeBSD to provide a sorting routine on machines which do not 
 * provide mergesort().  
 *
 * Russell Francis <johntabularasa@users.sf.net> 11-15-03
 */
#ifndef u_char
#	define u_char unsigned char
#endif
#define ISIZE sizeof(int)
#define PSIZE sizeof(u_char *)
#define ICOPY_LIST(src, dst, last)				\
	do							\
	*(int*)dst = *(int*)src, src += ISIZE, dst += ISIZE;	\
	while(src < last)
#define ICOPY_ELT(src, dst, i)					\
	do							\
	*(int*) dst = *(int*) src, src += ISIZE, dst += ISIZE;	\
	while (i -= ISIZE)

#define CCOPY_LIST(src, dst, last)		\
	do					\
		*dst++ = *src++;		\
	while (src < last)
#define CCOPY_ELT(src, dst, i)			\
	do					\
		*dst++ = *src++;		\
	while (i -= 1)

/*
 * Find the next possible pointer head.  (Trickery for forcing an array
 * to do double duty as a linked list when objects do not align with word
 * boundaries.
 */
/* Assumption: PSIZE is a power of 2. */
#define EVAL(p) (u_char **)						\
	((u_char *)0 +							\
	    (((u_char *)p + PSIZE - 1 - (u_char *) 0) & ~(PSIZE - 1)))

static void
mergesort_insertionsort(u_char *, size_t, size_t, int (*)(const void *, const void *));

void
mergesort_setup(u_char *, u_char *, size_t, size_t, int(*)(const void *, const void *));

int
mergesort(void *, size_t, size_t, int(*)(const void *, const void *));

/*
 * Arguments are as for qsort.
 */
int
mergesort(base, nmemb, size, cmp)
	void *base;
	size_t nmemb;
	size_t size;
	int (*cmp)(const void *, const void *);
{
	int i, sense;
	int big, iflag;
	u_char *f1, *f2, *t, *b, *tp2, *q, *l1, *l2;
	u_char *list2, *list1, *p2, *p, *last, **p1;

	if (size < PSIZE / 2) {		/* Pointers must fit into 2 * size. */
		errno = EINVAL;
		return (-1);
	}

	if (nmemb == 0)
		return (0);

	/*
	 * XXX
	 * Stupid subtraction for the Cray.
	 */
	iflag = 0;
	if (!(size % ISIZE) && !(((char *)base - (char *)0) % ISIZE))
		iflag = 1;

	if ((list2 = malloc(nmemb * size + PSIZE)) == NULL)
		return (-1);

	list1 = base;
	mergesort_setup(list1, list2, nmemb, size, cmp);
	last = list2 + nmemb * size;
	i = big = 0;
	while (*EVAL(list2) != last) {
	    l2 = list1;
	    p1 = EVAL(list1);
	    for (tp2 = p2 = list2; p2 != last; p1 = EVAL(l2)) {
	    	p2 = *EVAL(p2);
	    	f1 = l2;
	    	f2 = l1 = list1 + (p2 - list2);
	    	if (p2 != last)
	    		p2 = *EVAL(p2);
	    	l2 = list1 + (p2 - list2);
	    	while (f1 < l1 && f2 < l2) {
	    		if ((*cmp)(f1, f2) <= 0) {
	    			q = f2;
	    			b = f1, t = l1;
	    			sense = -1;
	    		} else {
	    			q = f1;
	    			b = f2, t = l2;
	    			sense = 0;
	    		}
	    		if (!big) {	/* here i = 0 */
				while ((b += size) < t && cmp(q, b) >sense)
	    				if (++i == 6) {
	    					big = 1;
	    					goto EXPONENTIAL;
	    				}
	    		} else {
EXPONENTIAL:	    		for (i = size; ; i <<= 1)
	    				if ((p = (b + i)) >= t) {
	    					if ((p = t - size) > b &&
						    (*cmp)(q, p) <= sense)
	    						t = p;
	    					else
	    						b = p;
	    					break;
	    				} else if ((*cmp)(q, p) <= sense) {
	    					t = p;
	    					if (i == size)
	    						big = 0;
	    					goto FASTCASE;
	    				} else
	    					b = p;
				while (t > b+size) {
	    				i = (((t - b) / size) >> 1) * size;
	    				if ((*cmp)(q, p = b + i) <= sense)
	    					t = p;
	    				else
	    					b = p;
	    			}
	    			goto COPY;
FASTCASE:	    		while (i > size)
	    				if ((*cmp)(q,
	    					p = b + (i >>= 1)) <= sense)
	    					t = p;
	    				else
	    					b = p;
COPY:	    			b = t;
	    		}
	    		i = size;
	    		if (q == f1) {
	    			if (iflag) {
	    				ICOPY_LIST(f2, tp2, b);
	    				ICOPY_ELT(f1, tp2, i);
	    			} else {
	    				CCOPY_LIST(f2, tp2, b);
	    				CCOPY_ELT(f1, tp2, i);
	    			}
	    		} else {
	    			if (iflag) {
	    				ICOPY_LIST(f1, tp2, b);
	    				ICOPY_ELT(f2, tp2, i);
	    			} else {
	    				CCOPY_LIST(f1, tp2, b);
	    				CCOPY_ELT(f2, tp2, i);
	    			}
	    		}
	    	}
	    	if (f2 < l2) {
	    		if (iflag)
	    			ICOPY_LIST(f2, tp2, l2);
	    		else
	    			CCOPY_LIST(f2, tp2, l2);
	    	} else if (f1 < l1) {
	    		if (iflag)
	    			ICOPY_LIST(f1, tp2, l1);
	    		else
	    			CCOPY_LIST(f1, tp2, l1);
	    	}
	    	*p1 = l2;
	    }
	    tp2 = list1;	/* swap list1, list2 */
	    list1 = list2;
	    list2 = tp2;
	    last = list2 + nmemb*size;
	}
	if (base == list2) {
		memmove(list2, list1, nmemb*size);
		list2 = list1;
	}
	free(list2);
	return (0);
}

#define	swap(a, b) {					\
		s = b;					\
		i = size;				\
		do {					\
			tmp = *a; *a++ = *s; *s++ = tmp; \
		} while (--i);				\
		a -= size;				\
	}
#define reverse(bot, top) {				\
	s = top;					\
	do {						\
		i = size;				\
		do {					\
			tmp = *bot; *bot++ = *s; *s++ = tmp; \
		} while (--i);				\
		s -= size2;				\
	} while(bot < s);				\
}

/*
 * Optional hybrid natural/pairwise first pass.  Eats up list1 in runs of
 * increasing order, list2 in a corresponding linked list.  Checks for runs
 * when THRESHOLD/2 pairs compare with same sense.  (Only used when NATURAL
 * is defined.  Otherwise simple pairwise merging is used.)
 */
void
mergesort_setup(list1, list2, n, size, cmp)
	size_t n, size;
	int (*cmp)(const void *, const void *);
	u_char *list1, *list2;
{
	int i, length, size2, tmp, sense;
	u_char *f1, *f2, *s, *l2, *last, *p2;

	size2 = size*2;
	if (n <= 5) {
		mergesort_insertionsort(list1, n, size, cmp);
		*EVAL(list2) = (u_char*) list2 + n*size;
		return;
	}
	/*
	 * Avoid running pointers out of bounds; limit n to evens
	 * for simplicity.
	 */
	i = 4 + (n & 1);
	mergesort_insertionsort(list1 + (n - i) * size, i, size, cmp);
	last = list1 + size * (n - i);
	*EVAL(list2 + (last - list1)) = list2 + n * size;

#ifdef NATURAL
	p2 = list2;
	f1 = list1;
	sense = (cmp(f1, f1 + size) > 0);
	for (; f1 < last; sense = !sense) {
		length = 2;
					/* Find pairs with same sense. */
		for (f2 = f1 + size2; f2 < last; f2 += size2) {
			if ((cmp(f2, f2+ size) > 0) != sense)
				break;
			length += 2;
		}
		if (length < THRESHOLD) {		/* Pairwise merge */
			do {
				p2 = *EVAL(p2) = f1 + size2 - list1 + list2;
				if (sense > 0)
					swap (f1, f1 + size);
			} while ((f1 += size2) < f2);
		} else {				/* Natural merge */
			l2 = f2;
			for (f2 = f1 + size2; f2 < l2; f2 += size2) {
				if ((cmp(f2-size, f2) > 0) != sense) {
					p2 = *EVAL(p2) = f2 - list1 + list2;
					if (sense > 0)
						reverse(f1, f2-size);
					f1 = f2;
				}
			}
			if (sense > 0)
				reverse (f1, f2-size);
			f1 = f2;
			if (f2 < last || cmp(f2 - size, f2) > 0)
				p2 = *EVAL(p2) = f2 - list1 + list2;
			else
				p2 = *EVAL(p2) = list2 + n*size;
		}
	}
#else		/* pairwise merge only. */
	for (f1 = list1, p2 = list2; f1 < last; f1 += size2) {
		p2 = *EVAL(p2) = p2 + size2;
		if (cmp (f1, f1 + size) > 0)
			swap(f1, f1 + size);
	}
#endif /* NATURAL */
}

/*
 * This is to avoid out-of-bounds addresses in sorting the
 * last 4 elements.
 */
static void
mergesort_insertionsort(a, n, size, cmp)
	u_char *a;
	size_t n, size;
	int (*cmp)(const void *, const void *);
{
	u_char *ai, *s, *t, *u, tmp;
	int i;

	for (ai = a+size; --n >= 1; ai += size)
		for (t = ai; t > a; t -= size) {
			u = t - size;
			if (cmp(u, t) <= 0)
				break;
			swap(u, t);
		}
}
#endif /* HAVE_MERGESORT */

/**
 * gutenfetch_util_strcat concatenates strings into one large string.
 *
 * This function takes a NULL terminated list of NULL terminated
 * strings and concatenates them together.
 *
 * @param a A NULL terminated string.
 * @return A string which must be freed when finished.  It will
 *      be either the concatenation of all the strings passed in
 *  	or will be NULL.
 */
char *
gutenfetch_util_strcat(char *a, ...)
{
	va_list ap;
	size_t len = 1; /* space for null terminator */
	list_t *l = NULL;
	list_t *head = NULL;
	char *item;

	assert (a != NULL);
	len += strlen(a);
	l = list_append(l, a);

	va_start(ap, a);
	while ( (item = va_arg(ap, char *)) != NULL ) {
		len += strlen(item);
		l = list_append(l, item);
	}
	va_end(ap);
	item = malloc(sizeof(char) * len);
	if ( item == NULL ) {
		fprintf(stderr, _("Unable to allocate %u bytes of memory."),
			(sizeof(char) * len));
		abort();
	}
	
	head = l = list_first(l);
	strcpy(item, l->data);

	for(l = list_next(l); l != NULL ; l = list_next(l))
		strcat(item, l->data);
	
	list_remove_all(head, NULL);
	return item;
}
/**
 * Get the next line from a file.
 *
 * This function returns the characters in a file up to the '\n'
 * character.  The returned result is a NULL terminated string.
 *
 * @param fp The file pointer of the file to read from.
 * @return a NULL terminated string which contains the next line
 * 			in the file.
 */
char *
gutenfetch_util_getline(FILE *fp)
{
	char *line = NULL;
	char *temp = NULL;
	size_t total_size;
	size_t i;

	for (i = 0, total_size = 0; TRUE ; ++i) {
		/* allocate more memory for the line if needed. */
		if(i == total_size){
			total_size += BLOCK_SIZE;
			temp = realloc(line, sizeof(char) * total_size);
			if (temp == NULL) {
				FREE_NULL(line);
				/*if (opt_get_verbose()) {
					fprintf(stderr, 
					_("Unable to allocate %u bytes of memory."),
					sizeof(char) * total_size);
				}*/
				break;
			}
			line = temp;
		}

		/* Read the next character into the line */
		if((line[i] = fgetc(fp)) == '\n'){
			line[i] = '\0';	
			break;
		}else if(line[i] == EOF){
			if(i == 0) {
				FREE_NULL(line);
			} else {
				line[i] = '\0';
			}	
			break;
		}
	}
	return line;
}

/**
 * gutenfetch_ms_strip_text_buffer
 *
 * Strip the Windows world 0D 0A from the end of lines and
 * replace with the unix friendly, 0A.
 *
 * @param buffer The text buffer we wish to modify.  This must be
 *  NULL terminated or else!
 * @return An error code or GUTENFETCH_OK
 */
gutenfetch_error_t
gutenfetch_ms_strip_text_buffer(char *buffer)
{
	size_t writing_to = 0;
	size_t reading_from = 0;
	unsigned char current_byte = 0x00;
	unsigned char last_byte = 0x00;

	if (buffer == NULL)
		return GUTENFETCH_BAD_PARAM;

	/* This routine should be pretty quick and only
	   require one pass over the data. */
	while (buffer[reading_from] != '\0') {
		last_byte = current_byte;
		current_byte = buffer[reading_from];
		if ((last_byte == 0x0D) && (current_byte == 0x0A)) 
			writing_to--;
		buffer[writing_to] = current_byte;
		reading_from++;
		writing_to++;
	}
	
	buffer[writing_to] = '\0'; /* NULL terminate the result. */
	return GUTENFETCH_OK;
}

/**
 * gutenfetch_ms_strip_text_file
 *
 * This routine strips the MS <CR><LF> from a text file and replaces 
 * them with the UNIX friendly <LF>.
 *
 * @param filename The filename of the text file we wish to modify.
 * @return GUTENFETCH_OK or an error code indicating the error.
 */
gutenfetch_error_t
gutenfetch_ms_strip_text_file(char *filename)
{
	gutenfetch_error_t retval = GUTENFETCH_OK;
	int fd;

	if (filename == NULL)
		return GUTENFETCH_BAD_PARAM;

	fd = open(filename, O_RDWR);
	if (fd >= 0) {
		retval = gutenfetch_ms_strip_text_fd(fd);
		close(fd);
	} else {	
		retval = GUTENFETCH_SEE_ERRNO;
	}
	
	return retval;
}

/**
 * gutenfetch_ms_strip_fd
 *
 * Strip Windows <CR><LF> from a text file.
 *
 * @param fd The file descriptor of an open r/w file to modify.
 * @return GUTENFETCH_OK or an error.
 */
gutenfetch_error_t
gutenfetch_ms_strip_text_fd(int fd)
{
#define READ_BUFFER_SIZE 4096
#define WRITE_BUFFER_SIZE 4096
	char read_buffer[READ_BUFFER_SIZE], write_buffer[WRITE_BUFFER_SIZE];
	size_t read_buf_count = 0, write_buf_count = 0;
	size_t read_buf_index = 0, write_buf_index = 0;
	size_t read_fd_index = 0, write_fd_index = 0;
	char last_byte = 0x00, current_byte = 0x00;
	gutenfetch_error_t reterr = GUTENFETCH_OK;	

	read_buf_index = lseek(fd, 0, SEEK_SET); /* make sure it is seekable. */
	if (read_buf_index == -1) 
		return GUTENFETCH_SEE_ERRNO;

	while (TRUE) {
		if (read_buf_index == read_buf_count) {
			if (lseek(fd, read_fd_index, SEEK_SET) == -1) {
				reterr = GUTENFETCH_SEE_ERRNO;
				break;
			}	
			read_buf_index = 0;	
			read_buf_count = read(fd, read_buffer, READ_BUFFER_SIZE);
			if (read_buf_count < 0) { /* Error condition */
				reterr = GUTENFETCH_SEE_ERRNO;
				break;
			}	
			read_fd_index += read_buf_count;
			if (read_buf_count == 0) { /* EOF and clean exit */
				break;
			}	
		}
		
		last_byte = current_byte;
		current_byte = read_buffer[read_buf_index++];
		if ((last_byte == 0x0D) && (current_byte == 0x0A)) 
			write_buf_index--;
		write_buffer[write_buf_index++] = current_byte;	
		
		if (write_buf_index == WRITE_BUFFER_SIZE) { /* flush the output */
			if (lseek(fd, write_fd_index, SEEK_SET) == -1) {
				reterr = GUTENFETCH_SEE_ERRNO;
				break;
			}	
			write_buf_count = write(fd, write_buffer, write_buf_index);
			if (write_buf_count < 0) {
				reterr = GUTENFETCH_SEE_ERRNO;
				break;
			}	
			write_fd_index += write_buf_count;
			if (write_buf_count < write_buf_index) {
				memmove(write_buffer, &write_buffer[write_buf_count],
					write_buf_index - write_buf_count);	
				write_buf_index = write_buf_count;
			} else {
				write_buf_index = 0;
			}
		}	
	}

	if (write_buf_index != 0) { /* We need to flush the write buffer. */
		if (lseek(fd, write_fd_index, SEEK_SET) == -1) {
			reterr = GUTENFETCH_SEE_ERRNO;
		} else {
			while ((write_buf_index != 0) && (reterr == GUTENFETCH_OK)) {
				write_buf_count = write(fd, write_buffer, write_buf_index);
				if (write_buf_count < 0) {
					reterr = GUTENFETCH_SEE_ERRNO;
				} else {	
					write_fd_index += write_buf_count;
					if (write_buf_count < write_buf_index) {
						memmove(write_buffer, &write_buffer[write_buf_count],
							write_buf_index - write_buf_count);
						write_buf_index = write_buf_count;
					} else {
						write_buf_index = 0;
					}
				}	
			}	
		}
	}

	if( ftruncate(fd, write_fd_index) == -1 ) {
		reterr = GUTENFETCH_SEE_ERRNO;
	}
	
	return reterr;
}

/**
 * gutenfetch_ms_clothe_text_buffer
 *
 * Replace the UNIX <lf> with the windows <cr><lf> in
 * text documents.  This function takes a pointer to a
 * NULL terminated text buffer.  It will probably change
 * the address of the buffer if successful, otherwise, it
 * will be unchanged.
 *
 * @param buffer A pointer to a text buffer to modify.
 * @return GUTENFETCH_OK or an error code.
 */
gutenfetch_error_t
gutenfetch_ms_clothe_text_buffer(char **buffer)
{
	char *new_buffer, *temp;
	char current_byte = 0x01, previous_byte = 0x00;
	size_t new_size;
	size_t i, new_index;

	if (buffer == NULL)
		return GUTENFETCH_BAD_PARAM;
	if (*buffer == NULL)
		return GUTENFETCH_BAD_PARAM;
	
	new_size = 4096;

	new_buffer = malloc(sizeof(char) * new_size);
	if (new_buffer == NULL)
		return GUTENFETCH_NOMEM;
	
	new_index = 0;
	i = 0;
	while (current_byte != '\0') {
		previous_byte = current_byte;
		current_byte = (*buffer)[i++];
		
		if ((previous_byte != 0x0D) && (current_byte == 0x0A)) {
			/* we need to insert the ms <cr><lf> here. */
			new_buffer[new_index++] = 0x0D;
			if (new_index == new_size) { /* we need more memory. */
				new_size *= 2;
				temp = realloc(new_buffer, new_size);
				if (temp == NULL) {
					FREE_NULL(new_buffer);
					return GUTENFETCH_NOMEM;
				}
				new_buffer = temp;
			}
		}
		new_buffer[new_index++] = current_byte;
		if (new_index == new_size) {
			new_size *= 2;
			temp = realloc(new_buffer, new_size);
			if (temp == NULL) {
				FREE_NULL(new_buffer);
				return GUTENFETCH_NOMEM;
			}
			new_buffer = temp;
		}
	}
	/* shrink held memory block to match data.
	   Should always release memory. The result should
	   be NULL terminated allready. */
	temp = realloc(new_buffer, new_index);
	if (temp == NULL) {
		FREE_NULL(new_buffer);
		return GUTENFETCH_NOMEM;
	}
	FREE_NULL(*buffer);
	*buffer = new_buffer = temp;
	return GUTENFETCH_OK;
}

/**
 * gutenfetch_ms_clothe_text_file
 *
 * Given a filename, replace unix <lf> with the
 * windows <cr><lf> newline convention.
 *
 * @param filename The file we wish to modify.
 * @return GUTENFETCH_OK or an error code.
 */
gutenfetch_error_t
gutenfetch_ms_clothe_text_file(char *filename)
{
	gutenfetch_error_t retval = GUTENFETCH_OK;
	int fd;

	if (filename == NULL)
		return GUTENFETCH_BAD_PARAM;

	fd = open(filename, O_RDWR);
	if (fd >= 0) {
		retval = gutenfetch_ms_clothe_text_fd(fd);
		close(fd);
	} else {	
		retval = GUTENFETCH_SEE_ERRNO;
	}
	
	return retval;
}

/**
 * gutenfetch_ms_clothe_text_fd
 *
 * Given a file descriptor, replace the UNIX <LF> with
 * the longer windows <cr><lf>
 *
 * @param fd The filedescriptor
 * @return GUTENFETCH_OK or and error code.
 */
gutenfetch_error_t
gutenfetch_ms_clothe_text_fd(int fd)
{
	gutenfetch_error_t reterr;
	char *buffer = NULL, *temp;
	size_t bufsize = 0;
	size_t bytes_read = 0;
	size_t read_val;
	
	/* start at the beginning of the file. */
	if(lseek(fd, 0, SEEK_SET) == -1)
		return GUTENFETCH_SEE_ERRNO;	

	/* read file into buffer. */
	while (TRUE) {
		if (bytes_read == bufsize) { /* allocate a large buffer. */
			bufsize += 4096;
			temp = realloc(buffer, bufsize);
			if (temp == NULL) {
				FREE_NULL(buffer);
				return GUTENFETCH_NOMEM;
			}
			buffer = temp;
		}
		read_val = read(fd, &buffer[bytes_read], bufsize - bytes_read);
		if (read_val < 0) { /* Error condition */
			FREE_NULL(buffer);
			return GUTENFETCH_SEE_ERRNO;
		} else if (read_val == 0) { /* EOF */
			if (bytes_read == bufsize) { /* allocate a large buffer. */
				bufsize += 1;
				temp = realloc(buffer, bufsize);
				if (temp == NULL) {
					FREE_NULL(buffer);
					return GUTENFETCH_NOMEM;
				}
				buffer = temp;
			}
			buffer[bytes_read] = '\0';
			reterr = gutenfetch_ms_clothe_text_buffer(&buffer);
			break;
		} else { /* more input to read in. */
			bytes_read += read_val;
		}	
	}	
	
	if (reterr == GUTENFETCH_OK) {
		bufsize = 0;
		while (buffer[bufsize] != '\0') /* determine length of buffer */
			bufsize++;
		lseek(fd, 0, SEEK_SET);
		bytes_read = 0;
		while (bytes_read < bufsize) {
			read_val = write(fd, &buffer[bytes_read], bufsize);
			if (read_val == -1) {
				FREE_NULL(buffer);
				reterr = GUTENFETCH_SEE_ERRNO;
				break;
			}
			bytes_read += read_val;
		}	
	}
	
	return reterr;
}

/**
 * gutenfetch_util_get_author
 *
 * Given a one line description of the book, strip out and
 * return the author.
 *
 * @param str The one line description of the book
 * 	from GUTINDEX.ALL
 * @return The author if we can guess it or NULL.
 *	the result must be freed.
 */
char *
gutenfetch_util_get_author(char *str)
{
	char *author = NULL;
	list_t *match = NULL, *lt = NULL;
	
	match = gutenfetch_ifilter_match(IFILTER_AUTHOR, str);
	if (match != NULL) {
		lt = list_next(list_first(match));
		if (lt != NULL)
			author = strdup(lt->data);
		list_remove_all(match, free);
	}
	
	return author;
}

/**
 * gutenfetch_util_get_title
 * 
 * This function takes the one line description from
 * GUTINDEX.ALL and returns the title or NULL.
 *
 * @param str The one line description of the book.
 * @return NULL or the title, if ! NULL, the result
 * 	must be freed.
 */
char *
gutenfetch_util_get_title(char *str)
{
	char *title = NULL;
	list_t *match = NULL, *lt = NULL;
	

	/* Try the old one first as it is stricter and more likely to fail! */
	match = gutenfetch_ifilter_match(IFILTER_OLD_TITLE, str);
	if (match != NULL) {
		lt = list_next(list_first(match));
		if (lt != NULL) {
			title = strdup(lt->data);
		}
		list_remove_all(match, free);
	} else { /* maybe it is a new book entry. */
		match = gutenfetch_ifilter_match(IFILTER_NEW_TITLE, str);
		if (match != NULL) {
			lt = list_next(list_first(match));
			if (lt != NULL) {
				title = strdup(lt->data);
			}
			list_remove_all(match, free);
		}
	}

	return title;
}

/**
 * gutenfetch_util_build_URL
 *
 * Given a server and a file, append the two and return a string.
 * This is sort of a stupid function.
 *
 * @param server The server we will get the file from.
 * @param file The file we will get from the server.
 * @return NULL on failure or a valid URL.
 */
char *
gutenfetch_util_build_URL(gutenfetch_server_t *server, const char *file)
{
	char *url = NULL;
	
	if ((server != NULL) && (file != NULL)) {
		if (strlen(server->host) >= 1) {
			if (server->host[strlen(server->host) - 1] != '/') {
				url = gutenfetch_util_strcat(
					server->host, "/", file, NULL);
			} else {
				url = gutenfetch_util_strcat(
					server->host, file, NULL);
			}
		}
	}

	return url;
}

/**
 * gutenfetch_util_get_temp_dir
 *
 * Return the temporary directory we should use
 * when creating temporary files.
 *
 * @return A NULL-terminated string which should
 * be used as the temporary directory.  This
 * string must be freed after use.  This function
 * will return NULL if a suitable temporary
 * directory could not be found.
 */
char *
gutenfetch_util_get_temp_dir(void)
{
	static int been_called = FALSE;
	static char directory[1024];
	static char *dir = NULL;

	if (been_called == FALSE) {
		been_called = TRUE;
#ifdef WIN32
		// XXX implement for windows.
		dir = NULL;	
#else		
		snprintf(&directory[0], 1024, "/tmp/libgutenfetch%d.XXXX", getpid());
		dir = mkdtemp(directory);
#endif
	}
	if (dir == NULL)
		return NULL;	
	return strdup(dir);
}

/**
 * gutenfetch_util_get_temp_file
 * 
 * Generate a temporary filename and return
 * a file descriptor to it.
 *
 * @param temp_name A pointer to the filename which will be
 *	filled in by this function.
 * @return A valid file descriptor open for read & write
 * 	or -1 on failure.
 */
int
gutenfetch_util_get_temp_file(char **temp_name)
{
	int fd;
	char *dir = gutenfetch_util_get_temp_dir();
	char *file = strdup("ilovekif.XXXX");
	char *full = gutenfetch_util_strcat(dir, DIR_SEPARATOR, file, NULL);
	fd = mkstemp(full);
	if (fd != -1) {
		if (temp_name != NULL) {
			FREE_NULL( *temp_name );
			*temp_name = strdup(full);
		}
	}
	FREE_NULL(dir);
	FREE_NULL(file);
	FREE_NULL(full);
	return fd;
}

/**
 * gutenfetch_util_free_temp_dir
 *
 * This function removes the temporary directory
 * and all files below it.  It is called once by
 * gutenfetch_shutdown before an application exits.
 */
void
gutenfetch_util_free_temp_dir(void)
{
	char *temp_dir = gutenfetch_util_get_temp_dir();
	
	if (temp_dir != NULL) {
		gutenfetch_util_rmdir(temp_dir);
		FREE_NULL(temp_dir);
	}
}

/**
 * gutenfetch_util_rmdir
 *
 * This function takes a directory and removes all files
 * below it as well as the directory itself.
 *
 * @param temp_dir The name of a directory to remove.
 */
void
gutenfetch_util_rmdir(const char *temp_dir)
{
	if (temp_dir == NULL) 
		return;

	gutenfetch_util_rm_below_dir(temp_dir);
	rmdir(temp_dir);
}


/** 
 * gutenfetch_util_rm_below_dir
 *
 * This function takes a directory and removes all files
 * and directories recursively below it.
 *
 * @param temp_dir The name of the directory to remove
 *  all contents from.
 */
void
gutenfetch_util_rm_below_dir(const char *temp_dir)
{
	DIR *dir;
	struct dirent *entry;
	char path[4096];	

	if (temp_dir == NULL)
		return;
	
	dir = opendir(temp_dir);
	if (dir != NULL) {
		while ((entry = readdir(dir)) != NULL) {
			if ( 	(strcmp(entry->d_name, ".") != 0) &&
					(strcmp(entry->d_name, "..") != 0))
			{
				if (entry->d_type == DT_DIR) {
					gutenfetch_util_rmdir(entry->d_name);
				} else {
					snprintf(path, 4096, "%s%s%s", 
						temp_dir,
						DIR_SEPARATOR,
						entry->d_name);
					unlink(path);	
				}
			}
		}
		closedir(dir);
	}
	return;
}

/**
 * gutenfetch_util_rm_old_below_dir
 * 
 * This removes all files below a certain directory
 * whose time has expired.  This is used by the cache
 * functions.
 *
 * @param expires The time_t that files expire.
 * @param temp_dir The directory to scan for old
 *	files.  
 */
void
gutenfetch_util_rm_old_below_dir(
	time_t expires,
	const char *temp_dir)
{
	char path[4096];
	DIR *dir;
	struct dirent *entry;
	struct stat sb;
	time_t now;
	
	if (temp_dir == NULL)
		return;
	
	dir = opendir(temp_dir);
	if (dir != NULL) {
		now = time(NULL);
		while ((entry = readdir(dir)) != NULL) {
			if (	(strcmp(entry->d_name, ".") != 0) &&
					(strcmp(entry->d_name, "..") != 0))
			{
				if (entry->d_type == DT_DIR) {
					gutenfetch_util_rm_old_below_dir(
						expires, entry->d_name);
				} else {
					snprintf(path, 4096, "%s%s%s", 
						temp_dir,
						DIR_SEPARATOR,
						entry->d_name);
					if (stat(path, &sb) == 0) {
						if ( (now - sb.st_atime) > expires ) {
							unlink(path);	
						}	
					}	
				}
			}	
		}
	}
}

/**
 * gutenfetch_util_get_mime_from_filename
 * 
 * Given a filename, return the MIME type of the
 * file.
 *
 * @param filename The filename of the file to find
 *	the mime type of.
 * @return A NULL terminated string which describes the
 * 	mime type of the file.  It must be freed after
 * 	use.
 */
char *
gutenfetch_util_get_mime_from_filename(const char *filename)
{
	char *mime = NULL;
	char *ext = NULL;
	
	if (filename == NULL)
		return NULL;
	
	gutenfetch_util_get_base_ext(NULL, &ext, filename);

	if (ext != NULL) {
		if (strcmp(ext, "zip") == 0) {
			mime = strdup("application/zip");
		} else if (strcmp(ext, "txt") == 0) { 
			mime = strdup("text/plain");		
		} else if (strcmp(ext, "htm") == 0) {
			mime = strdup("text/html");
		} else if (strcmp(ext, "tex") == 0) {
			mime = strdup("tex/plain");
		} else if (strcmp(ext, "xml") == 0) {
			mime = strdup("text/xml");
		} else if (strcmp(ext, "mp3") == 0) {
			mime = strdup("audio/mpeg");
		} else if (strcmp(ext, "rtf") == 0) {
			mime = strdup("text/richtext");
		} else if (strcmp(ext, "pdf") == 0) {
			mime = strdup("application/pdf");
		} else if (strcmp(ext, "lit") == 0) {
			mime = strdup("application/octet-stream");
		} else if (strcmp(ext, "doc") == 0) {
			mime = strdup("application/octet-stream");
		} else if (strcmp(ext, "pdb") == 0) {
			mime = strdup("application/octet-stream");
		} else if (strcmp(ext, "prc") == 0) {
			mime = strdup("application/octet-stream");
		} else {
			mime = strdup("application/octet-stream");
		}
	}
	FREE_NULL(ext);
	return mime;
}

/**
 * gutenfetch_util_extension_is
 *
 * Test the extension of a filename.  Return true if 
 * the extension matches, false if it doesn't
 *
 * @param ext The extension we think might match.
 * @param filename The filename whose extension we 
 * 	are curious about.
 * @return TRUE or FALSE.
 */
int
gutenfetch_util_extension_is(char *ext, char *filename)
{
	int retval = FALSE;
	char *fext = NULL;

	gutenfetch_util_get_base_ext(NULL, &fext, filename);
	if (strcmp(ext, fext) == 0) {
		retval = TRUE;
	}	
	FREE_NULL(fext);
	return retval;
}

void
gutenfetch_util_get_base_ext(char **base, char **ext, const char *filename)
{
	list_t *lt = NULL;
	list_t *match = NULL;
	
	if (filename == NULL) {
		return;
	}
	
	match = gutenfetch_ifilter_match(IFILTER_FILENAME_BASE_EXT, filename);

	if (match != NULL) {
		lt = list_next(list_first(match));
		if (lt != NULL) {
			if (base != NULL) {
				*base = strdup((char*)lt->data);
			}	
		}
		lt = list_next(lt);
		if (lt != NULL) {
			if (ext != NULL) {	
				*ext = strdup((char*)lt->data);
			}	
		}	
		list_remove_all(match, free);
	}
	return;
}


char *
gutenfetch_util_get_home_directory(void)
{
	char *home_dir = NULL;
#ifdef WIN32
	/* Currently, we write a configuration file in the
	   current working directory if we are on windows.
	   This is wrong and a bit of a hack I think, perhaps
	   things should go in the registry?  I really am not
	   sure, if some Windows guru know, feel free to let 
	   me know.
	*/   
#ifndef PATH_MAX
#	define PATH_MAX 256
#endif /* PATH_MAX */
	char hdir[PATH_MAX];
	home_dir = getcwd(home_dir, PATH_MAX);
#else
	home_dir = getenv("HOME");
#endif

	return home_dir;
}

void
gutenfetch_util_build_path(const char *filename)
{
	list_t *lt = NULL;
	list_t *path_list = NULL;
	struct stat sb;
	char *current_dir = NULL;
	char *f = NULL;
	char *g = NULL;
	char *ptr = NULL;
	int stat_val, err = FALSE;

	assert(filename != NULL);

	/* This chops off the initial '/' from the path
	 */ 
	if (strlen(filename) < 2)
		return;
	g = f = strdup(&filename[1]);
	assert(f != NULL);
	
	/* Build the chunks of the path. */
	while((ptr = strsep(&f, DIR_SEPARATOR)) != NULL) {
		path_list = list_append(path_list, ptr);
	}

	/* Remove the last element as the should be the filename. */
	lt = list_last(path_list);
	lt = path_list = list_remove_node(lt, NULL);

	/* iterate through them and ensure that they exist or create them. */
	lt = list_first(lt);
	while ((lt != NULL) && (err == FALSE)){
		if (current_dir == NULL) {
			current_dir = gutenfetch_util_strcat(DIR_SEPARATOR, lt->data, NULL);
		} else {
			f = current_dir;
			current_dir = gutenfetch_util_strcat(current_dir,
				DIR_SEPARATOR, lt->data, NULL);
			FREE_NULL(f);	
		}
		lt = list_next(lt);

		stat_val = stat(current_dir, &sb);
		if (stat_val == 0) {
			if ((sb.st_mode | S_IFDIR) == 0) {
				/* path is not a directory !!! */
				err = TRUE;
			}
		} else {
			if (errno == ENOENT) {
				if(mkdir(current_dir, S_IRWXU | S_IRGRP | S_IXGRP) == -1) { 
					/* Error creating directory. */
					err = TRUE;
				}
			} else {
				/* Some other error with stat ??? */
				err = TRUE;
			}
		}
	}
	
	list_remove_all(path_list, NULL);
	FREE_NULL(current_dir);
	FREE_NULL(g);
}

/**
 * gutenfetch_util_move
 *
 * Given two filenames, copy the first one to the second one.
 * It will first try to rename them, which should work if 
 * they are on the same partition, if not, it will manually
 * copy all the bytes.
 *
 * @param from The full path/filename of the file to move.
 * @param to The full path/filename of the destination.
 * @return 1 on success, -1 on failure.
 */
int
gutenfetch_util_move(const char *from, const char *to)
{
#define BUF_SIZE 8192
	char buf[BUF_SIZE];
	int from_fd, to_fd;
	ssize_t bytes_read, bytes_written;

	if ((from == NULL) || (to == NULL))
		return( -1 );
	
	/* Build the path to the to file if it doesn't exist. */
	gutenfetch_util_build_path(to);

	/* First try to just rename the file as this will be 
	 * much quicker if it is allowed.  If not, we will manually
	 * copy the files.
	 */
	if (rename(from, to) == -1) {
		from_fd = open(from, O_RDONLY);

		if (from_fd == -1) {
			return( -1 );
		}
	
		to_fd = open(to, 
			O_WRONLY | O_CREAT | O_TRUNC,
			S_IRWXU | S_IRGRP | S_IXGRP);
		if (to_fd == -1) {
			close(from_fd);
			return( -1 );
		}

		while ((bytes_read = read(from_fd, buf, BUF_SIZE)) > 0) {
			bytes_written = write(to_fd, buf, bytes_read);
			if (bytes_read != bytes_written) {
				close(from_fd);
				close(to_fd);
				return( -1 );
			}
		}
		close(from_fd);
		close(to_fd);

		if (bytes_read != 0)
			return( -1 );
		/* Remove the from file, this makes it have
		 * the same semantics as rename.
		 */
		unlink(from);	
	}	
	return( 1 );	
}

/**
 * gutenfetch_util_read_file_to_buffer
 *
 * Given a valid file descriptor, read the contents into
 * a buffer and return the result.
 *
 *  ** NOTE **
 *
 * 		This should only be used to read text files,
 * 		if it is used on a binary file, it may result
 *		in not being able to read after the first zero.
 *
 * @param fd The file descriptor to read from.
 * @return A NULL-terminated buffer with the contents of the 
 * 	file.
 */
char *
gutenfetch_util_read_file_to_buffer(int fd)
{
	ssize_t rw = -1;
	size_t i = 0;
	size_t buf_size = 0;
	size_t block_size = 4096;
	char *buffer = NULL;
	char *temp = NULL;

	if (fd != -1) {
		lseek(fd, 0, SEEK_SET);
		do {
			if (i + block_size > buf_size) {
				temp = realloc(buffer, buf_size + block_size);
				if (temp != NULL) {
					buffer = temp;
					buf_size += block_size;
				} else {
					FREE_NULL(buffer);
					return( NULL );
				}	
			}
			rw = read(fd, &buffer[i], block_size);
			if (rw > 0) {
				i += rw;
			}
		} while(rw > 0);
	}
	
	if( rw < 0 ) {
		FREE_NULL(buffer);
		return( NULL );
	}

	/* NULL-terminate the result. */
	if (buffer != NULL) {
		temp = realloc(buffer, i + 1);
		if (temp != NULL) {
			temp[i] = '\0';
			buffer = temp;
		} else {
			FREE_NULL(buffer);
		}
	}	
	return buffer;
}

/**
 * gutenfetch_util_read_binary_file_to_buffer
 *
 * Given a valid file descriptor, read the contents into
 * a buffer user supplied buffer.  Use this function when
 * the source may or may not be a binary file or contain
 * '\0' within the text body.
 *
 * @param fd The file descriptor to read from.
 * @param buf The buffer to return.
 * @param size The number of bytes returned in the buffer.
 * @return GUTENFETCH_OK on success, something else on error.
 */
gutenfetch_error_t 
gutenfetch_util_read_binary_file_to_buffer(
	int fd,
	char **buf,
	size_t *size)
{
	ssize_t rw = -1;
	size_t i = 0;
	size_t buf_size = 0;
	size_t block_size = 4096;
	char *buffer = NULL;
	char *temp = NULL;

	assert(size != NULL);
	assert(buf != NULL);

	FREE_NULL(*buf);
		
	if (fd != -1) {
		if( lseek(fd, 0, SEEK_SET) == -1) {
			return( GUTENFETCH_SEE_ERRNO );
		}
		do {
			if (i + block_size > buf_size) {
				temp = realloc(buffer, buf_size + block_size);
				if (temp != NULL) {
					buffer = temp;
					buf_size += block_size;
				} else {
					FREE_NULL(buffer);
					return( GUTENFETCH_NOMEM );
				}	
			}
			rw = read(fd, &buffer[i], block_size);
			if (rw > 0) {
				i += rw;
			}
		} while(rw > 0);
	}

	if( rw < 0 ) {
		FREE_NULL( buffer );
		return( GUTENFETCH_SEE_ERRNO );
	}

	/* Shrink memory block and set return values
	 * to correct values. */
	if (buffer != NULL) {
		temp = realloc(buffer, i + 1);
		temp[i] = '\0';
		if (temp != NULL) {
			*buf = buffer = temp;
			*size = i;
		} else {
			FREE_NULL(buffer);
			return( GUTENFETCH_NOMEM );
		}
	}	
	return( GUTENFETCH_OK );
}
syntax highlighted by Code2HTML, v. 0.9.1