/*
gutenfetch - a small utility to list and fetch books available through
project gutenberg
Copyright (C) 2001, 2002, 2003, 2004 Russell Francis
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the
Free Software Foundation, Inc.
59 Temple Place, Suite 330
Boston, MA 02111-1307 USA
Last updated on $Date: 2004/07/20 00:23:59 $ by $Author: johntabularasa $.
*/
#include "stddefs.h"
#ifdef HAVE_STDIO_H
# include <stdio.h>
#endif
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef HAVE_SYS_UIO_H
# include <sys/uio.h>
#endif
#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#ifdef HAVE_STDARG_H
# include <stdarg.h>
#endif
#ifdef HAVE_ASSERT_H
# include <assert.h>
#endif
#ifdef HAVE_DIRENT_H
# include <dirent.h>
#endif
#ifdef HAVE_ERRNO_H
# include <errno.h>
#endif
#include "list.h"
#include "libgutenfetch_filter.h"
#include "libgutenfetch_utility.h"
#include "gutenfetch.h"
#ifndef HAVE_STRSEP
/**
* The strsep() function locates, in the string referenced by *stringp, the
* first occurrence of any character in the string delim (or the terminating
* `\0' character) and replaces it with a `\0'. The location of the next
* character after the delimiter character (or NULL, if the end of the
* string was reached) is stored in *stringp. The original value of
* *stringp is returned.
*
* An ``empty'' field (i.e., a character in the string delim occurs as the
* first character of *stringp) can be detected by comparing the location
* referenced by the returned pointer to `\0'.
*
* If *stringp is initially NULL, strsep() returns NULL.
*
* @param stringp The string to separate.
* @param delim The character to separate the string stringp on.
* @return NULL or the next character after the first found delim.
*/
char *
strsep(char **stringp, const char *delim)
{
char *res;
if (!stringp || !*stringp || !**stringp)
return NULL;
res = *stringp;
while (**stringp && !strchr(delim, **stringp))
++(*stringp);
if (**stringp) {
**stringp = '\0';
++(*stringp);
}
return res;
}
#endif /* HAVE_STRSEP */
#ifndef HAVE_MERGESORT
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* The following mergesort function and supporting functions were taken
* from FreeBSD to provide a sorting routine on machines which do not
* provide mergesort().
*
* Russell Francis <johntabularasa@users.sf.net> 11-15-03
*/
#ifndef u_char
# define u_char unsigned char
#endif
#define ISIZE sizeof(int)
#define PSIZE sizeof(u_char *)
#define ICOPY_LIST(src, dst, last) \
do \
*(int*)dst = *(int*)src, src += ISIZE, dst += ISIZE; \
while(src < last)
#define ICOPY_ELT(src, dst, i) \
do \
*(int*) dst = *(int*) src, src += ISIZE, dst += ISIZE; \
while (i -= ISIZE)
#define CCOPY_LIST(src, dst, last) \
do \
*dst++ = *src++; \
while (src < last)
#define CCOPY_ELT(src, dst, i) \
do \
*dst++ = *src++; \
while (i -= 1)
/*
* Find the next possible pointer head. (Trickery for forcing an array
* to do double duty as a linked list when objects do not align with word
* boundaries.
*/
/* Assumption: PSIZE is a power of 2. */
#define EVAL(p) (u_char **) \
((u_char *)0 + \
(((u_char *)p + PSIZE - 1 - (u_char *) 0) & ~(PSIZE - 1)))
static void
mergesort_insertionsort(u_char *, size_t, size_t, int (*)(const void *, const void *));
void
mergesort_setup(u_char *, u_char *, size_t, size_t, int(*)(const void *, const void *));
int
mergesort(void *, size_t, size_t, int(*)(const void *, const void *));
/*
* Arguments are as for qsort.
*/
int
mergesort(base, nmemb, size, cmp)
void *base;
size_t nmemb;
size_t size;
int (*cmp)(const void *, const void *);
{
int i, sense;
int big, iflag;
u_char *f1, *f2, *t, *b, *tp2, *q, *l1, *l2;
u_char *list2, *list1, *p2, *p, *last, **p1;
if (size < PSIZE / 2) { /* Pointers must fit into 2 * size. */
errno = EINVAL;
return (-1);
}
if (nmemb == 0)
return (0);
/*
* XXX
* Stupid subtraction for the Cray.
*/
iflag = 0;
if (!(size % ISIZE) && !(((char *)base - (char *)0) % ISIZE))
iflag = 1;
if ((list2 = malloc(nmemb * size + PSIZE)) == NULL)
return (-1);
list1 = base;
mergesort_setup(list1, list2, nmemb, size, cmp);
last = list2 + nmemb * size;
i = big = 0;
while (*EVAL(list2) != last) {
l2 = list1;
p1 = EVAL(list1);
for (tp2 = p2 = list2; p2 != last; p1 = EVAL(l2)) {
p2 = *EVAL(p2);
f1 = l2;
f2 = l1 = list1 + (p2 - list2);
if (p2 != last)
p2 = *EVAL(p2);
l2 = list1 + (p2 - list2);
while (f1 < l1 && f2 < l2) {
if ((*cmp)(f1, f2) <= 0) {
q = f2;
b = f1, t = l1;
sense = -1;
} else {
q = f1;
b = f2, t = l2;
sense = 0;
}
if (!big) { /* here i = 0 */
while ((b += size) < t && cmp(q, b) >sense)
if (++i == 6) {
big = 1;
goto EXPONENTIAL;
}
} else {
EXPONENTIAL: for (i = size; ; i <<= 1)
if ((p = (b + i)) >= t) {
if ((p = t - size) > b &&
(*cmp)(q, p) <= sense)
t = p;
else
b = p;
break;
} else if ((*cmp)(q, p) <= sense) {
t = p;
if (i == size)
big = 0;
goto FASTCASE;
} else
b = p;
while (t > b+size) {
i = (((t - b) / size) >> 1) * size;
if ((*cmp)(q, p = b + i) <= sense)
t = p;
else
b = p;
}
goto COPY;
FASTCASE: while (i > size)
if ((*cmp)(q,
p = b + (i >>= 1)) <= sense)
t = p;
else
b = p;
COPY: b = t;
}
i = size;
if (q == f1) {
if (iflag) {
ICOPY_LIST(f2, tp2, b);
ICOPY_ELT(f1, tp2, i);
} else {
CCOPY_LIST(f2, tp2, b);
CCOPY_ELT(f1, tp2, i);
}
} else {
if (iflag) {
ICOPY_LIST(f1, tp2, b);
ICOPY_ELT(f2, tp2, i);
} else {
CCOPY_LIST(f1, tp2, b);
CCOPY_ELT(f2, tp2, i);
}
}
}
if (f2 < l2) {
if (iflag)
ICOPY_LIST(f2, tp2, l2);
else
CCOPY_LIST(f2, tp2, l2);
} else if (f1 < l1) {
if (iflag)
ICOPY_LIST(f1, tp2, l1);
else
CCOPY_LIST(f1, tp2, l1);
}
*p1 = l2;
}
tp2 = list1; /* swap list1, list2 */
list1 = list2;
list2 = tp2;
last = list2 + nmemb*size;
}
if (base == list2) {
memmove(list2, list1, nmemb*size);
list2 = list1;
}
free(list2);
return (0);
}
#define swap(a, b) { \
s = b; \
i = size; \
do { \
tmp = *a; *a++ = *s; *s++ = tmp; \
} while (--i); \
a -= size; \
}
#define reverse(bot, top) { \
s = top; \
do { \
i = size; \
do { \
tmp = *bot; *bot++ = *s; *s++ = tmp; \
} while (--i); \
s -= size2; \
} while(bot < s); \
}
/*
* Optional hybrid natural/pairwise first pass. Eats up list1 in runs of
* increasing order, list2 in a corresponding linked list. Checks for runs
* when THRESHOLD/2 pairs compare with same sense. (Only used when NATURAL
* is defined. Otherwise simple pairwise merging is used.)
*/
void
mergesort_setup(list1, list2, n, size, cmp)
size_t n, size;
int (*cmp)(const void *, const void *);
u_char *list1, *list2;
{
int i, length, size2, tmp, sense;
u_char *f1, *f2, *s, *l2, *last, *p2;
size2 = size*2;
if (n <= 5) {
mergesort_insertionsort(list1, n, size, cmp);
*EVAL(list2) = (u_char*) list2 + n*size;
return;
}
/*
* Avoid running pointers out of bounds; limit n to evens
* for simplicity.
*/
i = 4 + (n & 1);
mergesort_insertionsort(list1 + (n - i) * size, i, size, cmp);
last = list1 + size * (n - i);
*EVAL(list2 + (last - list1)) = list2 + n * size;
#ifdef NATURAL
p2 = list2;
f1 = list1;
sense = (cmp(f1, f1 + size) > 0);
for (; f1 < last; sense = !sense) {
length = 2;
/* Find pairs with same sense. */
for (f2 = f1 + size2; f2 < last; f2 += size2) {
if ((cmp(f2, f2+ size) > 0) != sense)
break;
length += 2;
}
if (length < THRESHOLD) { /* Pairwise merge */
do {
p2 = *EVAL(p2) = f1 + size2 - list1 + list2;
if (sense > 0)
swap (f1, f1 + size);
} while ((f1 += size2) < f2);
} else { /* Natural merge */
l2 = f2;
for (f2 = f1 + size2; f2 < l2; f2 += size2) {
if ((cmp(f2-size, f2) > 0) != sense) {
p2 = *EVAL(p2) = f2 - list1 + list2;
if (sense > 0)
reverse(f1, f2-size);
f1 = f2;
}
}
if (sense > 0)
reverse (f1, f2-size);
f1 = f2;
if (f2 < last || cmp(f2 - size, f2) > 0)
p2 = *EVAL(p2) = f2 - list1 + list2;
else
p2 = *EVAL(p2) = list2 + n*size;
}
}
#else /* pairwise merge only. */
for (f1 = list1, p2 = list2; f1 < last; f1 += size2) {
p2 = *EVAL(p2) = p2 + size2;
if (cmp (f1, f1 + size) > 0)
swap(f1, f1 + size);
}
#endif /* NATURAL */
}
/*
* This is to avoid out-of-bounds addresses in sorting the
* last 4 elements.
*/
static void
mergesort_insertionsort(a, n, size, cmp)
u_char *a;
size_t n, size;
int (*cmp)(const void *, const void *);
{
u_char *ai, *s, *t, *u, tmp;
int i;
for (ai = a+size; --n >= 1; ai += size)
for (t = ai; t > a; t -= size) {
u = t - size;
if (cmp(u, t) <= 0)
break;
swap(u, t);
}
}
#endif /* HAVE_MERGESORT */
/**
* gutenfetch_util_strcat concatenates strings into one large string.
*
* This function takes a NULL terminated list of NULL terminated
* strings and concatenates them together.
*
* @param a A NULL terminated string.
* @return A string which must be freed when finished. It will
* be either the concatenation of all the strings passed in
* or will be NULL.
*/
char *
gutenfetch_util_strcat(char *a, ...)
{
va_list ap;
size_t len = 1; /* space for null terminator */
list_t *l = NULL;
list_t *head = NULL;
char *item;
assert (a != NULL);
len += strlen(a);
l = list_append(l, a);
va_start(ap, a);
while ( (item = va_arg(ap, char *)) != NULL ) {
len += strlen(item);
l = list_append(l, item);
}
va_end(ap);
item = malloc(sizeof(char) * len);
if ( item == NULL ) {
fprintf(stderr, _("Unable to allocate %u bytes of memory."),
(sizeof(char) * len));
abort();
}
head = l = list_first(l);
strcpy(item, l->data);
for(l = list_next(l); l != NULL ; l = list_next(l))
strcat(item, l->data);
list_remove_all(head, NULL);
return item;
}
/**
* Get the next line from a file.
*
* This function returns the characters in a file up to the '\n'
* character. The returned result is a NULL terminated string.
*
* @param fp The file pointer of the file to read from.
* @return a NULL terminated string which contains the next line
* in the file.
*/
char *
gutenfetch_util_getline(FILE *fp)
{
char *line = NULL;
char *temp = NULL;
size_t total_size;
size_t i;
for (i = 0, total_size = 0; TRUE ; ++i) {
/* allocate more memory for the line if needed. */
if(i == total_size){
total_size += BLOCK_SIZE;
temp = realloc(line, sizeof(char) * total_size);
if (temp == NULL) {
FREE_NULL(line);
/*if (opt_get_verbose()) {
fprintf(stderr,
_("Unable to allocate %u bytes of memory."),
sizeof(char) * total_size);
}*/
break;
}
line = temp;
}
/* Read the next character into the line */
if((line[i] = fgetc(fp)) == '\n'){
line[i] = '\0';
break;
}else if(line[i] == EOF){
if(i == 0) {
FREE_NULL(line);
} else {
line[i] = '\0';
}
break;
}
}
return line;
}
/**
* gutenfetch_ms_strip_text_buffer
*
* Strip the Windows world 0D 0A from the end of lines and
* replace with the unix friendly, 0A.
*
* @param buffer The text buffer we wish to modify. This must be
* NULL terminated or else!
* @return An error code or GUTENFETCH_OK
*/
gutenfetch_error_t
gutenfetch_ms_strip_text_buffer(char *buffer)
{
size_t writing_to = 0;
size_t reading_from = 0;
unsigned char current_byte = 0x00;
unsigned char last_byte = 0x00;
if (buffer == NULL)
return GUTENFETCH_BAD_PARAM;
/* This routine should be pretty quick and only
require one pass over the data. */
while (buffer[reading_from] != '\0') {
last_byte = current_byte;
current_byte = buffer[reading_from];
if ((last_byte == 0x0D) && (current_byte == 0x0A))
writing_to--;
buffer[writing_to] = current_byte;
reading_from++;
writing_to++;
}
buffer[writing_to] = '\0'; /* NULL terminate the result. */
return GUTENFETCH_OK;
}
/**
* gutenfetch_ms_strip_text_file
*
* This routine strips the MS <CR><LF> from a text file and replaces
* them with the UNIX friendly <LF>.
*
* @param filename The filename of the text file we wish to modify.
* @return GUTENFETCH_OK or an error code indicating the error.
*/
gutenfetch_error_t
gutenfetch_ms_strip_text_file(char *filename)
{
gutenfetch_error_t retval = GUTENFETCH_OK;
int fd;
if (filename == NULL)
return GUTENFETCH_BAD_PARAM;
fd = open(filename, O_RDWR);
if (fd >= 0) {
retval = gutenfetch_ms_strip_text_fd(fd);
close(fd);
} else {
retval = GUTENFETCH_SEE_ERRNO;
}
return retval;
}
/**
* gutenfetch_ms_strip_fd
*
* Strip Windows <CR><LF> from a text file.
*
* @param fd The file descriptor of an open r/w file to modify.
* @return GUTENFETCH_OK or an error.
*/
gutenfetch_error_t
gutenfetch_ms_strip_text_fd(int fd)
{
#define READ_BUFFER_SIZE 4096
#define WRITE_BUFFER_SIZE 4096
char read_buffer[READ_BUFFER_SIZE], write_buffer[WRITE_BUFFER_SIZE];
size_t read_buf_count = 0, write_buf_count = 0;
size_t read_buf_index = 0, write_buf_index = 0;
size_t read_fd_index = 0, write_fd_index = 0;
char last_byte = 0x00, current_byte = 0x00;
gutenfetch_error_t reterr = GUTENFETCH_OK;
read_buf_index = lseek(fd, 0, SEEK_SET); /* make sure it is seekable. */
if (read_buf_index == -1)
return GUTENFETCH_SEE_ERRNO;
while (TRUE) {
if (read_buf_index == read_buf_count) {
if (lseek(fd, read_fd_index, SEEK_SET) == -1) {
reterr = GUTENFETCH_SEE_ERRNO;
break;
}
read_buf_index = 0;
read_buf_count = read(fd, read_buffer, READ_BUFFER_SIZE);
if (read_buf_count < 0) { /* Error condition */
reterr = GUTENFETCH_SEE_ERRNO;
break;
}
read_fd_index += read_buf_count;
if (read_buf_count == 0) { /* EOF and clean exit */
break;
}
}
last_byte = current_byte;
current_byte = read_buffer[read_buf_index++];
if ((last_byte == 0x0D) && (current_byte == 0x0A))
write_buf_index--;
write_buffer[write_buf_index++] = current_byte;
if (write_buf_index == WRITE_BUFFER_SIZE) { /* flush the output */
if (lseek(fd, write_fd_index, SEEK_SET) == -1) {
reterr = GUTENFETCH_SEE_ERRNO;
break;
}
write_buf_count = write(fd, write_buffer, write_buf_index);
if (write_buf_count < 0) {
reterr = GUTENFETCH_SEE_ERRNO;
break;
}
write_fd_index += write_buf_count;
if (write_buf_count < write_buf_index) {
memmove(write_buffer, &write_buffer[write_buf_count],
write_buf_index - write_buf_count);
write_buf_index = write_buf_count;
} else {
write_buf_index = 0;
}
}
}
if (write_buf_index != 0) { /* We need to flush the write buffer. */
if (lseek(fd, write_fd_index, SEEK_SET) == -1) {
reterr = GUTENFETCH_SEE_ERRNO;
} else {
while ((write_buf_index != 0) && (reterr == GUTENFETCH_OK)) {
write_buf_count = write(fd, write_buffer, write_buf_index);
if (write_buf_count < 0) {
reterr = GUTENFETCH_SEE_ERRNO;
} else {
write_fd_index += write_buf_count;
if (write_buf_count < write_buf_index) {
memmove(write_buffer, &write_buffer[write_buf_count],
write_buf_index - write_buf_count);
write_buf_index = write_buf_count;
} else {
write_buf_index = 0;
}
}
}
}
}
if( ftruncate(fd, write_fd_index) == -1 ) {
reterr = GUTENFETCH_SEE_ERRNO;
}
return reterr;
}
/**
* gutenfetch_ms_clothe_text_buffer
*
* Replace the UNIX <lf> with the windows <cr><lf> in
* text documents. This function takes a pointer to a
* NULL terminated text buffer. It will probably change
* the address of the buffer if successful, otherwise, it
* will be unchanged.
*
* @param buffer A pointer to a text buffer to modify.
* @return GUTENFETCH_OK or an error code.
*/
gutenfetch_error_t
gutenfetch_ms_clothe_text_buffer(char **buffer)
{
char *new_buffer, *temp;
char current_byte = 0x01, previous_byte = 0x00;
size_t new_size;
size_t i, new_index;
if (buffer == NULL)
return GUTENFETCH_BAD_PARAM;
if (*buffer == NULL)
return GUTENFETCH_BAD_PARAM;
new_size = 4096;
new_buffer = malloc(sizeof(char) * new_size);
if (new_buffer == NULL)
return GUTENFETCH_NOMEM;
new_index = 0;
i = 0;
while (current_byte != '\0') {
previous_byte = current_byte;
current_byte = (*buffer)[i++];
if ((previous_byte != 0x0D) && (current_byte == 0x0A)) {
/* we need to insert the ms <cr><lf> here. */
new_buffer[new_index++] = 0x0D;
if (new_index == new_size) { /* we need more memory. */
new_size *= 2;
temp = realloc(new_buffer, new_size);
if (temp == NULL) {
FREE_NULL(new_buffer);
return GUTENFETCH_NOMEM;
}
new_buffer = temp;
}
}
new_buffer[new_index++] = current_byte;
if (new_index == new_size) {
new_size *= 2;
temp = realloc(new_buffer, new_size);
if (temp == NULL) {
FREE_NULL(new_buffer);
return GUTENFETCH_NOMEM;
}
new_buffer = temp;
}
}
/* shrink held memory block to match data.
Should always release memory. The result should
be NULL terminated allready. */
temp = realloc(new_buffer, new_index);
if (temp == NULL) {
FREE_NULL(new_buffer);
return GUTENFETCH_NOMEM;
}
FREE_NULL(*buffer);
*buffer = new_buffer = temp;
return GUTENFETCH_OK;
}
/**
* gutenfetch_ms_clothe_text_file
*
* Given a filename, replace unix <lf> with the
* windows <cr><lf> newline convention.
*
* @param filename The file we wish to modify.
* @return GUTENFETCH_OK or an error code.
*/
gutenfetch_error_t
gutenfetch_ms_clothe_text_file(char *filename)
{
gutenfetch_error_t retval = GUTENFETCH_OK;
int fd;
if (filename == NULL)
return GUTENFETCH_BAD_PARAM;
fd = open(filename, O_RDWR);
if (fd >= 0) {
retval = gutenfetch_ms_clothe_text_fd(fd);
close(fd);
} else {
retval = GUTENFETCH_SEE_ERRNO;
}
return retval;
}
/**
* gutenfetch_ms_clothe_text_fd
*
* Given a file descriptor, replace the UNIX <LF> with
* the longer windows <cr><lf>
*
* @param fd The filedescriptor
* @return GUTENFETCH_OK or and error code.
*/
gutenfetch_error_t
gutenfetch_ms_clothe_text_fd(int fd)
{
gutenfetch_error_t reterr;
char *buffer = NULL, *temp;
size_t bufsize = 0;
size_t bytes_read = 0;
size_t read_val;
/* start at the beginning of the file. */
if(lseek(fd, 0, SEEK_SET) == -1)
return GUTENFETCH_SEE_ERRNO;
/* read file into buffer. */
while (TRUE) {
if (bytes_read == bufsize) { /* allocate a large buffer. */
bufsize += 4096;
temp = realloc(buffer, bufsize);
if (temp == NULL) {
FREE_NULL(buffer);
return GUTENFETCH_NOMEM;
}
buffer = temp;
}
read_val = read(fd, &buffer[bytes_read], bufsize - bytes_read);
if (read_val < 0) { /* Error condition */
FREE_NULL(buffer);
return GUTENFETCH_SEE_ERRNO;
} else if (read_val == 0) { /* EOF */
if (bytes_read == bufsize) { /* allocate a large buffer. */
bufsize += 1;
temp = realloc(buffer, bufsize);
if (temp == NULL) {
FREE_NULL(buffer);
return GUTENFETCH_NOMEM;
}
buffer = temp;
}
buffer[bytes_read] = '\0';
reterr = gutenfetch_ms_clothe_text_buffer(&buffer);
break;
} else { /* more input to read in. */
bytes_read += read_val;
}
}
if (reterr == GUTENFETCH_OK) {
bufsize = 0;
while (buffer[bufsize] != '\0') /* determine length of buffer */
bufsize++;
lseek(fd, 0, SEEK_SET);
bytes_read = 0;
while (bytes_read < bufsize) {
read_val = write(fd, &buffer[bytes_read], bufsize);
if (read_val == -1) {
FREE_NULL(buffer);
reterr = GUTENFETCH_SEE_ERRNO;
break;
}
bytes_read += read_val;
}
}
return reterr;
}
/**
* gutenfetch_util_get_author
*
* Given a one line description of the book, strip out and
* return the author.
*
* @param str The one line description of the book
* from GUTINDEX.ALL
* @return The author if we can guess it or NULL.
* the result must be freed.
*/
char *
gutenfetch_util_get_author(char *str)
{
char *author = NULL;
list_t *match = NULL, *lt = NULL;
match = gutenfetch_ifilter_match(IFILTER_AUTHOR, str);
if (match != NULL) {
lt = list_next(list_first(match));
if (lt != NULL)
author = strdup(lt->data);
list_remove_all(match, free);
}
return author;
}
/**
* gutenfetch_util_get_title
*
* This function takes the one line description from
* GUTINDEX.ALL and returns the title or NULL.
*
* @param str The one line description of the book.
* @return NULL or the title, if ! NULL, the result
* must be freed.
*/
char *
gutenfetch_util_get_title(char *str)
{
char *title = NULL;
list_t *match = NULL, *lt = NULL;
/* Try the old one first as it is stricter and more likely to fail! */
match = gutenfetch_ifilter_match(IFILTER_OLD_TITLE, str);
if (match != NULL) {
lt = list_next(list_first(match));
if (lt != NULL) {
title = strdup(lt->data);
}
list_remove_all(match, free);
} else { /* maybe it is a new book entry. */
match = gutenfetch_ifilter_match(IFILTER_NEW_TITLE, str);
if (match != NULL) {
lt = list_next(list_first(match));
if (lt != NULL) {
title = strdup(lt->data);
}
list_remove_all(match, free);
}
}
return title;
}
/**
* gutenfetch_util_build_URL
*
* Given a server and a file, append the two and return a string.
* This is sort of a stupid function.
*
* @param server The server we will get the file from.
* @param file The file we will get from the server.
* @return NULL on failure or a valid URL.
*/
char *
gutenfetch_util_build_URL(gutenfetch_server_t *server, const char *file)
{
char *url = NULL;
if ((server != NULL) && (file != NULL)) {
if (strlen(server->host) >= 1) {
if (server->host[strlen(server->host) - 1] != '/') {
url = gutenfetch_util_strcat(
server->host, "/", file, NULL);
} else {
url = gutenfetch_util_strcat(
server->host, file, NULL);
}
}
}
return url;
}
/**
* gutenfetch_util_get_temp_dir
*
* Return the temporary directory we should use
* when creating temporary files.
*
* @return A NULL-terminated string which should
* be used as the temporary directory. This
* string must be freed after use. This function
* will return NULL if a suitable temporary
* directory could not be found.
*/
char *
gutenfetch_util_get_temp_dir(void)
{
static int been_called = FALSE;
static char directory[1024];
static char *dir = NULL;
if (been_called == FALSE) {
been_called = TRUE;
#ifdef WIN32
// XXX implement for windows.
dir = NULL;
#else
snprintf(&directory[0], 1024, "/tmp/libgutenfetch%d.XXXX", getpid());
dir = mkdtemp(directory);
#endif
}
if (dir == NULL)
return NULL;
return strdup(dir);
}
/**
* gutenfetch_util_get_temp_file
*
* Generate a temporary filename and return
* a file descriptor to it.
*
* @param temp_name A pointer to the filename which will be
* filled in by this function.
* @return A valid file descriptor open for read & write
* or -1 on failure.
*/
int
gutenfetch_util_get_temp_file(char **temp_name)
{
int fd;
char *dir = gutenfetch_util_get_temp_dir();
char *file = strdup("ilovekif.XXXX");
char *full = gutenfetch_util_strcat(dir, DIR_SEPARATOR, file, NULL);
fd = mkstemp(full);
if (fd != -1) {
if (temp_name != NULL) {
FREE_NULL( *temp_name );
*temp_name = strdup(full);
}
}
FREE_NULL(dir);
FREE_NULL(file);
FREE_NULL(full);
return fd;
}
/**
* gutenfetch_util_free_temp_dir
*
* This function removes the temporary directory
* and all files below it. It is called once by
* gutenfetch_shutdown before an application exits.
*/
void
gutenfetch_util_free_temp_dir(void)
{
char *temp_dir = gutenfetch_util_get_temp_dir();
if (temp_dir != NULL) {
gutenfetch_util_rmdir(temp_dir);
FREE_NULL(temp_dir);
}
}
/**
* gutenfetch_util_rmdir
*
* This function takes a directory and removes all files
* below it as well as the directory itself.
*
* @param temp_dir The name of a directory to remove.
*/
void
gutenfetch_util_rmdir(const char *temp_dir)
{
if (temp_dir == NULL)
return;
gutenfetch_util_rm_below_dir(temp_dir);
rmdir(temp_dir);
}
/**
* gutenfetch_util_rm_below_dir
*
* This function takes a directory and removes all files
* and directories recursively below it.
*
* @param temp_dir The name of the directory to remove
* all contents from.
*/
void
gutenfetch_util_rm_below_dir(const char *temp_dir)
{
DIR *dir;
struct dirent *entry;
char path[4096];
if (temp_dir == NULL)
return;
dir = opendir(temp_dir);
if (dir != NULL) {
while ((entry = readdir(dir)) != NULL) {
if ( (strcmp(entry->d_name, ".") != 0) &&
(strcmp(entry->d_name, "..") != 0))
{
if (entry->d_type == DT_DIR) {
gutenfetch_util_rmdir(entry->d_name);
} else {
snprintf(path, 4096, "%s%s%s",
temp_dir,
DIR_SEPARATOR,
entry->d_name);
unlink(path);
}
}
}
closedir(dir);
}
return;
}
/**
* gutenfetch_util_rm_old_below_dir
*
* This removes all files below a certain directory
* whose time has expired. This is used by the cache
* functions.
*
* @param expires The time_t that files expire.
* @param temp_dir The directory to scan for old
* files.
*/
void
gutenfetch_util_rm_old_below_dir(
time_t expires,
const char *temp_dir)
{
char path[4096];
DIR *dir;
struct dirent *entry;
struct stat sb;
time_t now;
if (temp_dir == NULL)
return;
dir = opendir(temp_dir);
if (dir != NULL) {
now = time(NULL);
while ((entry = readdir(dir)) != NULL) {
if ( (strcmp(entry->d_name, ".") != 0) &&
(strcmp(entry->d_name, "..") != 0))
{
if (entry->d_type == DT_DIR) {
gutenfetch_util_rm_old_below_dir(
expires, entry->d_name);
} else {
snprintf(path, 4096, "%s%s%s",
temp_dir,
DIR_SEPARATOR,
entry->d_name);
if (stat(path, &sb) == 0) {
if ( (now - sb.st_atime) > expires ) {
unlink(path);
}
}
}
}
}
}
}
/**
* gutenfetch_util_get_mime_from_filename
*
* Given a filename, return the MIME type of the
* file.
*
* @param filename The filename of the file to find
* the mime type of.
* @return A NULL terminated string which describes the
* mime type of the file. It must be freed after
* use.
*/
char *
gutenfetch_util_get_mime_from_filename(const char *filename)
{
char *mime = NULL;
char *ext = NULL;
if (filename == NULL)
return NULL;
gutenfetch_util_get_base_ext(NULL, &ext, filename);
if (ext != NULL) {
if (strcmp(ext, "zip") == 0) {
mime = strdup("application/zip");
} else if (strcmp(ext, "txt") == 0) {
mime = strdup("text/plain");
} else if (strcmp(ext, "htm") == 0) {
mime = strdup("text/html");
} else if (strcmp(ext, "tex") == 0) {
mime = strdup("tex/plain");
} else if (strcmp(ext, "xml") == 0) {
mime = strdup("text/xml");
} else if (strcmp(ext, "mp3") == 0) {
mime = strdup("audio/mpeg");
} else if (strcmp(ext, "rtf") == 0) {
mime = strdup("text/richtext");
} else if (strcmp(ext, "pdf") == 0) {
mime = strdup("application/pdf");
} else if (strcmp(ext, "lit") == 0) {
mime = strdup("application/octet-stream");
} else if (strcmp(ext, "doc") == 0) {
mime = strdup("application/octet-stream");
} else if (strcmp(ext, "pdb") == 0) {
mime = strdup("application/octet-stream");
} else if (strcmp(ext, "prc") == 0) {
mime = strdup("application/octet-stream");
} else {
mime = strdup("application/octet-stream");
}
}
FREE_NULL(ext);
return mime;
}
/**
* gutenfetch_util_extension_is
*
* Test the extension of a filename. Return true if
* the extension matches, false if it doesn't
*
* @param ext The extension we think might match.
* @param filename The filename whose extension we
* are curious about.
* @return TRUE or FALSE.
*/
int
gutenfetch_util_extension_is(char *ext, char *filename)
{
int retval = FALSE;
char *fext = NULL;
gutenfetch_util_get_base_ext(NULL, &fext, filename);
if (strcmp(ext, fext) == 0) {
retval = TRUE;
}
FREE_NULL(fext);
return retval;
}
void
gutenfetch_util_get_base_ext(char **base, char **ext, const char *filename)
{
list_t *lt = NULL;
list_t *match = NULL;
if (filename == NULL) {
return;
}
match = gutenfetch_ifilter_match(IFILTER_FILENAME_BASE_EXT, filename);
if (match != NULL) {
lt = list_next(list_first(match));
if (lt != NULL) {
if (base != NULL) {
*base = strdup((char*)lt->data);
}
}
lt = list_next(lt);
if (lt != NULL) {
if (ext != NULL) {
*ext = strdup((char*)lt->data);
}
}
list_remove_all(match, free);
}
return;
}
char *
gutenfetch_util_get_home_directory(void)
{
char *home_dir = NULL;
#ifdef WIN32
/* Currently, we write a configuration file in the
current working directory if we are on windows.
This is wrong and a bit of a hack I think, perhaps
things should go in the registry? I really am not
sure, if some Windows guru know, feel free to let
me know.
*/
#ifndef PATH_MAX
# define PATH_MAX 256
#endif /* PATH_MAX */
char hdir[PATH_MAX];
home_dir = getcwd(home_dir, PATH_MAX);
#else
home_dir = getenv("HOME");
#endif
return home_dir;
}
void
gutenfetch_util_build_path(const char *filename)
{
list_t *lt = NULL;
list_t *path_list = NULL;
struct stat sb;
char *current_dir = NULL;
char *f = NULL;
char *g = NULL;
char *ptr = NULL;
int stat_val, err = FALSE;
assert(filename != NULL);
/* This chops off the initial '/' from the path
*/
if (strlen(filename) < 2)
return;
g = f = strdup(&filename[1]);
assert(f != NULL);
/* Build the chunks of the path. */
while((ptr = strsep(&f, DIR_SEPARATOR)) != NULL) {
path_list = list_append(path_list, ptr);
}
/* Remove the last element as the should be the filename. */
lt = list_last(path_list);
lt = path_list = list_remove_node(lt, NULL);
/* iterate through them and ensure that they exist or create them. */
lt = list_first(lt);
while ((lt != NULL) && (err == FALSE)){
if (current_dir == NULL) {
current_dir = gutenfetch_util_strcat(DIR_SEPARATOR, lt->data, NULL);
} else {
f = current_dir;
current_dir = gutenfetch_util_strcat(current_dir,
DIR_SEPARATOR, lt->data, NULL);
FREE_NULL(f);
}
lt = list_next(lt);
stat_val = stat(current_dir, &sb);
if (stat_val == 0) {
if ((sb.st_mode | S_IFDIR) == 0) {
/* path is not a directory !!! */
err = TRUE;
}
} else {
if (errno == ENOENT) {
if(mkdir(current_dir, S_IRWXU | S_IRGRP | S_IXGRP) == -1) {
/* Error creating directory. */
err = TRUE;
}
} else {
/* Some other error with stat ??? */
err = TRUE;
}
}
}
list_remove_all(path_list, NULL);
FREE_NULL(current_dir);
FREE_NULL(g);
}
/**
* gutenfetch_util_move
*
* Given two filenames, copy the first one to the second one.
* It will first try to rename them, which should work if
* they are on the same partition, if not, it will manually
* copy all the bytes.
*
* @param from The full path/filename of the file to move.
* @param to The full path/filename of the destination.
* @return 1 on success, -1 on failure.
*/
int
gutenfetch_util_move(const char *from, const char *to)
{
#define BUF_SIZE 8192
char buf[BUF_SIZE];
int from_fd, to_fd;
ssize_t bytes_read, bytes_written;
if ((from == NULL) || (to == NULL))
return( -1 );
/* Build the path to the to file if it doesn't exist. */
gutenfetch_util_build_path(to);
/* First try to just rename the file as this will be
* much quicker if it is allowed. If not, we will manually
* copy the files.
*/
if (rename(from, to) == -1) {
from_fd = open(from, O_RDONLY);
if (from_fd == -1) {
return( -1 );
}
to_fd = open(to,
O_WRONLY | O_CREAT | O_TRUNC,
S_IRWXU | S_IRGRP | S_IXGRP);
if (to_fd == -1) {
close(from_fd);
return( -1 );
}
while ((bytes_read = read(from_fd, buf, BUF_SIZE)) > 0) {
bytes_written = write(to_fd, buf, bytes_read);
if (bytes_read != bytes_written) {
close(from_fd);
close(to_fd);
return( -1 );
}
}
close(from_fd);
close(to_fd);
if (bytes_read != 0)
return( -1 );
/* Remove the from file, this makes it have
* the same semantics as rename.
*/
unlink(from);
}
return( 1 );
}
/**
* gutenfetch_util_read_file_to_buffer
*
* Given a valid file descriptor, read the contents into
* a buffer and return the result.
*
* ** NOTE **
*
* This should only be used to read text files,
* if it is used on a binary file, it may result
* in not being able to read after the first zero.
*
* @param fd The file descriptor to read from.
* @return A NULL-terminated buffer with the contents of the
* file.
*/
char *
gutenfetch_util_read_file_to_buffer(int fd)
{
ssize_t rw = -1;
size_t i = 0;
size_t buf_size = 0;
size_t block_size = 4096;
char *buffer = NULL;
char *temp = NULL;
if (fd != -1) {
lseek(fd, 0, SEEK_SET);
do {
if (i + block_size > buf_size) {
temp = realloc(buffer, buf_size + block_size);
if (temp != NULL) {
buffer = temp;
buf_size += block_size;
} else {
FREE_NULL(buffer);
return( NULL );
}
}
rw = read(fd, &buffer[i], block_size);
if (rw > 0) {
i += rw;
}
} while(rw > 0);
}
if( rw < 0 ) {
FREE_NULL(buffer);
return( NULL );
}
/* NULL-terminate the result. */
if (buffer != NULL) {
temp = realloc(buffer, i + 1);
if (temp != NULL) {
temp[i] = '\0';
buffer = temp;
} else {
FREE_NULL(buffer);
}
}
return buffer;
}
/**
* gutenfetch_util_read_binary_file_to_buffer
*
* Given a valid file descriptor, read the contents into
* a buffer user supplied buffer. Use this function when
* the source may or may not be a binary file or contain
* '\0' within the text body.
*
* @param fd The file descriptor to read from.
* @param buf The buffer to return.
* @param size The number of bytes returned in the buffer.
* @return GUTENFETCH_OK on success, something else on error.
*/
gutenfetch_error_t
gutenfetch_util_read_binary_file_to_buffer(
int fd,
char **buf,
size_t *size)
{
ssize_t rw = -1;
size_t i = 0;
size_t buf_size = 0;
size_t block_size = 4096;
char *buffer = NULL;
char *temp = NULL;
assert(size != NULL);
assert(buf != NULL);
FREE_NULL(*buf);
if (fd != -1) {
if( lseek(fd, 0, SEEK_SET) == -1) {
return( GUTENFETCH_SEE_ERRNO );
}
do {
if (i + block_size > buf_size) {
temp = realloc(buffer, buf_size + block_size);
if (temp != NULL) {
buffer = temp;
buf_size += block_size;
} else {
FREE_NULL(buffer);
return( GUTENFETCH_NOMEM );
}
}
rw = read(fd, &buffer[i], block_size);
if (rw > 0) {
i += rw;
}
} while(rw > 0);
}
if( rw < 0 ) {
FREE_NULL( buffer );
return( GUTENFETCH_SEE_ERRNO );
}
/* Shrink memory block and set return values
* to correct values. */
if (buffer != NULL) {
temp = realloc(buffer, i + 1);
temp[i] = '\0';
if (temp != NULL) {
*buf = buffer = temp;
*size = i;
} else {
FREE_NULL(buffer);
return( GUTENFETCH_NOMEM );
}
}
return( GUTENFETCH_OK );
}
syntax highlighted by Code2HTML, v. 0.9.1