/*
    gutenfetch - a small utility to list and fetch books available through
	project gutenberg

    Copyright (C) 2001, 2002, 2003, 2004 Russell Francis

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the

	Free Software Foundation, Inc.
	59 Temple Place, Suite 330
	Boston, MA 02111-1307 USA

    Last updated on $Date: 2004/07/20 00:23:59 $ by $Author: johntabularasa $.
*/
#include "stddefs.h"
#ifdef HAVE_STDIO_H
# include <stdio.h>
#endif
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef HAVE_SYS_UIO_H
# include <sys/uio.h>
#endif
#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#ifdef HAVE_STDARG_H
# include <stdarg.h>
#endif
#ifdef HAVE_ASSERT_H
# include <assert.h>
#endif
#ifdef HAVE_DIRENT_H
# include <dirent.h>
#endif
#ifdef HAVE_ERRNO_H
# include <errno.h>
#endif
#include "list.h"
#include "libgutenfetch_filter.h"
#include "libgutenfetch_utility.h"
#include "gutenfetch.h"

#ifndef HAVE_STRSEP
/**
 * The strsep() function locates, in the string referenced by *stringp, the
 * first occurrence of any character in the string delim (or the terminating
 * `\0' character) and replaces it with a `\0'. The location of the next
 * character after the delimiter character (or NULL, if the end of the
 * string was reached) is stored in *stringp. The original value of
 * *stringp is returned.
* * An ``empty'' field (i.e., a character in the string delim occurs as the * first character of *stringp) can be detected by comparing the location * referenced by the returned pointer to `\0'. * * If *stringp is initially NULL, strsep() returns NULL. * * @param stringp The string to separate. * @param delim The character to separate the string stringp on. * @return NULL or the next character after the first found delim. */ char * strsep(char **stringp, const char *delim) { char *res; if (!stringp || !*stringp || !**stringp) return NULL; res = *stringp; while (**stringp && !strchr(delim, **stringp)) ++(*stringp); if (**stringp) { **stringp = '\0'; ++(*stringp); } return res; } #endif /* HAVE_STRSEP */ #ifndef HAVE_MERGESORT /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Peter McIlroy. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
*
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The following mergesort function and supporting functions were taken
 * from FreeBSD to provide a sorting routine on machines which do not
 * provide mergesort().
 *
 * Russell Francis 11-15-03
 */
#ifndef u_char
# define u_char unsigned char
#endif

/* Unit sizes used by the copy macros and by the pointer-slot arithmetic. */
#define ISIZE sizeof(int)
#define PSIZE sizeof(u_char *)

/*
 * Copy helpers.  All four expand to a bare do/while statement, so the body
 * runs at least once, and all of them advance the pointer arguments they
 * are given (the arguments must therefore be modifiable lvalues).
 *
 * ICOPY_LIST: copy int-sized words from src to dst until src reaches last.
 */
#define ICOPY_LIST(src, dst, last) \
	do \
	*(int*)dst = *(int*)src, src += ISIZE, dst += ISIZE; \
	while(src < last)
/* ICOPY_ELT: copy i bytes one int at a time; i is counted down to zero. */
#define ICOPY_ELT(src, dst, i) \
	do \
	*(int*) dst = *(int*) src, src += ISIZE, dst += ISIZE; \
	while (i -= ISIZE)

/* CCOPY_LIST: copy single bytes from src to dst until src reaches last. */
#define CCOPY_LIST(src, dst, last) \
	do \
		*dst++ = *src++; \
	while (src < last)
/* CCOPY_ELT: copy i bytes one at a time; i is counted down to zero. */
#define CCOPY_ELT(src, dst, i) \
	do \
		*dst++ = *src++; \
	while (i -= 1)

/*
 * Find the next possible pointer head.  (Trickery for forcing an array
 * to do double duty as a linked list when objects do not align with word
 * boundaries.)
 */
/* Assumption: PSIZE is a power of 2.
*/ #define EVAL(p) (u_char **) \ ((u_char *)0 + \ (((u_char *)p + PSIZE - 1 - (u_char *) 0) & ~(PSIZE - 1))) static void mergesort_insertionsort(u_char *, size_t, size_t, int (*)(const void *, const void *)); void mergesort_setup(u_char *, u_char *, size_t, size_t, int(*)(const void *, const void *)); int mergesort(void *, size_t, size_t, int(*)(const void *, const void *)); /* * Arguments are as for qsort. */ int mergesort(base, nmemb, size, cmp) void *base; size_t nmemb; size_t size; int (*cmp)(const void *, const void *); { int i, sense; int big, iflag; u_char *f1, *f2, *t, *b, *tp2, *q, *l1, *l2; u_char *list2, *list1, *p2, *p, *last, **p1; if (size < PSIZE / 2) { /* Pointers must fit into 2 * size. */ errno = EINVAL; return (-1); } if (nmemb == 0) return (0); /* * XXX * Stupid subtraction for the Cray. */ iflag = 0; if (!(size % ISIZE) && !(((char *)base - (char *)0) % ISIZE)) iflag = 1; if ((list2 = malloc(nmemb * size + PSIZE)) == NULL) return (-1); list1 = base; mergesort_setup(list1, list2, nmemb, size, cmp); last = list2 + nmemb * size; i = big = 0; while (*EVAL(list2) != last) { l2 = list1; p1 = EVAL(list1); for (tp2 = p2 = list2; p2 != last; p1 = EVAL(l2)) { p2 = *EVAL(p2); f1 = l2; f2 = l1 = list1 + (p2 - list2); if (p2 != last) p2 = *EVAL(p2); l2 = list1 + (p2 - list2); while (f1 < l1 && f2 < l2) { if ((*cmp)(f1, f2) <= 0) { q = f2; b = f1, t = l1; sense = -1; } else { q = f1; b = f2, t = l2; sense = 0; } if (!big) { /* here i = 0 */ while ((b += size) < t && cmp(q, b) >sense) if (++i == 6) { big = 1; goto EXPONENTIAL; } } else { EXPONENTIAL: for (i = size; ; i <<= 1) if ((p = (b + i)) >= t) { if ((p = t - size) > b && (*cmp)(q, p) <= sense) t = p; else b = p; break; } else if ((*cmp)(q, p) <= sense) { t = p; if (i == size) big = 0; goto FASTCASE; } else b = p; while (t > b+size) { i = (((t - b) / size) >> 1) * size; if ((*cmp)(q, p = b + i) <= sense) t = p; else b = p; } goto COPY; FASTCASE: while (i > size) if ((*cmp)(q, p = b + (i >>= 1)) <= sense) 
t = p; else b = p; COPY: b = t; } i = size; if (q == f1) { if (iflag) { ICOPY_LIST(f2, tp2, b); ICOPY_ELT(f1, tp2, i); } else { CCOPY_LIST(f2, tp2, b); CCOPY_ELT(f1, tp2, i); } } else { if (iflag) { ICOPY_LIST(f1, tp2, b); ICOPY_ELT(f2, tp2, i); } else { CCOPY_LIST(f1, tp2, b); CCOPY_ELT(f2, tp2, i); } } } if (f2 < l2) { if (iflag) ICOPY_LIST(f2, tp2, l2); else CCOPY_LIST(f2, tp2, l2); } else if (f1 < l1) { if (iflag) ICOPY_LIST(f1, tp2, l1); else CCOPY_LIST(f1, tp2, l1); } *p1 = l2; } tp2 = list1; /* swap list1, list2 */ list1 = list2; list2 = tp2; last = list2 + nmemb*size; } if (base == list2) { memmove(list2, list1, nmemb*size); list2 = list1; } free(list2); return (0); } #define swap(a, b) { \ s = b; \ i = size; \ do { \ tmp = *a; *a++ = *s; *s++ = tmp; \ } while (--i); \ a -= size; \ } #define reverse(bot, top) { \ s = top; \ do { \ i = size; \ do { \ tmp = *bot; *bot++ = *s; *s++ = tmp; \ } while (--i); \ s -= size2; \ } while(bot < s); \ } /* * Optional hybrid natural/pairwise first pass. Eats up list1 in runs of * increasing order, list2 in a corresponding linked list. Checks for runs * when THRESHOLD/2 pairs compare with same sense. (Only used when NATURAL * is defined. Otherwise simple pairwise merging is used.) */ void mergesort_setup(list1, list2, n, size, cmp) size_t n, size; int (*cmp)(const void *, const void *); u_char *list1, *list2; { int i, length, size2, tmp, sense; u_char *f1, *f2, *s, *l2, *last, *p2; size2 = size*2; if (n <= 5) { mergesort_insertionsort(list1, n, size, cmp); *EVAL(list2) = (u_char*) list2 + n*size; return; } /* * Avoid running pointers out of bounds; limit n to evens * for simplicity. */ i = 4 + (n & 1); mergesort_insertionsort(list1 + (n - i) * size, i, size, cmp); last = list1 + size * (n - i); *EVAL(list2 + (last - list1)) = list2 + n * size; #ifdef NATURAL p2 = list2; f1 = list1; sense = (cmp(f1, f1 + size) > 0); for (; f1 < last; sense = !sense) { length = 2; /* Find pairs with same sense. 
*/ for (f2 = f1 + size2; f2 < last; f2 += size2) { if ((cmp(f2, f2+ size) > 0) != sense) break; length += 2; } if (length < THRESHOLD) { /* Pairwise merge */ do { p2 = *EVAL(p2) = f1 + size2 - list1 + list2; if (sense > 0) swap (f1, f1 + size); } while ((f1 += size2) < f2); } else { /* Natural merge */ l2 = f2; for (f2 = f1 + size2; f2 < l2; f2 += size2) { if ((cmp(f2-size, f2) > 0) != sense) { p2 = *EVAL(p2) = f2 - list1 + list2; if (sense > 0) reverse(f1, f2-size); f1 = f2; } } if (sense > 0) reverse (f1, f2-size); f1 = f2; if (f2 < last || cmp(f2 - size, f2) > 0) p2 = *EVAL(p2) = f2 - list1 + list2; else p2 = *EVAL(p2) = list2 + n*size; } } #else /* pairwise merge only. */ for (f1 = list1, p2 = list2; f1 < last; f1 += size2) { p2 = *EVAL(p2) = p2 + size2; if (cmp (f1, f1 + size) > 0) swap(f1, f1 + size); } #endif /* NATURAL */ } /* * This is to avoid out-of-bounds addresses in sorting the * last 4 elements. */ static void mergesort_insertionsort(a, n, size, cmp) u_char *a; size_t n, size; int (*cmp)(const void *, const void *); { u_char *ai, *s, *t, *u, tmp; int i; for (ai = a+size; --n >= 1; ai += size) for (t = ai; t > a; t -= size) { u = t - size; if (cmp(u, t) <= 0) break; swap(u, t); } } #endif /* HAVE_MERGESORT */ /** * gutenfetch_util_strcat concatenates strings into one large string. * * This function takes a NULL terminated list of NULL terminated * strings and concatenates them together. * * @param a A NULL terminated string. * @return A string which must be freed when finished. It will * be either the concatenation of all the strings passed in * or will be NULL. */ char * gutenfetch_util_strcat(char *a, ...) 
{ va_list ap; size_t len = 1; /* space for null terminator */ list_t *l = NULL; list_t *head = NULL; char *item; assert (a != NULL); len += strlen(a); l = list_append(l, a); va_start(ap, a); while ( (item = va_arg(ap, char *)) != NULL ) { len += strlen(item); l = list_append(l, item); } va_end(ap); item = malloc(sizeof(char) * len); if ( item == NULL ) { fprintf(stderr, _("Unable to allocate %u bytes of memory."), (sizeof(char) * len)); abort(); } head = l = list_first(l); strcpy(item, l->data); for(l = list_next(l); l != NULL ; l = list_next(l)) strcat(item, l->data); list_remove_all(head, NULL); return item; } /** * Get the next line from a file. * * This function returns the characters in a file up to the '\n' * character. The returned result is a NULL terminated string. * * @param fp The file pointer of the file to read from. * @return a NULL terminated string which contains the next line * in the file. */ char * gutenfetch_util_getline(FILE *fp) { char *line = NULL; char *temp = NULL; size_t total_size; size_t i; for (i = 0, total_size = 0; TRUE ; ++i) { /* allocate more memory for the line if needed. */ if(i == total_size){ total_size += BLOCK_SIZE; temp = realloc(line, sizeof(char) * total_size); if (temp == NULL) { FREE_NULL(line); /*if (opt_get_verbose()) { fprintf(stderr, _("Unable to allocate %u bytes of memory."), sizeof(char) * total_size); }*/ break; } line = temp; } /* Read the next character into the line */ if((line[i] = fgetc(fp)) == '\n'){ line[i] = '\0'; break; }else if(line[i] == EOF){ if(i == 0) { FREE_NULL(line); } else { line[i] = '\0'; } break; } } return line; } /** * gutenfetch_ms_strip_text_buffer * * Strip the Windows world 0D 0A from the end of lines and * replace with the unix friendly, 0A. * * @param buffer The text buffer we wish to modify. This must be * NULL terminated or else! 
* @return An error code or GUTENFETCH_OK */ gutenfetch_error_t gutenfetch_ms_strip_text_buffer(char *buffer) { size_t writing_to = 0; size_t reading_from = 0; unsigned char current_byte = 0x00; unsigned char last_byte = 0x00; if (buffer == NULL) return GUTENFETCH_BAD_PARAM; /* This routine should be pretty quick and only require one pass over the data. */ while (buffer[reading_from] != '\0') { last_byte = current_byte; current_byte = buffer[reading_from]; if ((last_byte == 0x0D) && (current_byte == 0x0A)) writing_to--; buffer[writing_to] = current_byte; reading_from++; writing_to++; } buffer[writing_to] = '\0'; /* NULL terminate the result. */ return GUTENFETCH_OK; } /** * gutenfetch_ms_strip_text_file * * This routine strips the MS from a text file and replaces * them with the UNIX friendly . * * @param filename The filename of the text file we wish to modify. * @return GUTENFETCH_OK or an error code indicating the error. */ gutenfetch_error_t gutenfetch_ms_strip_text_file(char *filename) { gutenfetch_error_t retval = GUTENFETCH_OK; int fd; if (filename == NULL) return GUTENFETCH_BAD_PARAM; fd = open(filename, O_RDWR); if (fd >= 0) { retval = gutenfetch_ms_strip_text_fd(fd); close(fd); } else { retval = GUTENFETCH_SEE_ERRNO; } return retval; } /** * gutenfetch_ms_strip_fd * * Strip Windows from a text file. * * @param fd The file descriptor of an open r/w file to modify. * @return GUTENFETCH_OK or an error. */ gutenfetch_error_t gutenfetch_ms_strip_text_fd(int fd) { #define READ_BUFFER_SIZE 4096 #define WRITE_BUFFER_SIZE 4096 char read_buffer[READ_BUFFER_SIZE], write_buffer[WRITE_BUFFER_SIZE]; size_t read_buf_count = 0, write_buf_count = 0; size_t read_buf_index = 0, write_buf_index = 0; size_t read_fd_index = 0, write_fd_index = 0; char last_byte = 0x00, current_byte = 0x00; gutenfetch_error_t reterr = GUTENFETCH_OK; read_buf_index = lseek(fd, 0, SEEK_SET); /* make sure it is seekable. 
*/ if (read_buf_index == -1) return GUTENFETCH_SEE_ERRNO; while (TRUE) { if (read_buf_index == read_buf_count) { if (lseek(fd, read_fd_index, SEEK_SET) == -1) { reterr = GUTENFETCH_SEE_ERRNO; break; } read_buf_index = 0; read_buf_count = read(fd, read_buffer, READ_BUFFER_SIZE); if (read_buf_count < 0) { /* Error condition */ reterr = GUTENFETCH_SEE_ERRNO; break; } read_fd_index += read_buf_count; if (read_buf_count == 0) { /* EOF and clean exit */ break; } } last_byte = current_byte; current_byte = read_buffer[read_buf_index++]; if ((last_byte == 0x0D) && (current_byte == 0x0A)) write_buf_index--; write_buffer[write_buf_index++] = current_byte; if (write_buf_index == WRITE_BUFFER_SIZE) { /* flush the output */ if (lseek(fd, write_fd_index, SEEK_SET) == -1) { reterr = GUTENFETCH_SEE_ERRNO; break; } write_buf_count = write(fd, write_buffer, write_buf_index); if (write_buf_count < 0) { reterr = GUTENFETCH_SEE_ERRNO; break; } write_fd_index += write_buf_count; if (write_buf_count < write_buf_index) { memmove(write_buffer, &write_buffer[write_buf_count], write_buf_index - write_buf_count); write_buf_index = write_buf_count; } else { write_buf_index = 0; } } } if (write_buf_index != 0) { /* We need to flush the write buffer. */ if (lseek(fd, write_fd_index, SEEK_SET) == -1) { reterr = GUTENFETCH_SEE_ERRNO; } else { while ((write_buf_index != 0) && (reterr == GUTENFETCH_OK)) { write_buf_count = write(fd, write_buffer, write_buf_index); if (write_buf_count < 0) { reterr = GUTENFETCH_SEE_ERRNO; } else { write_fd_index += write_buf_count; if (write_buf_count < write_buf_index) { memmove(write_buffer, &write_buffer[write_buf_count], write_buf_index - write_buf_count); write_buf_index = write_buf_count; } else { write_buf_index = 0; } } } } } if( ftruncate(fd, write_fd_index) == -1 ) { reterr = GUTENFETCH_SEE_ERRNO; } return reterr; } /** * gutenfetch_ms_clothe_text_buffer * * Replace the UNIX with the windows in * text documents. 
This function takes a pointer to a * NULL terminated text buffer. It will probably change * the address of the buffer if successful, otherwise, it * will be unchanged. * * @param buffer A pointer to a text buffer to modify. * @return GUTENFETCH_OK or an error code. */ gutenfetch_error_t gutenfetch_ms_clothe_text_buffer(char **buffer) { char *new_buffer, *temp; char current_byte = 0x01, previous_byte = 0x00; size_t new_size; size_t i, new_index; if (buffer == NULL) return GUTENFETCH_BAD_PARAM; if (*buffer == NULL) return GUTENFETCH_BAD_PARAM; new_size = 4096; new_buffer = malloc(sizeof(char) * new_size); if (new_buffer == NULL) return GUTENFETCH_NOMEM; new_index = 0; i = 0; while (current_byte != '\0') { previous_byte = current_byte; current_byte = (*buffer)[i++]; if ((previous_byte != 0x0D) && (current_byte == 0x0A)) { /* we need to insert the ms here. */ new_buffer[new_index++] = 0x0D; if (new_index == new_size) { /* we need more memory. */ new_size *= 2; temp = realloc(new_buffer, new_size); if (temp == NULL) { FREE_NULL(new_buffer); return GUTENFETCH_NOMEM; } new_buffer = temp; } } new_buffer[new_index++] = current_byte; if (new_index == new_size) { new_size *= 2; temp = realloc(new_buffer, new_size); if (temp == NULL) { FREE_NULL(new_buffer); return GUTENFETCH_NOMEM; } new_buffer = temp; } } /* shrink held memory block to match data. Should always release memory. The result should be NULL terminated allready. */ temp = realloc(new_buffer, new_index); if (temp == NULL) { FREE_NULL(new_buffer); return GUTENFETCH_NOMEM; } FREE_NULL(*buffer); *buffer = new_buffer = temp; return GUTENFETCH_OK; } /** * gutenfetch_ms_clothe_text_file * * Given a filename, replace unix with the * windows newline convention. * * @param filename The file we wish to modify. * @return GUTENFETCH_OK or an error code. 
*/ gutenfetch_error_t gutenfetch_ms_clothe_text_file(char *filename) { gutenfetch_error_t retval = GUTENFETCH_OK; int fd; if (filename == NULL) return GUTENFETCH_BAD_PARAM; fd = open(filename, O_RDWR); if (fd >= 0) { retval = gutenfetch_ms_clothe_text_fd(fd); close(fd); } else { retval = GUTENFETCH_SEE_ERRNO; } return retval; } /** * gutenfetch_ms_clothe_text_fd * * Given a file descriptor, replace the UNIX with * the longer windows * * @param fd The filedescriptor * @return GUTENFETCH_OK or and error code. */ gutenfetch_error_t gutenfetch_ms_clothe_text_fd(int fd) { gutenfetch_error_t reterr; char *buffer = NULL, *temp; size_t bufsize = 0; size_t bytes_read = 0; size_t read_val; /* start at the beginning of the file. */ if(lseek(fd, 0, SEEK_SET) == -1) return GUTENFETCH_SEE_ERRNO; /* read file into buffer. */ while (TRUE) { if (bytes_read == bufsize) { /* allocate a large buffer. */ bufsize += 4096; temp = realloc(buffer, bufsize); if (temp == NULL) { FREE_NULL(buffer); return GUTENFETCH_NOMEM; } buffer = temp; } read_val = read(fd, &buffer[bytes_read], bufsize - bytes_read); if (read_val < 0) { /* Error condition */ FREE_NULL(buffer); return GUTENFETCH_SEE_ERRNO; } else if (read_val == 0) { /* EOF */ if (bytes_read == bufsize) { /* allocate a large buffer. */ bufsize += 1; temp = realloc(buffer, bufsize); if (temp == NULL) { FREE_NULL(buffer); return GUTENFETCH_NOMEM; } buffer = temp; } buffer[bytes_read] = '\0'; reterr = gutenfetch_ms_clothe_text_buffer(&buffer); break; } else { /* more input to read in. 
*/ bytes_read += read_val; } } if (reterr == GUTENFETCH_OK) { bufsize = 0; while (buffer[bufsize] != '\0') /* determine length of buffer */ bufsize++; lseek(fd, 0, SEEK_SET); bytes_read = 0; while (bytes_read < bufsize) { read_val = write(fd, &buffer[bytes_read], bufsize); if (read_val == -1) { FREE_NULL(buffer); reterr = GUTENFETCH_SEE_ERRNO; break; } bytes_read += read_val; } } return reterr; } /** * gutenfetch_util_get_author * * Given a one line description of the book, strip out and * return the author. * * @param str The one line description of the book * from GUTINDEX.ALL * @return The author if we can guess it or NULL. * the result must be freed. */ char * gutenfetch_util_get_author(char *str) { char *author = NULL; list_t *match = NULL, *lt = NULL; match = gutenfetch_ifilter_match(IFILTER_AUTHOR, str); if (match != NULL) { lt = list_next(list_first(match)); if (lt != NULL) author = strdup(lt->data); list_remove_all(match, free); } return author; } /** * gutenfetch_util_get_title * * This function takes the one line description from * GUTINDEX.ALL and returns the title or NULL. * * @param str The one line description of the book. * @return NULL or the title, if ! NULL, the result * must be freed. */ char * gutenfetch_util_get_title(char *str) { char *title = NULL; list_t *match = NULL, *lt = NULL; /* Try the old one first as it is stricter and more likely to fail! */ match = gutenfetch_ifilter_match(IFILTER_OLD_TITLE, str); if (match != NULL) { lt = list_next(list_first(match)); if (lt != NULL) { title = strdup(lt->data); } list_remove_all(match, free); } else { /* maybe it is a new book entry. */ match = gutenfetch_ifilter_match(IFILTER_NEW_TITLE, str); if (match != NULL) { lt = list_next(list_first(match)); if (lt != NULL) { title = strdup(lt->data); } list_remove_all(match, free); } } return title; } /** * gutenfetch_util_build_URL * * Given a server and a file, append the two and return a string. * This is sort of a stupid function. 
* * @param server The server we will get the file from. * @param file The file we will get from the server. * @return NULL on failure or a valid URL. */ char * gutenfetch_util_build_URL(gutenfetch_server_t *server, const char *file) { char *url = NULL; if ((server != NULL) && (file != NULL)) { if (strlen(server->host) >= 1) { if (server->host[strlen(server->host) - 1] != '/') { url = gutenfetch_util_strcat( server->host, "/", file, NULL); } else { url = gutenfetch_util_strcat( server->host, file, NULL); } } } return url; } /** * gutenfetch_util_get_temp_dir * * Return the temporary directory we should use * when creating temporary files. * * @return A NULL-terminated string which should * be used as the temporary directory. This * string must be freed after use. This function * will return NULL if a suitable temporary * directory could not be found. */ char * gutenfetch_util_get_temp_dir(void) { static int been_called = FALSE; static char directory[1024]; static char *dir = NULL; if (been_called == FALSE) { been_called = TRUE; #ifdef WIN32 // XXX implement for windows. dir = NULL; #else snprintf(&directory[0], 1024, "/tmp/libgutenfetch%d.XXXX", getpid()); dir = mkdtemp(directory); #endif } if (dir == NULL) return NULL; return strdup(dir); } /** * gutenfetch_util_get_temp_file * * Generate a temporary filename and return * a file descriptor to it. * * @param temp_name A pointer to the filename which will be * filled in by this function. * @return A valid file descriptor open for read & write * or -1 on failure. 
*/ int gutenfetch_util_get_temp_file(char **temp_name) { int fd; char *dir = gutenfetch_util_get_temp_dir(); char *file = strdup("ilovekif.XXXX"); char *full = gutenfetch_util_strcat(dir, DIR_SEPARATOR, file, NULL); fd = mkstemp(full); if (fd != -1) { if (temp_name != NULL) { FREE_NULL( *temp_name ); *temp_name = strdup(full); } } FREE_NULL(dir); FREE_NULL(file); FREE_NULL(full); return fd; } /** * gutenfetch_util_free_temp_dir * * This function removes the temporary directory * and all files below it. It is called once by * gutenfetch_shutdown before an application exits. */ void gutenfetch_util_free_temp_dir(void) { char *temp_dir = gutenfetch_util_get_temp_dir(); if (temp_dir != NULL) { gutenfetch_util_rmdir(temp_dir); FREE_NULL(temp_dir); } } /** * gutenfetch_util_rmdir * * This function takes a directory and removes all files * below it as well as the directory itself. * * @param temp_dir The name of a directory to remove. */ void gutenfetch_util_rmdir(const char *temp_dir) { if (temp_dir == NULL) return; gutenfetch_util_rm_below_dir(temp_dir); rmdir(temp_dir); } /** * gutenfetch_util_rm_below_dir * * This function takes a directory and removes all files * and directories recursively below it. * * @param temp_dir The name of the directory to remove * all contents from. */ void gutenfetch_util_rm_below_dir(const char *temp_dir) { DIR *dir; struct dirent *entry; char path[4096]; if (temp_dir == NULL) return; dir = opendir(temp_dir); if (dir != NULL) { while ((entry = readdir(dir)) != NULL) { if ( (strcmp(entry->d_name, ".") != 0) && (strcmp(entry->d_name, "..") != 0)) { if (entry->d_type == DT_DIR) { gutenfetch_util_rmdir(entry->d_name); } else { snprintf(path, 4096, "%s%s%s", temp_dir, DIR_SEPARATOR, entry->d_name); unlink(path); } } } closedir(dir); } return; } /** * gutenfetch_util_rm_old_below_dir * * This removes all files below a certain directory * whose time has expired. This is used by the cache * functions. 
* * @param expires The time_t that files expire. * @param temp_dir The directory to scan for old * files. */ void gutenfetch_util_rm_old_below_dir( time_t expires, const char *temp_dir) { char path[4096]; DIR *dir; struct dirent *entry; struct stat sb; time_t now; if (temp_dir == NULL) return; dir = opendir(temp_dir); if (dir != NULL) { now = time(NULL); while ((entry = readdir(dir)) != NULL) { if ( (strcmp(entry->d_name, ".") != 0) && (strcmp(entry->d_name, "..") != 0)) { if (entry->d_type == DT_DIR) { gutenfetch_util_rm_old_below_dir( expires, entry->d_name); } else { snprintf(path, 4096, "%s%s%s", temp_dir, DIR_SEPARATOR, entry->d_name); if (stat(path, &sb) == 0) { if ( (now - sb.st_atime) > expires ) { unlink(path); } } } } } } } /** * gutenfetch_util_get_mime_from_filename * * Given a filename, return the MIME type of the * file. * * @param filename The filename of the file to find * the mime type of. * @return A NULL terminated string which describes the * mime type of the file. It must be freed after * use. 
*/
char *
gutenfetch_util_get_mime_from_filename(const char *filename)
{
	/* Extension to MIME type.  Anything not listed is reported as
	 * "application/octet-stream".
	 * NOTE(review): "tex/plain" looks like a typo for another type;
	 * it is kept as-is because callers may compare against it. */
	static const struct {
		const char *ext;
		const char *mime;
	} known_types[] = {
		{ "zip", "application/zip" },
		{ "txt", "text/plain" },
		{ "htm", "text/html" },
		{ "tex", "tex/plain" },
		{ "xml", "text/xml" },
		{ "mp3", "audio/mpeg" },
		{ "rtf", "text/richtext" },
		{ "pdf", "application/pdf" },
		{ "lit", "application/octet-stream" },
		{ "doc", "application/octet-stream" },
		{ "pdb", "application/octet-stream" },
		{ "prc", "application/octet-stream" }
	};
	const char *chosen = "application/octet-stream";
	char *suffix = NULL;
	char *result = NULL;
	size_t k;

	if (filename == NULL)
		return NULL;

	gutenfetch_util_get_base_ext(NULL, &suffix, filename);

	/* Without an extension there is no answer at all (NULL). */
	if (suffix != NULL) {
		for (k = 0; k < sizeof(known_types) / sizeof(known_types[0]); ++k) {
			if (strcmp(suffix, known_types[k].ext) == 0) {
				chosen = known_types[k].mime;
				break;
			}
		}
		result = strdup(chosen);
	}

	FREE_NULL(suffix);
	return result;
}

/**
 * gutenfetch_util_extension_is
 *
 * Test the extension of a filename.  Return true if
 * the extension matches, false if it doesn't
 *
 * @param ext The extension we think might match.
 * @param filename The filename whose extension we
 * 		are curious about.
 * @return TRUE or FALSE.
*/ int gutenfetch_util_extension_is(char *ext, char *filename) { int retval = FALSE; char *fext = NULL; gutenfetch_util_get_base_ext(NULL, &fext, filename); if (strcmp(ext, fext) == 0) { retval = TRUE; } FREE_NULL(fext); return retval; } void gutenfetch_util_get_base_ext(char **base, char **ext, const char *filename) { list_t *lt = NULL; list_t *match = NULL; if (filename == NULL) { return; } match = gutenfetch_ifilter_match(IFILTER_FILENAME_BASE_EXT, filename); if (match != NULL) { lt = list_next(list_first(match)); if (lt != NULL) { if (base != NULL) { *base = strdup((char*)lt->data); } } lt = list_next(lt); if (lt != NULL) { if (ext != NULL) { *ext = strdup((char*)lt->data); } } list_remove_all(match, free); } return; } char * gutenfetch_util_get_home_directory(void) { char *home_dir = NULL; #ifdef WIN32 /* Currently, we write a configuration file in the current working directory if we are on windows. This is wrong and a bit of a hack I think, perhaps things should go in the registry? I really am not sure, if some Windows guru know, feel free to let me know. */ #ifndef PATH_MAX # define PATH_MAX 256 #endif /* PATH_MAX */ char hdir[PATH_MAX]; home_dir = getcwd(home_dir, PATH_MAX); #else home_dir = getenv("HOME"); #endif return home_dir; } void gutenfetch_util_build_path(const char *filename) { list_t *lt = NULL; list_t *path_list = NULL; struct stat sb; char *current_dir = NULL; char *f = NULL; char *g = NULL; char *ptr = NULL; int stat_val, err = FALSE; assert(filename != NULL); /* This chops off the initial '/' from the path */ if (strlen(filename) < 2) return; g = f = strdup(&filename[1]); assert(f != NULL); /* Build the chunks of the path. */ while((ptr = strsep(&f, DIR_SEPARATOR)) != NULL) { path_list = list_append(path_list, ptr); } /* Remove the last element as the should be the filename. */ lt = list_last(path_list); lt = path_list = list_remove_node(lt, NULL); /* iterate through them and ensure that they exist or create them. 
*/ lt = list_first(lt); while ((lt != NULL) && (err == FALSE)){ if (current_dir == NULL) { current_dir = gutenfetch_util_strcat(DIR_SEPARATOR, lt->data, NULL); } else { f = current_dir; current_dir = gutenfetch_util_strcat(current_dir, DIR_SEPARATOR, lt->data, NULL); FREE_NULL(f); } lt = list_next(lt); stat_val = stat(current_dir, &sb); if (stat_val == 0) { if ((sb.st_mode | S_IFDIR) == 0) { /* path is not a directory !!! */ err = TRUE; } } else { if (errno == ENOENT) { if(mkdir(current_dir, S_IRWXU | S_IRGRP | S_IXGRP) == -1) { /* Error creating directory. */ err = TRUE; } } else { /* Some other error with stat ??? */ err = TRUE; } } } list_remove_all(path_list, NULL); FREE_NULL(current_dir); FREE_NULL(g); } /** * gutenfetch_util_move * * Given two filenames, copy the first one to the second one. * It will first try to rename them, which should work if * they are on the same partition, if not, it will manually * copy all the bytes. * * @param from The full path/filename of the file to move. * @param to The full path/filename of the destination. * @return 1 on success, -1 on failure. */ int gutenfetch_util_move(const char *from, const char *to) { #define BUF_SIZE 8192 char buf[BUF_SIZE]; int from_fd, to_fd; ssize_t bytes_read, bytes_written; if ((from == NULL) || (to == NULL)) return( -1 ); /* Build the path to the to file if it doesn't exist. */ gutenfetch_util_build_path(to); /* First try to just rename the file as this will be * much quicker if it is allowed. If not, we will manually * copy the files. 
*/ if (rename(from, to) == -1) { from_fd = open(from, O_RDONLY); if (from_fd == -1) { return( -1 ); } to_fd = open(to, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRGRP | S_IXGRP); if (to_fd == -1) { close(from_fd); return( -1 ); } while ((bytes_read = read(from_fd, buf, BUF_SIZE)) > 0) { bytes_written = write(to_fd, buf, bytes_read); if (bytes_read != bytes_written) { close(from_fd); close(to_fd); return( -1 ); } } close(from_fd); close(to_fd); if (bytes_read != 0) return( -1 ); /* Remove the from file, this makes it have * the same semantics as rename. */ unlink(from); } return( 1 ); } /** * gutenfetch_util_read_file_to_buffer * * Given a valid file descriptor, read the contents into * a buffer and return the result. * * ** NOTE ** * * This should only be used to read text files, * if it is used on a binary file, it may result * in not being able to read after the first zero. * * @param fd The file descriptor to read from. * @return A NULL-terminated buffer with the contents of the * file. */ char * gutenfetch_util_read_file_to_buffer(int fd) { ssize_t rw = -1; size_t i = 0; size_t buf_size = 0; size_t block_size = 4096; char *buffer = NULL; char *temp = NULL; if (fd != -1) { lseek(fd, 0, SEEK_SET); do { if (i + block_size > buf_size) { temp = realloc(buffer, buf_size + block_size); if (temp != NULL) { buffer = temp; buf_size += block_size; } else { FREE_NULL(buffer); return( NULL ); } } rw = read(fd, &buffer[i], block_size); if (rw > 0) { i += rw; } } while(rw > 0); } if( rw < 0 ) { FREE_NULL(buffer); return( NULL ); } /* NULL-terminate the result. */ if (buffer != NULL) { temp = realloc(buffer, i + 1); if (temp != NULL) { temp[i] = '\0'; buffer = temp; } else { FREE_NULL(buffer); } } return buffer; } /** * gutenfetch_util_read_binary_file_to_buffer * * Given a valid file descriptor, read the contents into * a buffer user supplied buffer. Use this function when * the source may or may not be a binary file or contain * '\0' within the text body. 
* * @param fd The file descriptor to read from. * @param buf The buffer to return. * @param size The number of bytes returned in the buffer. * @return GUTENFETCH_OK on success, something else on error. */ gutenfetch_error_t gutenfetch_util_read_binary_file_to_buffer( int fd, char **buf, size_t *size) { ssize_t rw = -1; size_t i = 0; size_t buf_size = 0; size_t block_size = 4096; char *buffer = NULL; char *temp = NULL; assert(size != NULL); assert(buf != NULL); FREE_NULL(*buf); if (fd != -1) { if( lseek(fd, 0, SEEK_SET) == -1) { return( GUTENFETCH_SEE_ERRNO ); } do { if (i + block_size > buf_size) { temp = realloc(buffer, buf_size + block_size); if (temp != NULL) { buffer = temp; buf_size += block_size; } else { FREE_NULL(buffer); return( GUTENFETCH_NOMEM ); } } rw = read(fd, &buffer[i], block_size); if (rw > 0) { i += rw; } } while(rw > 0); } if( rw < 0 ) { FREE_NULL( buffer ); return( GUTENFETCH_SEE_ERRNO ); } /* Shrink memory block and set return values * to correct values. */ if (buffer != NULL) { temp = realloc(buffer, i + 1); temp[i] = '\0'; if (temp != NULL) { *buf = buffer = temp; *size = i; } else { FREE_NULL(buffer); return( GUTENFETCH_NOMEM ); } } return( GUTENFETCH_OK ); }