/*
    gutenfetch - a small utility to list and fetch books available through
	project gutenberg

    Copyright (C) 2001, 2002, 2003, 2004 Russell Francis

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the

	Free Software Foundation, Inc.
	59 Temple Place, Suite 330
	Boston, MA 02111-1307 USA

    Last updated on $Date: 2004/07/07 02:41:22 $ by $Author: johntabularasa $.
*/
#include "stddefs.h"
#include "gutenfetch.h"
#include "libgutenfetch_init.h"
#include "libgutenfetch_fileinfo.h"
#include "libgutenfetch_servers.h"
#include "libgutenfetch_utility.h"
#include "libgutenfetch_filter.h"
#include "libgutenfetch_listing.h"
#include "libgutenfetch_etext.h"
#include "libgutenfetch_cache.h"
#include "rb.h"

#ifdef USE_ZLIB
#	if (HAVE_ZLIB_H == 1)
#		include <zlib.h>
#	endif
#endif

#ifdef HAVE_ASSERT_H
#	include <assert.h>
#endif

#ifdef HAVE_STDLIB_H
#	include <stdlib.h>
#endif

#ifdef HAVE_STRING_H
#	include <string.h>
#endif

#ifdef HAVE_STRINGS_H
#	include <strings.h>
#endif

#ifdef HAVE_TIME_H
#	include <time.h>
#endif

#ifdef HAVE_PTHREAD
#ifdef HAVE_PTHREAD_H
#include <pthread.h>
#endif /* HAVE_PTHREAD_H */
#endif /* HAVE_PTHREAD */

/** Private definitions **/
#if 0
typedef struct {
	char *directory;
	list_t *contents;
} detailed_data_t;

#define LINE_LENGTH 4096
typedef struct {
	char *buffer;
	size_t bufsize;
	char line[LINE_LENGTH];
	size_t line_index;
	list_t *list;
} build_etext_data_t;

typedef struct {
#ifdef USE_ZLIB
	gzFile gzf;
#else
	FILE *fp;
#endif
	int fd;
	char line[LINE_LENGTH];
	size_t line_index;
	list_t *list;
	unsigned int first_call : 1;
} detail_etext_data_t;

typedef struct {
	char *directory;
	list_t *file;
} directory_data_t;
#endif

/* Sizes of the fixed buffers used while parsing catalog lines. */
enum {
	OLD_FILEBASE_LENGTH = 8,	/* characters of the filebase field kept */
	NEW_DIRECTORY_SIZE = 24,	/* "d/d/d/d/" + any int + NUL */
	PROGRESS_MESSAGE_SIZE = 4096
};

/** Private variables **/
static list_t *etext_catalog = NULL;
static unsigned int etext_catalog_count = 0;
gutenfetch_etext_t **etext_catalog_block_alloc = NULL;

/** Private Functions **/
#if 0
gutenfetch_error_t gutenfetch_detail_etexts(
	detail_etext_data_t *ddata,
	int (*)(void *, double, double, double, const char *),
	void *);
#endif

/**
 * set_condition_flags
 *
 * Translate the one character tag found at the end of a catalog line
 * into the matching condition flag of an etext.  Tags other than
 * '*', 'C' and 'A' (and a NULL tag) leave the flags untouched.
 *
 * @param etext The etext whose cflag member is updated.
 * @param tag The matched tag text, may be NULL.
 */
static void
set_condition_flags(gutenfetch_etext_t *etext, const char *tag)
{
	if (tag == NULL)
		return;

	switch (tag[0]) {
	case '*':	/* reserved */
		etext->cflag.reserved = 1;
		break;
	case 'C':	/* copyright */
		etext->cflag.copyright = 1;
		break;
	case 'A':	/* australia */
		etext->cflag.australia = 1;
		break;
	default:
		break;
	}
}

/**
 * tagged_entry_free
 *
 * Item destructor handed to rb_destroy(); releases one key string
 * of the duplicate detection tree.
 *
 * @param item The key string stored in the tree.
 * @param param The tree parameter, unused.
 */
static void
tagged_entry_free(void *item, void *param)
{
	(void)param;
	free(item);
}

/**
 * gutenfetch_line_is_old_ebook_entry
 *
 * Match a line against IFILTER_GUTINDEX_OLD and, when it matches,
 * build an etext from the matched fields.  The fields are consumed
 * in this order: full line, month (ignored), year, title/author,
 * filebase, id and condition tag.
 *
 * @param line The line which may or may not be an ebook entry.
 * @return NULL if it isn't (or a field is missing or the etext could
 *	not be allocated), otherwise a new gutenfetch_etext_t * which the
 *	caller releases with gutenfetch_etext_free().
 */
gutenfetch_etext_t *
gutenfetch_line_is_old_ebook_entry(char *line)
{
	gutenfetch_etext_t *etext = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;
	char temp_filebase[OLD_FILEBASE_LENGTH + 1];
	size_t len;
	int year;

	match = gutenfetch_ifilter_match(IFILTER_GUTINDEX_OLD, line);
	if (match == NULL)
		return NULL;

	etext = gutenfetch_etext_new();
	if (etext == NULL)
		goto fail;

	/* fill in the full line. */
	lt = list_first(match);
	if (lt == NULL)
		goto fail;
	etext->full = strdup(lt->data);

	/* skip the month field. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;

	/* get the directory */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	year = (int)strtol((char *)lt->data, NULL, 10);
	if ((year >= 60) && (year < 90)) {
		/* Some of the earliest etexts list the real date they were
		 * put into etext form, so the year field does not name the
		 * directory they are stored in; they all live in etext90. */
		etext->directory = strdup("etext90");
	} else {
		etext->directory = gutenfetch_util_strcat(
			"etext", lt->data, NULL);
	}

	/* get title and author */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	etext->author = gutenfetch_util_get_author(lt->data);
	etext->title = gutenfetch_util_get_title(lt->data);

	/* get filebase: at most the first 8 characters, never reading
	 * past the end of the matched string. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	len = strlen((char *)lt->data);
	if (len > OLD_FILEBASE_LENGTH)
		len = OLD_FILEBASE_LENGTH;
	memcpy(temp_filebase, lt->data, len);
	temp_filebase[len] = '\0';
	/* strip trailing 'x' characters but always keep the first one. */
	while ((len > 1) && (temp_filebase[len - 1] == 'x'))
		temp_filebase[--len] = '\0';
	etext->filebase = strdup(temp_filebase);

	/* Get the unique integer id. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	etext->id = (int)strtol((char *)lt->data, NULL, 10);

	/* Get the copyright/reserved/australia tag. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	set_condition_flags(etext, (const char *)lt->data);

	list_remove_all(match, free);
	return etext;

fail:
	if (etext != NULL)
		gutenfetch_etext_free(etext);
	list_remove_all(match, free);
	return NULL;
}

/**
 * gutenfetch_line_is_new_ebook_entry
 *
 * Given a NULL terminated text string, determine
 * if it describes a new ebook listing >= 10000 in
 * the GUTINDEX.ALL file.  For such a listing the directory
 * is built from the first four characters of the number
 * followed by the id, e.g. "1/2/3/4/12345".
 *
 * @param line The line which may or may not be a new
 *	ebook listing.
 * @return NULL if it isn't (or memory could not be allocated),
 *	or a valid and filled out gutenfetch_etext_t * structure which
 *	the caller releases with gutenfetch_etext_free().
 */
gutenfetch_etext_t *
gutenfetch_line_is_new_ebook_entry(char *line)
{
	gutenfetch_etext_t *etext = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;
	const char *number;

	match = gutenfetch_ifilter_match(IFILTER_GUTINDEX_NEW, line);
	if (match == NULL)
		return NULL;

	/* initialize the etext. */
	etext = gutenfetch_etext_new();
	if (etext == NULL)
		goto fail;

	/* fill in full line. */
	lt = list_first(match);
	if (lt == NULL)
		goto fail;
	etext->full = strdup(lt->data);

	/* fill in author and title */
	lt = list_next(lt);
	if (lt != NULL) {
		etext->author = gutenfetch_util_get_author(lt->data);
		etext->title = gutenfetch_util_get_title(lt->data);
		lt = list_next(lt);
	}

	/* fill in id, directory, filebase */
	if (lt != NULL) {
		number = (const char *)lt->data;
		if (strlen(number) > 4) {
			etext->id = (int)strtol(number, NULL, 10);
			etext->filebase = strdup(number);
			etext->directory = malloc(NEW_DIRECTORY_SIZE);
			if (etext->directory == NULL)
				goto fail;
			snprintf(etext->directory, NEW_DIRECTORY_SIZE,
				"%c/%c/%c/%c/%d",
				number[0], number[1], number[2], number[3],
				etext->id);
		}
		lt = list_next(lt);
	}

	/* fill in condition flags */
	if (lt != NULL)
		set_condition_flags(etext, (const char *)lt->data);

	list_remove_all(match, free);
	return etext;

fail:
	if (etext != NULL)
		gutenfetch_etext_free(etext);
	list_remove_all(match, free);
	return NULL;
}

/**
 * gutenfetch_line_is_ebook_entry
 *
 * Try the old catalog format first and fall back to the new one.
 *
 * @param line The line of text which may be an etext entry
 *	from the GUTINDEX.ALL file.
 * @return NULL if it isn't or a valid and as filled in as
 *	we can get gutenfetch_etext_t.
 */
gutenfetch_etext_t *
gutenfetch_line_is_ebook_entry(char *line)
{
	gutenfetch_etext_t *etext;

	etext = gutenfetch_line_is_old_ebook_entry(line);
	if (etext != NULL)
		return etext;

	/* Not an old entry, maybe a new one? */
	return gutenfetch_line_is_new_ebook_entry(line);
}

/** Semi-Global Functions */

/**
 * gutenfetch_listing_init
 *
 * Initialize resources used by this module.
 * Nothing needs to be set up at present.
 */
void
gutenfetch_listing_init(void)
{
}

/**
 * gutenfetch_listing_shutdown
 *
 * Release resources used by this module: every etext in the
 * catalog and the array handed out by gutenfetch_get_listing().
 * The module state is reset so that the function may safely be
 * called again and the catalog may be rebuilt afterwards.
 */
void
gutenfetch_listing_shutdown(void)
{
	if (etext_catalog != NULL) {
		list_remove_all(etext_catalog,
			(void (*)(void*))gutenfetch_etext_free);
		/* the list nodes are gone, do not keep a dangling head. */
		etext_catalog = NULL;
	}
	etext_catalog_count = 0;
	FREE_NULL(etext_catalog_block_alloc);
}

/** Exported Global Functions **/

/**
 * gutenfetch_get_raw_listing
 *
 * This routine fills a buffer with the entire contents of the
 * GUTINDEX.ALL or GUTINDEX.AUS file or both concatenated together
 * (GUTINDEX.ALL first).  When neither file could be fetched an
 * empty NULL terminated buffer is returned.
 *
 * @param buffer Where the newly allocated buffer is stored; any
 *	buffer it already points to is freed first.  Must not be NULL.
 * @param type Which of the listings to retrieve.
 * @param pfunc A user provided progress function or NULL.
 * @param pfunc_data Passed as the first argument to pfunc.
 * @return GUTENFETCH_OK, or GUTENFETCH_NOMEM if the result buffer
 *	could not be allocated.
 */
gutenfetch_error_t
gutenfetch_get_raw_listing(
	char **buffer,
	listing_type_t type,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	int fd = -1;
	char *aus = NULL;
	char *all = NULL;

	assert(buffer != NULL);

	if (*buffer != NULL) {
		FREE_NULL(*buffer);
	}

	if ((type == LIST_ALL) || (type == LIST_NON_AUSTRALIAN)) {
		fd = gutenfetch_cache_fetch(NON_AUSTRALIAN, "GUTINDEX.ALL",
			pfunc, pfunc_data);
		if (fd != -1)
			all = gutenfetch_util_read_file_to_buffer(fd);
	}

	if ((type == LIST_ALL) || (type == LIST_AUSTRALIAN)) {
		fd = gutenfetch_cache_fetch(AUSTRALIAN, "GUTINDEX.AUS",
			pfunc, pfunc_data);
		if (fd != -1)
			aus = gutenfetch_util_read_file_to_buffer(fd);
	}

	/* now our internal buffers should have all the info we need. */
	if ((aus != NULL) && (all != NULL)) {
		*buffer = gutenfetch_util_strcat(all, aus, NULL);
		FREE_NULL(aus);
		FREE_NULL(all);
	} else if (aus != NULL) {
		*buffer = aus;
	} else if (all != NULL) {
		*buffer = all;
	} else {
		/* Unable to fetch gutindex.all or gutindex.aus. */
		/* make an empty null-terminated buffer. */
		*buffer = malloc(1);
		if (*buffer != NULL)
			(*buffer)[0] = '\0';
	}

	/* never report success without a buffer to go with it. */
	if (*buffer == NULL)
		return GUTENFETCH_NOMEM;
	return GUTENFETCH_OK;
}

#if 0
/**
 * gutenfetch_detail_listing
 *
 * Detail a list of electronic texts.  This function
 * does nothing unless gutenfetch_get_listing is called
 * first.
 */
gutenfetch_error_t
gutenfetch_detail_listing(
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	int fd;
	detail_etext_data_t ddata;

#ifdef USE_ZLIB
	fd = gutenfetch_cache_fetch(
		NON_AUSTRALIAN, "ls-lR.gz", pfunc, pfunc_data);
#else
	fd = gutenfetch_cache_fetch(
		NON_AUSTRALIAN, "ls-R", pfunc, pfunc_data);
#endif
	if (fd != -1) {
		ddata.fd = fd;
		ddata.list = NULL;
		ddata.line_index = 0;
		ddata.first_call = TRUE;
		gutenfetch_detail_etexts(&ddata, pfunc, pfunc_data);
		close(fd);
	}
	return GUTENFETCH_OK;
}

/**
 * gutenfetch_detail_etexts
 *
 * This function takes the detail_etext_data_t structure
 * to get the temporary file which the ls-R or ls-lR.gz file
 * has been written to.  It then scans this file for detailed
 * information regarding the etexts we obtained from GUTINDEX.ALL
 *
 * @param ddata The detail_etext_data_t structure.
 *
 */
gutenfetch_error_t
gutenfetch_detail_etexts(
	detail_etext_data_t *ddata,
	int (*progress_func)(void *, double, double, double, const char *),
	void *progress_func_data)
{
	char msg[4096];
	unsigned int loops = 0;
	unsigned int count = 0, i;
	list_t *entry_lt = NULL;
	gutenfetch_etext_entry_t *entry = NULL;
	gutenfetch_etext_t *etext;
	file_info_t *file_info = NULL;
	directory_data_t *dir_data = NULL;
	directory_data_t *zip_dir_data = NULL;
	directory_data_t static_directory_data;
	char *line = NULL;
	list_t *ftemp = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;
	list_t *cat_lt = NULL;

#ifdef USE_ZLIB
	size_t size;
	ddata->gzf = gzdopen(ddata->fd, "rb");
	if (ddata->gzf == NULL) {
		return GUTENFETCH_UNABLE_TO_DETAIL_LISTING;
	}

	/* Build a red-black tree from the ls-lR file */
	while ( gzgets(ddata->gzf, ddata->line, LINE_LENGTH) != Z_NULL) {
		match = gutenfetch_ifilter_match(
			IFILTER_LS_LR_DETAIL_DIRECTORY, ddata->line);
		if (match != NULL) {
			lt = list_next(list_first(match));
			if (lt != NULL) {
				add_directory_to_tree((const char *)lt->data,
					dir_tree, &dir_data);
				add_directory_to_tree((const char *)lt->data,
					zip_dir_tree, &zip_dir_data);
			}
			list_remove_all(match, free);
		} else if (dir_data != NULL) {
			match = gutenfetch_ifilter_match(
				IFILTER_LS_LR_DETAIL_ENTRY, ddata->line);
			if (match != NULL) {
				lt = list_next(list_first(match));
				if (lt != NULL) {
					size = strtol((char*)lt->data, NULL, 10);
				}
				lt = list_next(list_next(list_next(lt)));
				if (lt != NULL) {
					file_info = file_info_new((char*)lt->data, size);
					assert(file_info != NULL);
					if (gutenfetch_util_extension_is("zip", (char *)lt->data)) {
						zip_dir_data->file = list_prepend(zip_dir_data->file, file_info);
					} else {
						dir_data->file = list_prepend(dir_data->file, file_info);
					}
				}
			}
		}
	}
#else
	/* Build a red-black tree from the ls-R file */
	ddata->fp = fdopen(ddata->fd, "r");
	if (ddata->fp == NULL) {
		return GUTENFETCH_UNABLE_TO_DETAIL_LISTING;
	}
	fseek(ddata->fp, 0, SEEK_SET);

	for (line = fgets(ddata->line, LINE_LENGTH, ddata->fp);
		line != NULL;
		line = fgets(ddata->line, LINE_LENGTH, ddata->fp))
	{
		match = gutenfetch_filter_match(ls_R_detail_directory_filter, ddata->line);
		if (match != NULL) {
			lt = list_next(list_first(match)); /* Get the second element. */
			if (lt != NULL) {
				/* add an entry to the directory rb tree. */
				add_directory_to_tree((const char *)lt->data,
					dir_tree, &dir_data);
				/* add an entry to the zipfile rb tree. */
				add_directory_to_tree((const char *)lt->data,
					zip_dir_tree, &zip_dir_data);
			}
			list_remove_all(match, free);
		} else if (dir_data != NULL) {
			match = gutenfetch_filter_match(ls_R_detail_entry_filter, ddata->line);
			if (match != NULL) {
				lt = list_first(match);
				if (lt != NULL) {
					file_info = file_info_new((char*)lt->data, 0);
					assert(file_info != NULL);
					if (gutenfetch_util_extension_is("zip", (char*)lt->data)) {
						zip_dir_data->file = list_prepend(zip_dir_data->file, file_info);
					} else {
						dir_data->file = list_prepend(dir_data->file, file_info);
					}
				}
			}
		}
	}
#endif /* USE_ZLIB */

	/* Iterate through the etext_catalog and look for entries in the tree.
	 * As they are found, add them to the gutenfetch_etext_entry_t ** array
	 * within the etext_catalog. */
	cat_lt = list_first(etext_catalog);
	while (cat_lt != NULL) {
		etext = (gutenfetch_etext_t*)cat_lt->data;
		if (etext != NULL) {
			if ((etext->directory != NULL) && (etext->filebase != NULL)) {
				if (progress_func != NULL) {
					if (etext->title != NULL) {
						snprintf(msg, 4096, "Detailing etext '%s'.", etext->title);
					} else {
						snprintf(msg, 4096, "Detailing etext #:%d.", etext->id);
					}
					progress_func(
						progress_func_data,
						(double)loops / (double)etext_catalog_count,
						etext_catalog_count,
						loops,
						msg);
				}
				static_directory_data.directory = etext->directory;
				if (static_directory_data.directory[strlen(static_directory_data.directory)-1] == '/')
					static_directory_data.directory[strlen(static_directory_data.directory)-1] = '\0';
				static_directory_data.file = NULL;
				dir_data = rb_find(dir_tree, &static_directory_data);
				zip_dir_data = rb_find(zip_dir_tree, &static_directory_data);
				if (dir_data != NULL) {
					lt = list_first(dir_data->file);
					while (lt != NULL) {
						file_info = (file_info_t*)lt->data;
						/* This is the most common case. */
						if (strncmp(file_info->filename,
								etext->filebase,
								strlen(etext->filebase)) == 0)
						{
							entry = gutenfetch_etext_entry_build_new(
								etext->directory,
								file_info->filename,
								file_info->filesize,
								zip_dir_data->file);
							/* Add entry to our entry list and increment the counter. */
							if (entry != NULL) {
								entry_lt = list_prepend(entry_lt, entry);
								count++;
							}
						} else if ((etext->filebase[0] == '?') &&
							((file_info->filename[0] == '7') ||
							 (file_info->filename[0] == '8')))
						{
							if (strncmp(&file_info->filename[1],
									&etext->filebase[1],
									strlen(etext->filebase) - 1) == 0)
							{
								entry = gutenfetch_etext_entry_build_new(
									etext->directory,
									file_info->filename,
									file_info->filesize,
									zip_dir_data->file);
								if (entry != NULL) {
									entry_lt = list_prepend(entry_lt, entry);
									++count;
								}
							}
						}
						lt = list_next(lt);
					}

					/* Free any etext_entries which may be around. */
					if (etext->entry != NULL) {
						i = 0;
						while(etext->entry[i] != NULL) {
							gutenfetch_etext_entry_free(etext->entry[i]);
							++i;
						}
					}

					/* allocate new ones. */
					count++;
					etext->entry = malloc(sizeof(gutenfetch_etext_entry_t*) * count);
					assert(etext->entry != NULL);
					ftemp = list_first(entry_lt);
					i = 0;
					while (ftemp != NULL) {
						etext->entry[i++] = (gutenfetch_etext_entry_t*)ftemp->data;
						ftemp = list_next(ftemp);
					}
					etext->entry[i] = NULL;
				}
			}
		}
		cat_lt = list_next(cat_lt);
		count = 0;
		list_remove_all(entry_lt, NULL);
		entry_lt = NULL;
		entry = NULL;
		loops++;
	}

	/* return success. */
	return GUTENFETCH_OK;
}
#endif

/**
 * tagged_entries_compare
 *
 * Comparison function of the duplicate detection tree; orders
 * the keys as plain strings.
 *
 * @param a The first key string.
 * @param b The second key string.
 * @param data The tree parameter, unused.
 * @return The result of strcmp() on the two keys.
 */
int
tagged_entries_compare(const void *a, const void *b, void *data)
{
	(void)data;
	return strcmp((const char *)a, (const char *)b);
}

/**
 * gutenfetch_add_entries_in_file_to_catalog
 *
 * Read in a file in the format of GUTINDEX.???
 * and add the valid entries to the etext_catalog.
 * An entry whose "directory+filebase" key was already seen
 * in this file is treated as a duplicate and dropped.
 *
 * NOTE(review): the file is always fetched with NON_AUSTRALIAN,
 * while gutenfetch_get_raw_listing fetches GUTINDEX.AUS with
 * AUSTRALIAN -- confirm which one is intended here.
 * NOTE(review): when fdopen() fails the descriptor returned by
 * gutenfetch_cache_fetch() is not closed here -- confirm who owns it.
 *
 * @param file The GUTINDEX.??? file.
 * @param pfunc the User supplied progress function.
 * @param pfunc_data the user supplied data for the progress function.
 */
void
gutenfetch_add_entries_in_file_to_catalog(
	const char *file,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	struct rb_table *tagged_entries;
	char msg[PROGRESS_MESSAGE_SIZE];
	char *key;
	char *line;
	void **slot;
	gutenfetch_etext_t *text = NULL;
	int fd;
	FILE *fp;

	fd = gutenfetch_cache_fetch(NON_AUSTRALIAN, file, pfunc, pfunc_data);
	if (fd == -1)
		return;

	fp = fdopen(fd, "r");
	if (fp == NULL)
		return;

	tagged_entries = rb_create(tagged_entries_compare, NULL, NULL);
	if (tagged_entries == NULL) {
		fclose(fp);
		return;
	}

	while ((line = gutenfetch_util_getline(fp)) != NULL) {
		text = gutenfetch_line_is_ebook_entry(line);
		if (text != NULL) {
			/* check that it isn't a duplicate. */
			key = gutenfetch_util_strcat(
				text->directory, "+", text->filebase, NULL);
			slot = (key != NULL) ? rb_probe(tagged_entries, key) : NULL;
			if ((slot != NULL) && (*slot == key)) {
				/* first sighting: the tree now owns the key and
				 * the catalog owns the etext. */
				etext_catalog = list_prepend(etext_catalog, text);
				etext_catalog_count++;
				if (pfunc != NULL) {
					snprintf(msg, sizeof msg,
						"Found %u etexts in %s.",
						etext_catalog_count, file);
					pfunc(
						pfunc_data,
						0.0,
						etext_catalog_count,
						etext_catalog_count,
						msg);
				}
			} else {
				/* duplicate, or the key could not be stored. */
				FREE_NULL(key);
				gutenfetch_etext_free(text);
			}
		}
		FREE_NULL(line);
	}

	rb_destroy(tagged_entries, tagged_entry_free);
	fclose(fp);
}

/**
 * gutenfetch_get_listing
 *
 * Retrieve a listing of all available
 * electronic texts from the active PG server.
 *
 * The returned array is NULL terminated and holds the etexts in
 * the order they were read.  Both the array and the etexts remain
 * owned by this module and are released by
 * gutenfetch_listing_shutdown().
 *
 * @param etext A pointer to return the list; an array it already
 *	points to is freed first.
 * @param type Which ebook listings we should concern
 *	ourselves with.
 * @param pfunc A user provided function which is called
 *	to update the progress of the operation or NULL.
 * @param pfunc_data A user provided pointer which is
 *	passed as the first argument to pfunc.
 * @return GUTENFETCH_OK or an error code.
 */
gutenfetch_error_t
gutenfetch_get_listing(
	gutenfetch_etext_t ***etext,
	listing_type_t type,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	list_t *lt = NULL;
	size_t total;
	size_t remaining;
	size_t done;

	if (etext == NULL)
		return GUTENFETCH_BAD_PARAM;
	if ((type != LIST_NON_AUSTRALIAN) &&
		(type != LIST_AUSTRALIAN) &&
		(type != LIST_ALL))
	{
		return GUTENFETCH_BAD_PARAM;
	}

	if ((type == LIST_ALL) || (type == LIST_NON_AUSTRALIAN)) {
		gutenfetch_add_entries_in_file_to_catalog("GUTINDEX.ALL",
			pfunc, pfunc_data);
	}

	if ((type == LIST_ALL) || (type == LIST_AUSTRALIAN)) {
		gutenfetch_add_entries_in_file_to_catalog("GUTINDEX.AUS",
			pfunc, pfunc_data);
	}

	/* convert the etext catalog into a more standard array. */
	if (*etext != NULL) {
		FREE_NULL(*etext);
	}

	total = etext_catalog_count;
	*etext = malloc(sizeof(gutenfetch_etext_t *) * (total + 1));
	etext_catalog_block_alloc = *etext;
	if (*etext == NULL)
		return GUTENFETCH_NOMEM;

	(*etext)[total] = NULL;

	/* The catalog is built by prepending, so walking it while filling
	 * the array from the back restores the order of the files.  The
	 * remaining counter also keeps us from writing before the array. */
	remaining = total;
	for (lt = list_first(etext_catalog);
		(lt != NULL) && (remaining > 0);
		lt = list_next(lt))
	{
		(*etext)[--remaining] = (gutenfetch_etext_t*)lt->data;
		if (pfunc != NULL) {
			done = total - remaining;
			pfunc(
				pfunc_data,
				(double)done / (double)total,
				(double)total,
				(double)done,
				"Building etexts");
		}
	}

	return GUTENFETCH_OK;
}