/*
gutenfetch - a small utility to list and fetch books available through
project gutenberg
Copyright (C) 2001, 2002, 2003, 2004 Russell Francis
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the
Free Software Foundation, Inc.
59 Temple Place, Suite 330
Boston, MA 02111-1307 USA
Last updated on $Date: 2004/07/07 02:41:22 $ by $Author: johntabularasa $.
*/
#include "stddefs.h"
#include "gutenfetch.h"
#include "libgutenfetch_init.h"
#include "libgutenfetch_fileinfo.h"
#include "libgutenfetch_servers.h"
#include "libgutenfetch_utility.h"
#include "libgutenfetch_filter.h"
#include "libgutenfetch_listing.h"
#include "libgutenfetch_etext.h"
#include "libgutenfetch_cache.h"
#include "rb.h"
#ifdef USE_ZLIB
# if (HAVE_ZLIB_H == 1)
# include <zlib.h>
# endif
#endif
#ifdef HAVE_ASSERT_H
# include <assert.h>
#endif
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#ifdef HAVE_TIME_H
# include <time.h>
#endif
#ifdef HAVE_PTHREAD
#ifdef HAVE_PTHREAD_H
#include <pthread.h>
#endif /* HAVE_PTHREAD_H */
#endif /* HAVE_PTHREAD */
/** Private definitions **/
/*
 * Everything between this #if 0 and its #endif is compiled out.  Of these
 * types, detail_etext_data_t and directory_data_t are referenced by the
 * gutenfetch_detail_listing()/gutenfetch_detail_etexts() code further down,
 * which is disabled with #if 0 as well.
 */
#if 0
/* A directory name paired with a list of its contents (unused in this file). */
typedef struct {
char *directory;
list_t *contents;
} detailed_data_t;
/* Size of the fixed line buffers below; also the limit passed to gzgets()/fgets(). */
#define LINE_LENGTH 4096
/* Buffer/line/list state bundle (unused in this file). */
typedef struct {
char *buffer;
size_t bufsize;
char line[LINE_LENGTH];
size_t line_index;
list_t *list;
} build_etext_data_t;
/* State handed from gutenfetch_detail_listing() to gutenfetch_detail_etexts(). */
typedef struct {
#ifdef USE_ZLIB
/* zlib handle opened over fd with gzdopen(). */
gzFile gzf;
#else
/* stdio stream opened over fd with fdopen(). */
FILE *fp;
#endif
/* Descriptor returned by gutenfetch_cache_fetch() for the ls-R / ls-lR.gz file. */
int fd;
/* Buffer each line of the listing is read into. */
char line[LINE_LENGTH];
size_t line_index;
list_t *list;
unsigned int first_call : 1;
} detail_etext_data_t;
/* One directory of the server listing together with a list of file_info_t for its files. */
typedef struct {
char *directory;
list_t *file;
} directory_data_t;
#endif
/** Private variables **/
/* List of every gutenfetch_etext_t parsed so far; entries are added with
 * list_prepend() in gutenfetch_add_entries_in_file_to_catalog() and released
 * in gutenfetch_listing_shutdown(). */
static list_t *etext_catalog = NULL;
/* Number of entries in etext_catalog; incremented alongside every prepend. */
static unsigned int etext_catalog_count = 0;
/* The array most recently handed out by gutenfetch_get_listing(); it is freed
 * in gutenfetch_listing_shutdown().  Note that this variable is not static,
 * so it has external linkage despite living in the "private" section. */
gutenfetch_etext_t **etext_catalog_block_alloc = NULL;
/** Private Functions **/
/*
 * Forward declaration of gutenfetch_detail_etexts().  It is compiled out,
 * just like the definition of that function later in this file.
 */
#if 0
gutenfetch_error_t
gutenfetch_detail_etexts(
detail_etext_data_t *ddata,
int (*)(void *, double, double, double, const char *),
void *);
#endif
/**
 * gutenfetch_line_is_old_ebook_entry
 *
 * Try to parse a line as an old-style GUTINDEX entry by running it through
 * the IFILTER_GUTINDEX_OLD filter.  The nodes of the resulting match list
 * are consumed in order: full line, month (skipped), directory number,
 * title/author text, file base, numeric id and the
 * copyright/reserved/australia tag.
 *
 * @param line The line which may or may not be an ebook entry.
 * @return NULL if the line does not match, if the match list runs out of
 *         nodes early or if the etext cannot be allocated; otherwise a
 *         newly created gutenfetch_etext_t (release it with
 *         gutenfetch_etext_free()).
 */
gutenfetch_etext_t *
gutenfetch_line_is_old_ebook_entry(char *line)
{
	gutenfetch_etext_t *etext = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;
	const char *field = NULL;
	char temp_filebase[9];
	size_t n;
	int year;
	int i;

	match = gutenfetch_ifilter_match(IFILTER_GUTINDEX_OLD, line);
	if (match == NULL)
		return NULL; /* not an old style entry. */

	etext = gutenfetch_etext_new();
	if (etext == NULL)
		goto fail;

	lt = list_first(match);
	if (lt == NULL)
		goto fail;
	/* fill in the full line. */
	etext->full = strdup(lt->data);

	/* skip the month field. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;

	/* get the directory */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	year = (int)strtol((char*)lt->data, NULL, 10);
	if ((year < 90) && (year >= 60)) {
		/* Some entries list the date the text was really put into
		 * etext form in front, so the number found here is not the
		 * directory suffix; those texts are filed under etext90.
		 */
		etext->directory = strdup("etext90");
	} else {
		etext->directory = gutenfetch_util_strcat(
			"etext", lt->data, NULL);
	}

	/* get title and author */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	etext->author = gutenfetch_util_get_author(lt->data);
	etext->title = gutenfetch_util_get_title(lt->data);

	/* get filebase */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	/* Copy at most 8 characters, stopping at the terminating NUL so that
	 * a field shorter than 8 characters is never read past its end.  The
	 * buffer is zeroed first, so it is always NUL-terminated.
	 */
	field = (const char *)lt->data;
	memset(temp_filebase, 0, sizeof(temp_filebase));
	for (n = 0; (n < 8) && (field[n] != '\0'); ++n)
		temp_filebase[n] = field[n];
	/* Strip the trailing 'x' padding of an 8 character file base, but
	 * always keep the first character.
	 */
	for (i = 7; (i > 0) && (temp_filebase[i] == 'x'); --i)
		temp_filebase[i] = '\0';
	etext->filebase = strdup(temp_filebase);

	/* Get the unique integer id. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	etext->id = (int)strtol((char*)lt->data, NULL, 10);

	/* Get the copyright/reserved/australia tag. */
	lt = list_next(lt);
	if (lt == NULL)
		goto fail;
	if (lt->data != NULL) {
		switch (((char*)lt->data)[0]) {
		case '*':
			etext->cflag.reserved = 1;
			break;
		case 'C':
			etext->cflag.copyright = 1;
			break;
		case 'A':
			etext->cflag.australia = 1;
			break;
		default:
			break;
		}
	}

	list_remove_all(match, free);
	return etext;

fail:
	/* Single clean-up path for every incomplete match. */
	if (etext != NULL)
		gutenfetch_etext_free(etext);
	list_remove_all(match, free);
	return NULL;
}
/**
 * gutenfetch_line_is_new_ebook_entry
 *
 * Given a NUL terminated text string, determine
 * if it describes a new ebook listing >= 10000 in
 * the GUTINDEX.ALL file, using the IFILTER_GUTINDEX_NEW filter.
 *
 * The nodes of the match list are consumed in order: full line,
 * title/author text, id and condition flag.  The id, filebase and
 * directory are only filled in when the id field is longer than four
 * characters; the directory is built as "a/b/c/d/<id>" from the first
 * four characters of that field.
 *
 * @param line The line which may or may not be a new
 *             ebook listing.
 * @return NULL if it isn't (or if memory for the entry could not be
 *         obtained), otherwise a newly created gutenfetch_etext_t *
 *         structure (release it with gutenfetch_etext_free()).
 */
gutenfetch_etext_t *
gutenfetch_line_is_new_ebook_entry(char *line)
{
	enum { DIRECTORY_SIZE = 15 };
	gutenfetch_etext_t *etext = NULL;
	list_t *match = NULL;
	list_t *lt = NULL;
	const char *field = NULL;

	match = gutenfetch_ifilter_match(IFILTER_GUTINDEX_NEW, line);
	if (match == NULL)
		return NULL; /* not a new style entry. */

	/* initialize the etext. */
	etext = gutenfetch_etext_new();
	if (etext == NULL)
		goto fail;

	/* An empty match list is reported as "no entry" at run time rather
	 * than through assert(), which disappears when NDEBUG is defined.
	 */
	lt = list_first(match);
	if (lt == NULL)
		goto fail;

	/* fill in full line. */
	etext->full = strdup(lt->data);

	/* fill in author and title */
	lt = list_next(lt);
	if (lt != NULL) {
		etext->author = gutenfetch_util_get_author(lt->data);
		etext->title = gutenfetch_util_get_title(lt->data);
		lt = list_next(lt);
	}

	/* fill in id, directory, filebase */
	if (lt != NULL) {
		field = (const char *)lt->data;
		if (strlen(field) > 4) {
			etext->id = (int)strtol(field, NULL, 10);
			etext->filebase = strdup(field);
			etext->directory = malloc(sizeof(char) * DIRECTORY_SIZE);
			if (etext->directory == NULL)
				goto fail;
			snprintf(etext->directory, DIRECTORY_SIZE, "%c/%c/%c/%c/%d",
				field[0],
				field[1],
				field[2],
				field[3],
				etext->id);
		}
		lt = list_next(lt);
	}

	/* fill in condition flags */
	if ((lt != NULL) && (lt->data != NULL)) {
		switch (((char*)lt->data)[0]) {
		case 'A': /* aussie */
			etext->cflag.australia = 1;
			break;
		case 'C': /* copyright */
			etext->cflag.copyright = 1;
			break;
		case '*': /* reserved */
			etext->cflag.reserved = 1;
			break;
		default:
			break;
		}
	}

	list_remove_all(match, free);
	return etext;

fail:
	/* Single clean-up path for every failure. */
	if (etext != NULL)
		gutenfetch_etext_free(etext);
	list_remove_all(match, free);
	return NULL;
}
/**
 * gutenfetch_line_is_ebook_entry
 *
 * Parse a line of a GUTINDEX file, trying the old entry format first and
 * falling back to the new format when the old parser rejects the line.
 *
 * @param line The line of text which may be an etext entry
 *             from the GUTINDEX.ALL file.
 * @return NULL if neither parser accepts the line, otherwise the
 *         gutenfetch_etext_t produced by whichever parser accepted it.
 */
gutenfetch_etext_t *
gutenfetch_line_is_ebook_entry(char *line)
{
	gutenfetch_etext_t *parsed = gutenfetch_line_is_old_ebook_entry(line);

	if (parsed != NULL)
		return parsed;

	/* Not an old entry, maybe a new one? */
	return gutenfetch_line_is_new_ebook_entry(line);
}
/** Semi-Global Functions */
/**
 * gutenfetch_listing_init
 *
 * Start-up hook of this module.  The catalog variables are initialised
 * statically at file scope, so there is nothing to do here.
 */
void
gutenfetch_listing_init(void)
{
	/* Intentionally empty. */
	return;
}
/**
* gutenfetch_listing_shutdown
*
* Release resources used by this module.
*/
void
gutenfetch_listing_shutdown(void)
{
if (etext_catalog != NULL)
list_remove_all(etext_catalog, (void (*)(void*))gutenfetch_etext_free);
FREE_NULL(etext_catalog_block_alloc);
}
/** Exported Global Functions **/
/**
 * gutenfetch_get_raw_listing
 *
 * Store in *buffer a newly allocated, NUL terminated string holding the
 * contents of the GUTINDEX.ALL file, the GUTINDEX.AUS file, or
 * GUTINDEX.ALL followed by GUTINDEX.AUS, depending on type.  If neither
 * file could be fetched, *buffer is an empty string.
 *
 * @param buffer Where the result is stored.  A non-NULL *buffer is freed
 *               first; the caller owns the new buffer.
 * @param type Which index file(s) to read.
 * @param pfunc A user provided progress function or NULL; it is passed
 *              on to gutenfetch_cache_fetch().
 * @param pfunc_data User data passed on together with pfunc.
 * @return GUTENFETCH_OK on success, GUTENFETCH_BAD_PARAM when buffer is
 *         NULL, GUTENFETCH_NOMEM when the result buffer could not be
 *         allocated.
 */
gutenfetch_error_t
gutenfetch_get_raw_listing(
	char **buffer,
	listing_type_t type,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	int fd = -1;
	char *a = NULL; /* contents of GUTINDEX.AUS */
	char *b = NULL; /* contents of GUTINDEX.ALL */

	if (buffer == NULL)
		return GUTENFETCH_BAD_PARAM;
	if (*buffer != NULL) {
		FREE_NULL(*buffer);
	}

	/* NOTE(review): the descriptors returned by gutenfetch_cache_fetch()
	 * are not closed in this function - confirm that
	 * gutenfetch_util_read_file_to_buffer() takes ownership of them.
	 */
	if ((type == LIST_ALL) || (type == LIST_NON_AUSTRALIAN)) {
		fd = gutenfetch_cache_fetch(NON_AUSTRALIAN, "GUTINDEX.ALL", pfunc, pfunc_data);
		if (fd != -1) {
			b = gutenfetch_util_read_file_to_buffer(fd);
		}
	}
	if ((type == LIST_ALL) || (type == LIST_AUSTRALIAN)) {
		fd = gutenfetch_cache_fetch(AUSTRALIAN, "GUTINDEX.AUS", pfunc, pfunc_data);
		if (fd != -1) {
			a = gutenfetch_util_read_file_to_buffer(fd);
		}
	}

	/* now our internal buffers should have all the info we need. */
	if ((a != NULL) && (b != NULL)) {
		*buffer = gutenfetch_util_strcat(b, a, NULL);
		FREE_NULL(a);
		FREE_NULL(b);
	} else if (a != NULL) {
		*buffer = a;
	} else if (b != NULL) {
		*buffer = b;
	} else { /* Unable to fetch gutindex.all or gutindex.aus. */
		/* make an empty null-terminated buffer. */
		*buffer = malloc(sizeof(char) * 1);
		if (*buffer != NULL)
			(*buffer)[0] = '\0';
	}

	/* Never report success without a buffer to hand back. */
	if (*buffer == NULL)
		return GUTENFETCH_NOMEM;
	return GUTENFETCH_OK;
}
/*
 * Disabled code: everything from this #if 0 down to the matching #endif
 * (gutenfetch_detail_listing and gutenfetch_detail_etexts) is compiled out.
 */
#if 0
/**
 * gutenfetch_detail_listing
 *
 * Detail a list of electronic texts. This function
 * does nothing unless gutenfetch_get_listing is called
 * first.
 *
 * Fetches the server's recursive directory listing through the cache
 * ("ls-lR.gz" when USE_ZLIB is defined, "ls-R" otherwise) and passes the
 * descriptor to gutenfetch_detail_etexts().
 *
 * @param pfunc A user provided progress function or NULL.
 * @param pfunc_data User data passed as first argument to pfunc.
 * @return Always GUTENFETCH_OK; a failed fetch and the result of
 *         gutenfetch_detail_etexts() are both ignored.
 */
gutenfetch_error_t
gutenfetch_detail_listing(
int (*pfunc)(void *, double, double, double, const char *),
void *pfunc_data)
{
int fd;
detail_etext_data_t ddata;
#ifdef USE_ZLIB
fd = gutenfetch_cache_fetch(
NON_AUSTRALIAN, "ls-lR.gz", pfunc, pfunc_data);
#else
fd = gutenfetch_cache_fetch(
NON_AUSTRALIAN, "ls-R", pfunc, pfunc_data);
#endif
if (fd != -1) {
/* Only these four members are initialised; line[] and gzf/fp are left
 * for gutenfetch_detail_etexts() to fill in. */
ddata.fd = fd;
ddata.list = NULL;
ddata.line_index = 0;
ddata.first_call = TRUE;
gutenfetch_detail_etexts(&ddata, pfunc, pfunc_data);
/* NOTE(review): gutenfetch_detail_etexts() wraps fd with gzdopen()/fdopen()
 * and never calls gzclose()/fclose(), so this close() is the only release
 * of the descriptor; the wrapping handle itself is never freed. */
close(fd);
}
return GUTENFETCH_OK;
}
/**
 * gutenfetch_detail_etexts
 *
 * This function takes the detail_etext_data_t structure
 * to get the temporary file which the ls-R or ls-lR.gz file
 * has been written to. It then scans this file for detailed
 * information regarding the etexts we obtained from GUTINDEX.ALL
 *
 * The work is done in two passes.  First the listing is read line by line
 * and every directory, with the files found in it, is added to dir_tree
 * (zip files go to zip_dir_tree instead).  Then etext_catalog is walked and,
 * for each etext, the files of its directory whose names start with the
 * etext's filebase are turned into a NULL terminated etext->entry array.
 *
 * NOTE(review): this function is compiled out (#if 0).  dir_tree,
 * zip_dir_tree and add_directory_to_tree() are not declared anywhere in the
 * visible part of this file, and the non-zlib branch uses
 * gutenfetch_filter_match() with ls_R_* filter names that are not declared
 * here either.
 *
 * @param ddata The detail_etext_data_t structure.
 * @param progress_func A user provided progress function or NULL.
 * @param progress_func_data User data passed as first argument to
 *        progress_func.
 * @return GUTENFETCH_UNABLE_TO_DETAIL_LISTING if the descriptor cannot be
 *         wrapped by gzdopen()/fdopen(), otherwise GUTENFETCH_OK.
 */
gutenfetch_error_t
gutenfetch_detail_etexts(
detail_etext_data_t *ddata,
int (*progress_func)(void *, double, double, double, const char *),
void *progress_func_data)
{
char msg[4096];
unsigned int loops = 0;
unsigned int count = 0, i;
list_t *entry_lt = NULL;
gutenfetch_etext_entry_t *entry = NULL;
gutenfetch_etext_t *etext;
file_info_t *file_info = NULL;
directory_data_t *dir_data = NULL;
directory_data_t *zip_dir_data = NULL;
directory_data_t static_directory_data;
char *line = NULL;
list_t *ftemp = NULL;
list_t *match = NULL;
list_t *lt = NULL;
list_t *cat_lt = NULL;
#ifdef USE_ZLIB
size_t size;
ddata->gzf = gzdopen(ddata->fd, "rb");
if (ddata->gzf == NULL) {
return GUTENFETCH_UNABLE_TO_DETAIL_LISTING;
}
/* Build a red-black tree from the ls-lR file */
while ( gzgets(ddata->gzf, ddata->line, LINE_LENGTH) != Z_NULL) {
/* A directory header line starts a new current directory. */
match = gutenfetch_ifilter_match(
IFILTER_LS_LR_DETAIL_DIRECTORY,
ddata->line);
if (match != NULL) {
lt = list_next(list_first(match));
if (lt != NULL) {
add_directory_to_tree((const char *)lt->data,
dir_tree, &dir_data);
add_directory_to_tree((const char *)lt->data,
zip_dir_tree, &zip_dir_data);
}
list_remove_all(match, free);
} else if (dir_data != NULL) {
/* Otherwise the line may be a file entry of the current directory. */
match = gutenfetch_ifilter_match(
IFILTER_LS_LR_DETAIL_ENTRY,
ddata->line);
/* NOTE(review): unlike the directory branch above, this match list is
 * never passed to list_remove_all() - looks like a leak. */
if (match != NULL) {
/* Second node is parsed as the file size, the fifth is used as file name. */
lt = list_next(list_first(match));
if (lt != NULL) {
size = strtol((char*)lt->data, NULL, 10);
}
lt = list_next(list_next(list_next(lt)));
if (lt != NULL) {
file_info = file_info_new((char*)lt->data, size);
assert(file_info != NULL);
if (gutenfetch_util_extension_is("zip", (char *)lt->data)) {
zip_dir_data->file = list_prepend(zip_dir_data->file,
file_info);
} else {
dir_data->file = list_prepend(dir_data->file, file_info);
}
}
}
}
}
#else
/* Build a red-black tree from the ls-R file */
ddata->fp = fdopen(ddata->fd, "r");
if (ddata->fp == NULL) {
return GUTENFETCH_UNABLE_TO_DETAIL_LISTING;
}
fseek(ddata->fp, 0, SEEK_SET);
for (line = fgets(ddata->line, LINE_LENGTH, ddata->fp);
line != NULL;
line = fgets(ddata->line, LINE_LENGTH, ddata->fp))
{
match = gutenfetch_filter_match(ls_R_detail_directory_filter,
ddata->line);
if (match != NULL) {
lt = list_next(list_first(match)); /* Get the second element. */
if (lt != NULL) {
/* add an entry to the directory rb tree. */
add_directory_to_tree((const char *)lt->data, dir_tree, &dir_data);
/* add an entry to the zipfile rb tree. */
add_directory_to_tree((const char *)lt->data, zip_dir_tree, &zip_dir_data);
}
list_remove_all(match, free);
} else if (dir_data != NULL) {
match = gutenfetch_filter_match(ls_R_detail_entry_filter,
ddata->line);
/* NOTE(review): this match list is not released either. */
if (match != NULL) {
/* The plain ls-R listing carries no sizes, so 0 is recorded. */
lt = list_first(match);
if (lt != NULL) {
file_info = file_info_new((char*)lt->data, 0);
assert(file_info != NULL);
if (gutenfetch_util_extension_is("zip", (char*)lt->data)) {
zip_dir_data->file = list_prepend(zip_dir_data->file, file_info);
} else {
dir_data->file = list_prepend(dir_data->file, file_info);
}
}
}
}
}
#endif /* USE_ZLIB */
/* Iterate through the etext_catalog and look for entries in the tree.
* As they are found, add them to the gutenfetch_etext_entry_t ** array
* within the etext_catalog.
*/
cat_lt = list_first(etext_catalog);
while (cat_lt != NULL) {
etext = (gutenfetch_etext_t*)cat_lt->data;
if (etext != NULL) {
if ((etext->directory != NULL) && (etext->filebase != NULL)) {
if (progress_func != NULL) {
if (etext->title != NULL) {
snprintf(msg, 4096, "Detailing etext '%s'.", etext->title);
} else {
snprintf(msg, 4096, "Detailing etext #:%d.", etext->id);
}
progress_func(
progress_func_data,
(double)loops / (double)etext_catalog_count,
etext_catalog_count,
loops,
msg);
}
/* Build a stack-allocated lookup key.  It aliases etext->directory, so
 * stripping the trailing '/' below modifies the etext's own string. */
static_directory_data.directory = etext->directory;
if (static_directory_data.directory[strlen(static_directory_data.directory)-1] == '/')
static_directory_data.directory[strlen(static_directory_data.directory)-1] = '\0';
static_directory_data.file = NULL;
dir_data = rb_find(dir_tree, &static_directory_data);
zip_dir_data = rb_find(zip_dir_tree, &static_directory_data);
/* NOTE(review): only dir_data is checked for NULL; zip_dir_data is
 * dereferenced below without a check. */
if (dir_data != NULL) {
lt = list_first(dir_data->file);
while (lt != NULL) {
file_info = (file_info_t*)lt->data;
/* This is the most common case. */
if (strncmp(file_info->filename, etext->filebase, strlen(etext->filebase)) == 0) {
entry = gutenfetch_etext_entry_build_new(
etext->directory,
file_info->filename,
file_info->filesize,
zip_dir_data->file);
/* Add entry to our entry list and increment the counter. */
if (entry != NULL) {
entry_lt = list_prepend(entry_lt, entry);
count++;
}
/* A filebase starting with '?' matches file names starting with '7' or
 * '8' whose remaining characters agree with the rest of the filebase. */
} else if ((etext->filebase[0] == '?') &&
((file_info->filename[0] == '7') || (file_info->filename[0] == '8'))) {
if (strncmp(&file_info->filename[1], &etext->filebase[1], strlen(etext->filebase) - 1) == 0) {
entry = gutenfetch_etext_entry_build_new(
etext->directory,
file_info->filename,
file_info->filesize,
zip_dir_data->file);
if (entry != NULL) {
entry_lt = list_prepend(entry_lt, entry);
++count;
}
}
}
lt = list_next(lt);
}
/* Free any etext_entries which may be around. */
/* NOTE(review): the elements are freed here, but the old etext->entry
 * array itself is overwritten below without being freed. */
if (etext->entry != NULL) {
i = 0;
while(etext->entry[i] != NULL) {
gutenfetch_etext_entry_free(etext->entry[i]);
++i;
}
}
/* allocate new ones. */
/* One extra slot for the terminating NULL. */
count++;
etext->entry = malloc(sizeof(gutenfetch_etext_entry_t*) * count);
assert(etext->entry != NULL);
ftemp = list_first(entry_lt);
i = 0;
while (ftemp != NULL) {
etext->entry[i++] = (gutenfetch_etext_entry_t*)ftemp->data;
ftemp = list_next(ftemp);
}
etext->entry[i] = NULL;
}
}
}
/* Reset the per-etext state before moving on to the next catalog entry. */
cat_lt = list_next(cat_lt);
count = 0;
/* The entries now live in etext->entry; presumably a NULL free function
 * releases only the list nodes - TODO confirm against list_remove_all(). */
list_remove_all(entry_lt, NULL);
entry_lt = NULL;
entry = NULL;
loops++;
}
/* return success. */
return GUTENFETCH_OK;
}
#endif
/**
 * tagged_entries_compare
 *
 * Comparison callback for the tree of "directory+filebase" tags: both
 * items are treated as NUL terminated strings and ordered with strcmp().
 *
 * @param a First string.
 * @param b Second string.
 * @param data Unused.
 * @return The result of strcmp(a, b).
 */
int
tagged_entries_compare(const void *a, const void *b, void *data)
{
	const char *lhs = (const char *)a;
	const char *rhs = (const char *)b;
	int order = strcmp(lhs, rhs);

	return order;
}
/*
 * Item destructor for rb_destroy(): gives free() the two-argument callback
 * signature instead of calling free() through a cast function pointer of a
 * different type.
 */
static void
tagged_entry_free(void *item, void *param)
{
	(void)param;
	free(item);
}

/**
 * gutenfetch_add_entries_in_file_to_catalog
 *
 * Read in a file in the format of GUTINDEX.???
 * and add the valid entries to the etext_catalog.
 *
 * Every line is handed to gutenfetch_line_is_ebook_entry().  An entry is
 * identified by the tag "<directory>+<filebase>"; an entry whose tag was
 * already seen during this call is dropped.  The tag tree is created and
 * destroyed inside this function, so duplicates are only detected within
 * one call.  If the file cannot be fetched or opened nothing is added.
 *
 * NOTE(review): the file is always fetched with NON_AUSTRALIAN, even when
 * file is "GUTINDEX.AUS" (gutenfetch_get_raw_listing() uses AUSTRALIAN for
 * that file) - confirm which one is intended.
 *
 * @param file The GUTINDEX.??? file.
 * @param pfunc the User supplied progress function.
 * @param pfunc_data the user supplied data for the progress function.
 */
void
gutenfetch_add_entries_in_file_to_catalog(
	const char *file,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	struct rb_table *tagged_entries = NULL;
	char msg[4096];
	char *str = NULL;
	char *line = NULL;
	char **probe_ret = NULL;
	gutenfetch_etext_t *text = NULL;
	int fd;
	FILE *fp;

	fd = gutenfetch_cache_fetch(NON_AUSTRALIAN, file, pfunc, pfunc_data);
	if (fd == -1)
		return;

	fp = fdopen(fd, "r");
	if (fp == NULL) {
		/* NOTE(review): fd is not closed on this path (as in the
		 * original); close() is not visibly declared in this file. */
		return;
	}

	tagged_entries = rb_create(tagged_entries_compare, NULL, NULL);
	if (tagged_entries == NULL) {
		/* Without the tree duplicates cannot be detected; give up. */
		fclose(fp);
		return;
	}

	while ((line = gutenfetch_util_getline(fp)) != NULL) {
		text = gutenfetch_line_is_ebook_entry(line);
		if (text != NULL) {
			/* check that it isn't a duplicate. */
			str = gutenfetch_util_strcat(
				text->directory, "+", text->filebase, NULL);
			/* rb_probe() must not be given a NULL item and may itself
			 * return NULL, so both cases are treated like a duplicate:
			 * the entry is discarded. */
			probe_ret = NULL;
			if (str != NULL)
				probe_ret = (char**)rb_probe(tagged_entries, str);
			if ((probe_ret != NULL) && (*probe_ret == str)) {
				/* The tag was inserted, so this entry is new; the
				 * tree now owns str. */
				etext_catalog = list_prepend(etext_catalog, text);
				etext_catalog_count++;
				if (pfunc != NULL) {
					snprintf(msg, sizeof(msg),
						"Found %u etexts in %s.",
						etext_catalog_count, file);
					pfunc(
						pfunc_data,
						0.0,
						etext_catalog_count,
						etext_catalog_count,
						msg);
				}
			} else {
				if (str != NULL) {
					FREE_NULL(str);
				}
				gutenfetch_etext_free(text);
			}
		}
		FREE_NULL(line);
	}
	rb_destroy(tagged_entries, tagged_entry_free);
	fclose(fp);
}
/**
 * gutenfetch_get_listing
 *
 * Retrieve a listing of all available
 * electronic texts from the active PG server.
 *
 * The requested GUTINDEX file(s) are parsed into the module's catalog and
 * the catalog is then copied into a newly allocated, NULL terminated
 * array.  The array is filled from the back while the catalog list is
 * walked from its first node.  The array is also remembered in
 * etext_catalog_block_alloc and freed by gutenfetch_listing_shutdown();
 * the etexts it points to belong to the catalog.
 *
 * NOTE(review): the catalog is never emptied here, so calling this
 * function again adds the entries of the requested files a second time.
 *
 * @param etext A pointer to return the list. A non-NULL *etext is freed
 *              before the new array is stored.
 * @param type Which ebook listings we should concern
 *             ourselves with.
 * @param pfunc A user provided function which is called
 *              to update the progress of the operation or NULL.
 * @param pfunc_data A user provided pointer which is
 *                   passed as the first argument to pfunc.
 * @return GUTENFETCH_OK, GUTENFETCH_BAD_PARAM for a NULL etext or an
 *         unknown type, GUTENFETCH_NOMEM if the array cannot be allocated.
 */
gutenfetch_error_t
gutenfetch_get_listing(
	gutenfetch_etext_t ***etext,
	listing_type_t type,
	int (*pfunc)(void *, double, double, double, const char *),
	void *pfunc_data)
{
	list_t *lt = NULL;
	size_t total;
	size_t slot;
	size_t done;

	if (etext == NULL)
		return GUTENFETCH_BAD_PARAM;
	if ((type != LIST_NON_AUSTRALIAN) &&
	    (type != LIST_AUSTRALIAN) &&
	    (type != LIST_ALL))
	{
		return GUTENFETCH_BAD_PARAM;
	}

	if ((type == LIST_ALL) || (type == LIST_NON_AUSTRALIAN))
		gutenfetch_add_entries_in_file_to_catalog("GUTINDEX.ALL", pfunc, pfunc_data);
	if ((type == LIST_ALL) || (type == LIST_AUSTRALIAN))
		gutenfetch_add_entries_in_file_to_catalog("GUTINDEX.AUS", pfunc, pfunc_data);

	/* convert the etext catalog into a more standard array. */
	if (*etext != NULL) {
		FREE_NULL(*etext);
	}
	total = etext_catalog_count;
	*etext = malloc(sizeof(gutenfetch_etext_t *) * (total + 1));
	etext_catalog_block_alloc = *etext;
	if (*etext == NULL)
		return GUTENFETCH_NOMEM;

	/* slot is the index one past the next element to write; it counts
	 * down to 0 and never goes below it, so the index cannot wrap. */
	slot = total;
	(*etext)[slot] = NULL;
	for (lt = list_first(etext_catalog);
	     (lt != NULL) && (slot > 0);
	     lt = list_next(lt))
	{
		/* fill up the array with our etexts */
		(*etext)[--slot] = (gutenfetch_etext_t*)lt->data;
		if (pfunc != NULL) {
			/* done runs from 1 to total, so the reported fraction
			 * ends at exactly 1.0. */
			done = total - slot;
			pfunc(
				pfunc_data,
				(double)done / (double)total,
				(double)total,
				(double)done,
				"Building etexts");
		}
	}
	return GUTENFETCH_OK;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */