/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * gsf-input-gzip.c: wrapper to uncompress gzipped input
 *
 * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
 * Copyright (C) 2005-2006 Morten Welinder (terra@gnome.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2.1 of the GNU Lesser General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

#include <gsf-config.h>
#include <gsf/gsf-input-gzip.h>
#include <gsf/gsf-input-impl.h>
#include <gsf/gsf-impl-utils.h>
#include <gsf/gsf-utils.h>

#include <zlib.h>
#include <stdio.h>
#include <string.h>

#define Z_BUFSIZE 0x100

static GObjectClass *parent_class;

struct _GsfInputGZip {
	GsfInput input;

	GsfInput *source; /* compressed data */
	gboolean raw; /* No header and no trailer.  */
	GError *err;
	gsf_off_t uncompressed_size;
	gboolean stop_byte_added;

	z_stream  stream;
	guint8 const *gzipped_data;
	uLong     crc;     /* crc32 of uncompressed data */

	guint8   *buf;
	size_t    buf_size;

	gsf_off_t header_size, trailer_size;
	gsf_off_t seek_skipped;
};

typedef struct {
	GsfInputClass input_class;
} GsfInputGZipClass;

enum {
	PROP_0,
	PROP_RAW,
	PROP_SOURCE,
	PROP_UNCOMPRESSED_SIZE
};

/* gzip flag byte */
#define GZIP_IS_ASCII		0x01 /* file contains text ? */
#define GZIP_HEADER_CRC		0x02 /* there is a CRC in the header */
#define GZIP_EXTRA_FIELD	0x04 /* there is an 'extra' field */
#define GZIP_ORIGINAL_NAME	0x08 /* the original is stored */
#define GZIP_HAS_COMMENT	0x10 /* There is a comment in the header */
#define GZIP_HEADER_FLAGS (unsigned)(GZIP_IS_ASCII |GZIP_HEADER_CRC |GZIP_EXTRA_FIELD |GZIP_ORIGINAL_NAME |GZIP_HAS_COMMENT)

static gboolean
check_header (GsfInputGZip *input)
{
	if (input->raw) {
		input->header_size = 0;
		input->trailer_size = 0;
	} else {
		static guint8 const signature[2] = {0x1f, 0x8b};
		guint8 const *data;
		unsigned flags, len;

		/* Check signature */
		if (NULL == (data = gsf_input_read (input->source, 2 + 1 + 1 + 6, NULL)) ||
		    0 != memcmp (data, signature, sizeof (signature)))
			return TRUE;

		/* verify flags and compression type */
		flags  = data[3];
		if (data[2] != Z_DEFLATED || (flags & ~GZIP_HEADER_FLAGS) != 0)
			return TRUE;

		/* If we have the size, don't bother seeking to the end.  */
		if (input->uncompressed_size < 0) {
			/* Get the uncompressed size */
			if (gsf_input_seek (input->source, (gsf_off_t) -4, G_SEEK_END) ||
			    NULL == (data = gsf_input_read (input->source, 4, NULL)))
				return TRUE;
			/* FIXME, but how?  The size read here is modulo 2^32.  */
			input->uncompressed_size = GSF_LE_GET_GUINT32 (data);

			if (input->uncompressed_size / 1000 > gsf_input_size (input->source)) {
				g_warning ("Suspiciously well compressed file with better than 1000:1 ratio.\n"
					   "It is probably truncated or corrupt");
			}
		}

		if (gsf_input_seek (input->source, 2 + 1 + 1 + 6, G_SEEK_SET))
			return TRUE;

		if (flags & GZIP_EXTRA_FIELD) {
			if (NULL == (data = gsf_input_read (input->source, 2, NULL)))
				return TRUE;
			len = GSF_LE_GET_GUINT16 (data);
			if (NULL == gsf_input_read (input->source, len, NULL))
				return TRUE;
		}
		if (flags & GZIP_ORIGINAL_NAME) {
			/* Skip over the filename (which is in ISO 8859-1 encoding).  */
			do {
				if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
					return TRUE;
			} while (*data != 0);
		}

		if (flags & GZIP_HAS_COMMENT) {
			/* Skip over the comment (which is in ISO 8859-1 encoding).  */
			do {
				if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
					return TRUE;
			} while (*data != 0);
		}

		if (flags & GZIP_HEADER_CRC &&
		    NULL == (data = gsf_input_read (input->source, 2, NULL)))
			return TRUE;

		input->header_size = input->source->cur_offset;
		/* the last 8 bytes are the crc and size.  */
		input->trailer_size = 8;
	}

	gsf_input_set_size (GSF_INPUT (input), input->uncompressed_size);

	if (gsf_input_remaining (input->source) < input->trailer_size)
		return TRUE;	/* No room for payload */

	return FALSE;
}

static gboolean
init_zip (GsfInputGZip *gzip, GError **err)
{
	gsf_off_t cur_pos;

	if (Z_OK != inflateInit2 (&(gzip->stream), -MAX_WBITS)) {
		if (err != NULL)
			*err = g_error_new (gsf_input_error_id (), 0,
				"Unable to initialize zlib");
		return TRUE;
	}

	cur_pos = gsf_input_tell (gzip->source);
	if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) {
		if (err)
			*err = g_error_new (gsf_input_error_id (), 0,
					    "Failed to rewind source");
		return TRUE;
	}

	if (check_header (gzip) != FALSE) {
		if (err != NULL)
			*err = g_error_new (gsf_input_error_id (), 0,
				"Invalid gzip header");
		if (gsf_input_seek (gzip->source, cur_pos, G_SEEK_SET)) {
			g_warning ("attempt to restore position failed ??");
		}
		return TRUE;
	}

	return FALSE;
}

/**
 * gsf_input_gzip_new :
 * @source : The underlying data source.
 * @err	   : optionally %NULL.
 *
 * Adds a reference to @source.
 *
 * Returns: a new file or %NULL.
 **/
GsfInput *
gsf_input_gzip_new (GsfInput *source, GError **err)
{
	GsfInputGZip *gzip;

	g_return_val_if_fail (GSF_IS_INPUT (source), NULL);

	gzip = g_object_new (GSF_INPUT_GZIP_TYPE,
			     "source", source,
			     NULL);
	if (G_UNLIKELY (NULL == gzip)) return NULL;

	if (gzip->err) {
		if (err)
			*err = g_error_copy (gzip->err);
		g_object_unref (gzip);
		return NULL;
	}
	gsf_input_set_name (GSF_INPUT (gzip), gsf_input_name (source));

	return GSF_INPUT (gzip);
}

static void
gsf_input_gzip_finalize (GObject *obj)
{
	GsfInputGZip *gzip = (GsfInputGZip *)obj;

	if (gzip->source != NULL) {
		g_object_unref (G_OBJECT (gzip->source));
		gzip->source = NULL;
	}

	g_free (gzip->buf);

	if (gzip->stream.state != NULL)
		inflateEnd (&(gzip->stream));

	g_clear_error (&gzip->err);

	parent_class->finalize (obj);
}

static GsfInput *
gsf_input_gzip_dup (GsfInput *src_input, GError **err)
{
	GsfInputGZip const *src = (GsfInputGZip *)src_input;
	GsfInputGZip *dst;
	GsfInput *src_source_copy;

	if (src->source) {
		src_source_copy = gsf_input_dup (src->source, err);
		if (err)
			return NULL;
	} else
		src_source_copy = NULL;

	dst = g_object_new (GSF_INPUT_GZIP_TYPE,
			    "source", src_source_copy,
			    "raw", src->raw,
			    NULL);

	if (src_source_copy)
		g_object_unref (src_source_copy);

	if (G_UNLIKELY (NULL == dst))
		return NULL;

	if (src->err) {
		g_clear_error (&dst->err);
		dst->err = g_error_copy (src->err);
	} else if (dst->err) {
		if (err)
			*err = g_error_copy (dst->err);
		g_object_unref (dst);
		return NULL;
	}

	return GSF_INPUT (dst);
}

static guint8 const *
gsf_input_gzip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
{
	GsfInputGZip *gzip = GSF_INPUT_GZIP (input);

	if (buffer == NULL) {
		if (gzip->buf_size < num_bytes) {
			gzip->buf_size = MAX (num_bytes, 256);
			g_free (gzip->buf);
			gzip->buf = g_new (guint8, gzip->buf_size);
		}
		buffer = gzip->buf;
	}

	gzip->stream.next_out = buffer;
	gzip->stream.avail_out = num_bytes;
	while (gzip->stream.avail_out != 0) {
		int zerr;
		if (gzip->stream.avail_in == 0) {
			gsf_off_t remain = gsf_input_remaining (gzip->source);
			if (remain <= gzip->trailer_size) {
				if (remain < gzip->trailer_size || gzip->stop_byte_added) {
					g_clear_error (&gzip->err);
					gzip->err = g_error_new
						(gsf_input_error_id (), 0,
						 "truncated source");
					return NULL;
				}
				/* zlib requires an extra byte.  */
				gzip->stream.avail_in = 1;
				gzip->gzipped_data = "";
				gzip->stop_byte_added = TRUE;
			} else {
				size_t n = MIN (remain - gzip->trailer_size,
						Z_BUFSIZE);

				gzip->gzipped_data =
					gsf_input_read (gzip->source, n, NULL);
				if (!gzip->gzipped_data) {
					g_clear_error (&gzip->err);
					gzip->err = g_error_new
						(gsf_input_error_id (), 0,
						 "Failed to read from source");
					return NULL;
				}
				gzip->stream.avail_in = n;
			}
			gzip->stream.next_in = (Byte *)gzip->gzipped_data;
		}
		zerr = inflate (&(gzip->stream), Z_NO_FLUSH);
		if (zerr != Z_OK) {
			if (zerr != Z_STREAM_END)
				return NULL;
			/* Premature end of stream.  */
			if (gzip->stream.avail_out != 0)
				return NULL;
		}
	}

	gzip->crc = crc32 (gzip->crc, buffer, (uInt)(gzip->stream.next_out - buffer));
	return buffer;
}

static gboolean
gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
{
	GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
	/* Global flag -- we don't want one per stream.  */
	static gboolean warned = FALSE;
	gsf_off_t pos = offset;

	/* Note, that pos has already been sanity checked.  */
	switch (whence) {
	case G_SEEK_SET : break;
	case G_SEEK_CUR : pos += input->cur_offset;	break;
	case G_SEEK_END : pos += input->size;		break;
	default : return TRUE;
	}

	if (pos < input->cur_offset) {
		if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET))
			return TRUE;
		gzip->crc = crc32 (0L, Z_NULL, 0);
		gzip->stream.avail_in = 0;
		if (inflateReset (&(gzip->stream)) != Z_OK)
			return TRUE;
		input->cur_offset = 0;
	}

	if (gsf_input_seek_emulate (input, pos))
		return TRUE;

	gzip->seek_skipped += pos;
	if (!warned &&
	    gzip->seek_skipped != pos && /* Don't warn for single seek.  */
	    gzip->seek_skipped >= 1000000) {
		warned = TRUE;
		g_warning ("Seeking in gzipped streams is awfully slow.");
	}

	return FALSE;
}

static void
gsf_input_gzip_init (GObject *obj)
{
	GsfInputGZip *gzip = GSF_INPUT_GZIP (obj);

	gzip->source = NULL;
	gzip->raw = FALSE;
	gzip->uncompressed_size = -1;
	gzip->err = NULL;
	gzip->stream.zalloc	= (alloc_func)0;
	gzip->stream.zfree	= (free_func)0;
	gzip->stream.opaque	= (voidpf)0;
	gzip->stream.next_in	= Z_NULL;
	gzip->stream.next_out	= Z_NULL;
	gzip->stream.avail_in	= gzip->stream.avail_out = 0;
	gzip->crc		= crc32 (0L, Z_NULL, 0);
	gzip->buf		= NULL;
	gzip->buf_size		= 0;
	gzip->seek_skipped = 0;
}

static void
gsf_input_gzip_get_property (GObject     *object,
			     guint        property_id,
			     GValue      *value,
			     GParamSpec  *pspec)
{
	GsfInputGZip *gzip = (GsfInputGZip *)object;

	switch (property_id) {
	case PROP_RAW:
		g_value_set_boolean (value, gzip->raw);
		break;
	case PROP_SOURCE:
		g_value_set_object (value, gzip->source);
		break;
	case PROP_UNCOMPRESSED_SIZE:
		g_value_set_int64 (value, gzip->uncompressed_size);
		break;
	default:
		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
		break;
	}
}

static void
gsf_input_gzip_set_source (GsfInputGZip *gzip, GsfInput *source)
{
	if (source)
		g_object_ref (GSF_INPUT (source));
	if (gzip->source)
		g_object_unref (gzip->source);
	gzip->source = source;
}

static void
gsf_input_gzip_set_property (GObject      *object,
			     guint         property_id,
			     GValue const *value,
			     GParamSpec   *pspec)
{
	GsfInputGZip *gzip = (GsfInputGZip *)object;

	switch (property_id) {
	case PROP_RAW:
		gzip->raw = g_value_get_boolean (value);
		break;
	case PROP_SOURCE:
		gsf_input_gzip_set_source (gzip, g_value_get_object (value));
		break;
	case PROP_UNCOMPRESSED_SIZE:
		gzip->uncompressed_size = g_value_get_int64 (value);
		break;
	default:
		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
		break;
	}
}

static GObject*
gsf_input_gzip_constructor (GType                  type,
			    guint                  n_construct_properties,
			    GObjectConstructParam *construct_params)
{
  GsfInputGZip *gzip;

  gzip = (GsfInputGZip *)(parent_class->constructor (type,
						     n_construct_properties,
						     construct_params));

  if (!gzip->source) {
	  g_clear_error (&gzip->err);
	  gzip->err = g_error_new (gsf_input_error_id (), 0,
				   "NULL source");
  } else if (gzip->raw && gzip->uncompressed_size < 0) {
	  g_clear_error (&gzip->err);
	  gzip->err = g_error_new (gsf_input_error_id (), 0,
				   "Uncompressed size not set");
  } else if (init_zip (gzip, &gzip->err) != FALSE) {
	  /* Nothing more.  */
  }

  return (GObject *)gzip;
}

static void
gsf_input_gzip_class_init (GObjectClass *gobject_class)
{
	GsfInputClass *input_class = GSF_INPUT_CLASS (gobject_class);

	gobject_class->constructor  = gsf_input_gzip_constructor;
	gobject_class->finalize     = gsf_input_gzip_finalize;
	gobject_class->set_property = gsf_input_gzip_set_property;
	gobject_class->get_property = gsf_input_gzip_get_property;
	input_class->Dup	    = gsf_input_gzip_dup;
	input_class->Read	    = gsf_input_gzip_read;
	input_class->Seek	    = gsf_input_gzip_seek;

	g_object_class_install_property
		(gobject_class,
		 PROP_RAW,
		 g_param_spec_boolean ("raw", "Raw",
				       "Whether to read compressed data with no header and no trailer.",
				       FALSE,
				       GSF_PARAM_STATIC |
				       G_PARAM_READWRITE |
				       G_PARAM_CONSTRUCT_ONLY));
	g_object_class_install_property
		(gobject_class,
		 PROP_SOURCE,
		 g_param_spec_object ("source", "Source",
				      "Where the compressed data comes from.",
				      GSF_INPUT_TYPE,
				      GSF_PARAM_STATIC |
				      G_PARAM_READWRITE |
				      G_PARAM_CONSTRUCT_ONLY));
	/**
	 * GsfInputGzip:uncompressed_size:
	 *
	 * The size that the data will have after uncompression.
	 * The is mandatory for raw streams and if the uncompressed size is
	 * larger than 4GB.
	 */  
	g_object_class_install_property
		(gobject_class,
		 PROP_UNCOMPRESSED_SIZE,
		 g_param_spec_int64 ("uncompressed-size", "Size after decompression",
				     "The source's uncompressed size",
				     -1, G_MAXINT64, -1,
				     GSF_PARAM_STATIC |
				     G_PARAM_READWRITE |
				     G_PARAM_CONSTRUCT_ONLY));

	parent_class = g_type_class_peek_parent (gobject_class);
}

GSF_CLASS (GsfInputGZip, gsf_input_gzip,
	   gsf_input_gzip_class_init, gsf_input_gzip_init,
	   GSF_INPUT_TYPE)



syntax highlighted by Code2HTML, v. 0.9.1