/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * gsf-msole-utils.c: 
 *
 * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
 * Copyright (C) 2002-2006 Dom Lachowicz (cinamod@hotmail.com)
 * excel_iconv* family of functions (C) 2001 by Vlad Harchev <hvv@hippo.ru>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2.1 of the GNU Lesser General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

#include <gsf-config.h>
#include <gsf/gsf-docprop-vector.h>
#include <gsf/gsf-msole-utils.h>
#include <gsf/gsf-input.h>
#include <gsf/gsf-output.h>
#include <gsf/gsf-utils.h>
#include <gsf/gsf-timestamp.h>
#include <gsf/gsf-meta-names.h>
#include <gsf/gsf-doc-meta-data.h>
#include <gsf/gsf-clip-data.h>

#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <glib/gi18n-lib.h>

#define NO_DEBUG_OLE_PROPS
#ifndef NO_DEBUG_OLE_PROPS
#define d(code)	do { code } while (0)
#else
#define d(code)
#endif

/*
 * The Format Identifier for Summary Information
 * F29F85E0-4FF9-1068-AB91-08002B27B3D9
 */
static guint8 const component_guid [] = {
	0xe0, 0x85, 0x9f, 0xf2, 0xf9, 0x4f, 0x68, 0x10,
	0xab, 0x91, 0x08, 0x00, 0x2b, 0x27, 0xb3, 0xd9
};

/*
 * The Format Identifier for Document Summary Information
 * D5CDD502-2E9C-101B-9397-08002B2CF9AE
 */
static guint8 const document_guid [] = {
	0x02, 0xd5, 0xcd, 0xd5, 0x9c, 0x2e, 0x1b, 0x10,
	0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae
};

/*
 * The Format Identifier for User-Defined Properties
 * D5CDD505-2E9C-101B-9397-08002B2CF9AE
 */
static guint8 const user_guid [] = {
	0x05, 0xd5, 0xcd, 0xd5, 0x9c, 0x2e, 0x1b, 0x10,
	0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae
};

typedef enum {
	COMMON_PROP,	/* in either summary or docsummary */
	COMPONENT_PROP, /* SummaryInformation properties */
	DOC_PROP,	/* DocumentSummaryInformation properties */
	USER_PROP
} GsfMSOleMetaDataType;

typedef enum {
	VT_EMPTY	   = 0,
	VT_NULL		   = 1,
	VT_I2		   = 2,
	VT_I4		   = 3,
	VT_R4		   = 4,
	VT_R8		   = 5,
	VT_CY		   = 6,
	VT_DATE		   = 7,
	VT_BSTR		   = 8,
	VT_DISPATCH	   = 9,
	VT_ERROR	   = 10,
	VT_BOOL		   = 11,
	VT_VARIANT	   = 12,
	VT_UNKNOWN	   = 13,
	VT_DECIMAL	   = 14,

	VT_I1		   = 16,
	VT_UI1		   = 17,
	VT_UI2		   = 18,
	VT_UI4		   = 19,
	VT_I8		   = 20,
	VT_UI8		   = 21,
	VT_INT		   = 22,
	VT_UINT		   = 23,
	VT_VOID		   = 24,
	VT_HRESULT	   = 25,
	VT_PTR		   = 26,
	VT_SAFEARRAY	   = 27,
	VT_CARRAY	   = 28,
	VT_USERDEFINED	   = 29,
	VT_LPSTR	   = 30,
	VT_LPWSTR	   = 31,

	VT_FILETIME	   = 64,
	VT_BLOB		   = 65,
	VT_STREAM	   = 66,
	VT_STORAGE	   = 67,
	VT_STREAMED_OBJECT = 68,
	VT_STORED_OBJECT   = 69,
	VT_BLOB_OBJECT	   = 70,
	VT_CF		   = 71,
	VT_CLSID	   = 72,
	VT_VECTOR	   = 0x1000
} GsfMSOleVariantType;

typedef struct {
	char const		*ms_name;
	GsfMSOleMetaDataType	 section;
	char const		*gsf_name;
	guint32			 id;
	GsfMSOleVariantType	 prefered_type;
} GsfMSOleMetaDataPropMap;

typedef struct {
	guint32		id;
	gsf_off_t	offset;
} GsfMSOleMetaDataProp;

typedef struct {
	GsfMSOleMetaDataType type;
	gsf_off_t   offset;
	guint32	    size, num_props;
	GIConv	    iconv_handle;
	unsigned    char_size;
	GHashTable *dict;
} GsfMSOleMetaDataSection;

static GsfMSOleMetaDataPropMap const builtin_props [] = {
	{ "Dictionary",		  COMMON_PROP,	GSF_META_NAME_DICTIONARY,            0,	   0, /* magic */},
	{ "CodePage",		  COMMON_PROP,	GSF_META_NAME_CODEPAGE,              1,	   VT_I2 },
	{ "LOCALE_SYSTEM_DEFAULT",COMMON_PROP,	GSF_META_NAME_LOCALE_SYSTEM_DEFAULT, 0x80000000, VT_UI4},
	{ "CASE_SENSITIVE",	  COMMON_PROP,	GSF_META_NAME_CASE_SENSITIVE,        0x80000003, VT_UI4},
	{ "Category",		DOC_PROP,	GSF_META_NAME_CATEGORY,             2,	VT_LPSTR },
	{ "PresentationFormat",	DOC_PROP,	GSF_META_NAME_PRESENTATION_FORMAT,  3,	VT_LPSTR },
	{ "NumBytes",		DOC_PROP,	GSF_META_NAME_BYTE_COUNT,           4,	VT_I4 },
	{ "NumLines",		DOC_PROP,	GSF_META_NAME_LINE_COUNT,           5,	VT_I4 },
	{ "NumParagraphs",	DOC_PROP,	GSF_META_NAME_PARAGRAPH_COUNT,      6,	VT_I4 },
	{ "NumSlides",		DOC_PROP,	GSF_META_NAME_SLIDE_COUNT,          7,	VT_I4 },
	{ "NumNotes",		DOC_PROP,	GSF_META_NAME_NOTE_COUNT,           8,	VT_I4 },
	{ "NumHiddenSlides",	DOC_PROP,	GSF_META_NAME_HIDDEN_SLIDE_COUNT,   9,	VT_I4 },
	{ "NumMMClips",		DOC_PROP,	GSF_META_NAME_MM_CLIP_COUNT,       10,	VT_I4 },
	{ "Scale",		DOC_PROP,	GSF_META_NAME_SCALE,               11,	VT_BOOL },
	{ "HeadingPairs",	DOC_PROP,	GSF_META_NAME_HEADING_PAIRS,       12,	VT_VECTOR | VT_VARIANT },
	{ "DocumentParts",	DOC_PROP,	GSF_META_NAME_DOCUMENT_PARTS,      13,	VT_VECTOR | VT_LPSTR },
	{ "Manager",		DOC_PROP,	GSF_META_NAME_MANAGER,             14,	VT_LPSTR },
	{ "Company",		DOC_PROP,	GSF_META_NAME_COMPANY,             15,	VT_LPSTR },
	{ "LinksDirty",		DOC_PROP,	GSF_META_NAME_LINKS_DIRTY,         16,	VT_BOOL },
	{ "DocSumInfo_17",      DOC_PROP,	GSF_META_NAME_MSOLE_UNKNOWN_17,    17,	VT_UNKNOWN },
	{ "DocSumInfo_18",      DOC_PROP,	GSF_META_NAME_MSOLE_UNKNOWN_18,    18,	VT_UNKNOWN },
	{ "DocSumInfo_19",      DOC_PROP,	GSF_META_NAME_MSOLE_UNKNOWN_19,    19,	VT_BOOL },
	{ "DocSumInfo_20",      DOC_PROP,	GSF_META_NAME_MSOLE_UNKNOWN_20,    20,	VT_UNKNOWN },
	{ "DocSumInfo_21",      DOC_PROP,	GSF_META_NAME_MSOLE_UNKNOWN_21,    21,	VT_UNKNOWN },
	{ "DocSumInfo_22",      DOC_PROP,	GSF_META_NAME_MSOLE_UNKNOWN_22,    22,	VT_BOOL },
	{ "DocSumInfo_23",      DOC_PROP,	GSF_META_NAME_MSOLE_UNKNOWN_23,    23,	VT_I4 },
	{ "Title",		COMPONENT_PROP, GSF_META_NAME_TITLE,		    2,	VT_LPSTR },
	{ "Subject",		COMPONENT_PROP, GSF_META_NAME_SUBJECT,		    3,	VT_LPSTR },
	{ "Author",		COMPONENT_PROP, GSF_META_NAME_CREATOR,		    4,	VT_LPSTR },
	{ "Keywords",		COMPONENT_PROP, GSF_META_NAME_KEYWORDS,		    5,	VT_LPSTR },
	{ "Comments",		COMPONENT_PROP, GSF_META_NAME_DESCRIPTION,	    6,	VT_LPSTR },
	{ "Template",		COMPONENT_PROP, GSF_META_NAME_TEMPLATE,		    7,	VT_LPSTR },
	{ "LastSavedBy",	COMPONENT_PROP, GSF_META_NAME_LAST_SAVED_BY,	    8,	VT_LPSTR },
	{ "RevisionNumber",	COMPONENT_PROP, GSF_META_NAME_REVISION_COUNT,	    9,	VT_LPSTR },
	{ "TotalEditingTime",	COMPONENT_PROP, GSF_META_NAME_EDITING_DURATION,	   10,	VT_FILETIME },
	{ "LastPrinted",	COMPONENT_PROP, GSF_META_NAME_LAST_PRINTED,	   11,	VT_FILETIME },
	{ "CreateTime",		COMPONENT_PROP, GSF_META_NAME_DATE_CREATED,	   12,	VT_FILETIME },
	{ "LastSavedTime",	COMPONENT_PROP, GSF_META_NAME_DATE_MODIFIED,	   13,	VT_FILETIME },
	{ "NumPages",		COMPONENT_PROP, GSF_META_NAME_PAGE_COUNT,	   14,	VT_I4 },
	{ "NumWords",		COMPONENT_PROP, GSF_META_NAME_WORD_COUNT,	   15,	VT_I4 },
	{ "NumCharacters",	COMPONENT_PROP, GSF_META_NAME_CHARACTER_COUNT,	   16,	VT_I4 },
	{ "Thumbnail",		COMPONENT_PROP, GSF_META_NAME_THUMBNAIL,	   17,	VT_CF },
	{ "AppName",		COMPONENT_PROP, GSF_META_NAME_GENERATOR,	   18,	VT_LPSTR },
	{ "Security",		COMPONENT_PROP, GSF_META_NAME_SECURITY,		   19,	VT_I4 }
};

static GHashTable *name_to_prop_hash = NULL;

static char const *
msole_vt_name (GsfMSOleVariantType type)
{
	static char const *names[] = {
		"VT_EMPTY",	"VT_NULL",	"VT_I2",	"VT_I4",	"VT_R4",
		"VT_R8",	"VT_CY",	"VT_DATE",	"VT_BSTR",	"VT_DISPATCH",
		"VT_ERROR",	"VT_BOOL",	"VT_VARIANT",	"VT_UNKNOWN",	"VT_DECIMAL",
		NULL,		"VT_I1",	"VT_UI1",	"VT_UI2",	"VT_UI4",
		"VT_I8",	"VT_UI8",	"VT_INT",	"VT_UINT",	"VT_VOID",
		"VT_HRESULT",	"VT_PTR",	"VT_SAFEARRAY",	"VT_CARRAY",	"VT_USERDEFINED",
		"VT_LPSTR",	"VT_LPWSTR",
	};	
	static char const *names2[] = {
		"VT_FILETIME",
		"VT_BLOB",	"VT_STREAM",	"VT_STORAGE",	"VT_STREAMED_OBJECT",
		"VT_STORED_OBJECT", "VT_BLOB_OBJECT", "VT_CF",	"VT_CLSID"
	};

	type &= ~VT_VECTOR;
	if (type <= VT_LPWSTR)
		return names[type];
	g_return_val_if_fail (type >= VT_FILETIME, "_UNKNOWN_");
	g_return_val_if_fail (type <= VT_CLSID, "_UNKNOWN_");
	return names2[type-VT_FILETIME];
}

static char const *
msole_prop_id_to_gsf (GsfMSOleMetaDataSection *section, guint32 id, gboolean *linked)
{
	char const *res = NULL;
	GsfMSOleMetaDataPropMap const *map = NULL;
	unsigned i = 0;

	*linked = FALSE;
	if (section->dict != NULL) {
		if (id & 0x1000000) {
			*linked = TRUE;
			id &= ~0x1000000;
			d (g_print ("LINKED "););
		}

		res = g_hash_table_lookup (section->dict, GINT_TO_POINTER (id));

		if (res != NULL) {
			d (g_print (res););
			return res;
		}
	}

	map = builtin_props ;
	i = G_N_ELEMENTS (builtin_props);
	while (i-- > 0)
		if (map[i].id == id &&
		    (map[i].section == COMMON_PROP || map[i].section == section->type)) {
			d (g_print (map[i].gsf_name););
			return map[i].gsf_name;
		}

	d (g_print ("_UNKNOWN_(0x%x %d)", id, id););

	return NULL;
}

static GsfMSOleMetaDataPropMap const *
msole_gsf_name_to_prop (char const *name)
{
	if (NULL == name_to_prop_hash) {
		int i;
		name_to_prop_hash = g_hash_table_new (g_str_hash, g_str_equal);
		for (i = G_N_ELEMENTS (builtin_props); i-- > 0; )
			g_hash_table_replace (name_to_prop_hash,
				(gpointer) builtin_props[i].gsf_name,
				(gpointer) (builtin_props+i));
	}

	return g_hash_table_lookup (name_to_prop_hash, (gpointer)name);
}

static void
set_error_missing_data (GError **error, const char *property_name, gsize size_needed, gsize size_gotten)
{
	g_set_error (error,
		     GSF_ERROR,
		     GSF_ERROR_INVALID_DATA,
		     _("Missing data when reading the %s property; got %" G_GSIZE_FORMAT "bytes, "
		       "but %" G_GSIZE_FORMAT " bytes at least are needed."),
		     property_name,
		     size_needed,
		     size_gotten);
}

/* Can return errors from gsf_blob_new() and GSF_ERROR_INVALID_DATA */
static gboolean
parse_vt_cf (GValue *res, guint8 const **data, guint8 const *data_end, GError **error)
{
	/* clipboard size		uint32		sizeof (clipboard format tag) + sizeof (clipboard data)
	 * clipboard format tag		int32		see below
	 * clipboard data		byte[]		see below
	 *
	 * Clipboard format tag:
	 * -1 - Windows clipboard format
	 * -2 - Macintosh clipboard format
	 * -3 - GUID that contains a format identifier (FMTID)
	 * >0 - custom clipboard format name plus data (see msdn site below)
	 *  0 - No data
	 *
	 * References:
	 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/propvariant.asp
	 * http://jakarta.apache.org/poi/hpsf/thumbnails.html
	 * http://linux.com.hk/docs/poi/org/apache/poi/hpsf/Thumbnail.html
	 * http://sparks.discreet.com/knowledgebase/public/solutions/ExtractThumbnailImg.htm
	 */
	guint32 clip_size, clip_data_size;
	gint32 clip_format;
	GsfBlob *blob;
	GsfClipData *clip_data;

	/* Clipboard size field */

	if (data_end < *data + 4) {
		set_error_missing_data (error, "VT_CF", 4, data_end - *data);
		return FALSE;
	}

	clip_size = GSF_LE_GET_GUINT32 (*data);

	if (clip_size < 4) {	/* must emcompass int32 format plus data size */
		g_set_error (error,
			     GSF_ERROR,
			     GSF_ERROR_INVALID_DATA,
			     _("Corrupt data in the VT_CF property; clipboard data length must be at least 4 bytes, "
			       "but the data says it only has %" G_GSIZE_FORMAT " bytes available."),
			     (gsize) clip_size);
		return FALSE;
	}

	*data += 4;

	/* Check clipboard format plus data size */

	if (data_end < *data + clip_size) {
		set_error_missing_data (error, "VT_CF", clip_size, data_end - *data);
		return FALSE;
	}

	clip_format = GSF_LE_GET_GINT32 (*data);
	*data += 4;

	switch (clip_format) {
	case GSF_CLIP_FORMAT_WINDOWS_CLIPBOARD:
	case GSF_CLIP_FORMAT_MACINTOSH_CLIPBOARD:
	case GSF_CLIP_FORMAT_GUID:
	case GSF_CLIP_FORMAT_NO_DATA:
		/* everything is ok */
		break;

	default:
		if (clip_format > 0)
			clip_format = GSF_CLIP_FORMAT_CLIPBOARD_FORMAT_NAME;
		else
			clip_format = GSF_CLIP_FORMAT_UNKNOWN;

		break;
	}

	clip_data_size = clip_size - 4;

	blob = gsf_blob_new (clip_data_size, *data, error);

	*data += clip_data_size;

	if (!blob)
		return FALSE;

	clip_data = gsf_clip_data_new (clip_format, blob);
	g_object_unref (blob);

	g_value_init (res, GSF_TYPE_CLIP_DATA);
	g_value_set_object (res, clip_data);
	g_object_unref (clip_data);

	return TRUE;
}

static GValue *
msole_prop_parse (GsfMSOleMetaDataSection *section,
		  guint32 type, guint8 const **data, guint8 const *data_end)
{
	GValue *res;
	char *str;
	guint32 len;
	gboolean const is_vector = type & VT_VECTOR;
	GError *error;

	g_return_val_if_fail (!(type & (unsigned)(~0x1fff)), NULL); /* not valid in a prop set */

	type &= 0xfff;

	if (is_vector) {
		/*
		 *  A vector is basically an array.  If the type associated with
		 *  it is a variant, then each element can have a different
		 *  variant type.  Otherwise, each element has the same variant
		 *  type associated with the vector.
		 */
		unsigned i, n;
		GsfDocPropVector *vector;

		g_return_val_if_fail (*data + 4 <= data_end, NULL);

		n = GSF_LE_GET_GUINT32 (*data);
		*data += 4;

		d (g_print (" array with %d elem\n", n);
		   gsf_mem_dump (*data, (unsigned)(data_end - *data)););
		
		vector = gsf_docprop_vector_new ();

		for (i = 0 ; i < n ; i++) {
			GValue *v;
			d (g_print ("\t[%d] ", i););
			v = msole_prop_parse (section, type, data, data_end);
			if (v) {
				if (G_IS_VALUE (v)) {
					gsf_docprop_vector_append (vector, v);
					g_value_unset (v);
				}
				g_free (v);
			}
		}

		res = g_new0 (GValue, 1);
		g_value_init (res, GSF_DOCPROP_VECTOR_TYPE);
		g_value_set_object (res, vector);
		g_object_unref (vector);
		return res;
	}

	res = g_new0 (GValue, 1);
	d (g_print ("%s\n", msole_vt_name (type)););
	switch (type) {
	case VT_EMPTY :
		/*
		 * A property with a type indicator of VT_EMPTY has no data
		 * associated with it; that is, the size of the value is zero.
		 */
		/* value::unset == empty */
		break;

	case VT_NULL :
		/* This is like a pointer to NULL */
		/* value::unset == null too :-) do we need to distinguish ? */
		break;

	case VT_I2 :
		/* 2-byte signed integer */
		g_return_val_if_fail (*data + 2 <= data_end, NULL);
		g_value_init (res, G_TYPE_INT);
		g_value_set_int	(res, GSF_LE_GET_GINT16 (*data));
		*data += 2;
		break;

	case VT_I4 :
		/* 4-byte signed integer */
		g_return_val_if_fail (*data + 4 <= data_end, NULL);
		g_value_init (res, G_TYPE_INT);
		g_value_set_int	(res, GSF_LE_GET_GINT32 (*data));
		*data += 4;
		break;

	case VT_R4 :
		/* 32-bit IEEE floating-point value */
		g_return_val_if_fail (*data + 4 <= data_end, NULL);
		g_value_init (res, G_TYPE_FLOAT);
		g_value_set_float (res, GSF_LE_GET_FLOAT (*data));
		*data += 4;
		break;

	case VT_R8 :
		/* 64-bit IEEE floating-point value */
		g_return_val_if_fail (*data + 8 <= data_end, NULL);
		g_value_init (res, G_TYPE_DOUBLE);
		g_value_set_double (res, GSF_LE_GET_DOUBLE (*data));
		*data += 8;
		break;

	case VT_CY :
		/* 8-byte two's complement integer (scaled by 10,000) */
		/* CHEAT : just store as an int64 for now */
		g_return_val_if_fail (*data + 8 <= data_end, NULL);
		g_value_init (res, G_TYPE_INT64);
		g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
		break;

	case VT_DATE :
		/* 
		 * 64-bit floating-point number representing the number of days
		 * (not seconds) since December 31, 1899.
		 */
/* FIXME FIXME FIXME  TODO */
		break;

	case VT_BSTR :
		/*
		 * Pointer to null-terminated Unicode string; the string is pre-
		 * ceeded by a DWORD representing the byte count of the number
		 * of bytes in the string (including the  terminating null).
		 */
/* FIXME FIXME FIXME  TODO */
		break;

	case VT_DISPATCH :
/* FIXME FIXME FIXME  TODO */
		break;

	case VT_BOOL :
		/* A boolean (WORD) value containg 0 (false) or -1 (true). */
		g_return_val_if_fail (*data + 1 <= data_end, NULL);
		g_value_init (res, G_TYPE_BOOLEAN);
		g_value_set_boolean (res, **data ? TRUE : FALSE);
		*data += 1;
		break;

	case VT_VARIANT :	 d (g_print ("\tcontaining a "););
		/*
		 * A type indicator (a DWORD) followed by the corresponding
		 *  value.  VT_VARIANT is only used in conjunction with
		 *  VT_VECTOR.
		 */
		g_free (res);
		type = GSF_LE_GET_GUINT32 (*data);
		*data += 4;
		return msole_prop_parse (section, type, data, data_end);

	case VT_UI1 :
		/* 1-byte unsigned integer */
		g_return_val_if_fail (*data + 1 <= data_end, NULL);
		g_value_init (res, G_TYPE_UCHAR);
		g_value_set_uchar (res, (guchar)(**data));
		*data += 1;
		break;

	case VT_UI2 :
		/* 2-byte unsigned integer */
		g_return_val_if_fail (*data + 2 <= data_end, NULL);
		g_value_init (res, G_TYPE_UINT);
		g_value_set_uint (res, GSF_LE_GET_GUINT16 (*data));
		*data += 2;
		break;

	case VT_UI4 :
		/* 4-type unsigned integer */
		g_return_val_if_fail (*data + 4 <= data_end, NULL);
		g_value_init (res, G_TYPE_UINT);
		g_value_set_uint (res, GSF_LE_GET_GUINT32 (*data));
		*data += 4;
		break;

	case VT_I8 :		 d (g_print ("VT_I8\n"););
		/* 8-byte signed integer */
		g_return_val_if_fail (*data + 8 <= data_end, NULL);
		g_value_init (res, G_TYPE_INT64);
		g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
		*data += 8;
		break;

	case VT_UI8 :
		/* 8-byte unsigned integer */
		g_return_val_if_fail (*data + 8 <= data_end, NULL);
		g_value_init (res, G_TYPE_UINT64);
		g_value_set_uint64 (res, GSF_LE_GET_GUINT64 (*data));
		*data += 8;
		break;

	case VT_LPSTR :
		/* 
		 * This is the representation of many strings.  It is stored in
		 * the same representation as VT_BSTR.  Note that the serialized
		 * representation of VP_LPSTR has a preceding byte count, whereas
		 * the in-memory representation does not.
		 */
		/* be anal and safe */
		g_return_val_if_fail (*data + 4 <= data_end, NULL);

		len = GSF_LE_GET_GUINT32 (*data);

		g_return_val_if_fail (len < 0x10000, NULL);
		g_return_val_if_fail (*data + 4 + len*section->char_size <= data_end, NULL);

		error = NULL;
		d (gsf_mem_dump (*data + 4, len * section->char_size););
		str = g_convert_with_iconv (*data + 4,
			len * section->char_size,
			section->iconv_handle, NULL, NULL, &error);

		g_value_init (res, G_TYPE_STRING);
		if (NULL != str) {
			g_value_set_string (res, str);
			g_free (str);
		} else if (NULL != error) {
			g_warning ("error: %s", error->message);
			g_error_free (error);
		} else {
			g_warning ("unknown error converting string property, using blank");
		}
		*data += 4 + len * section->char_size;
		break;

	case VT_LPWSTR :
		/*
		 * A counted and null-terminated Unicode string; a DWORD character
		 * count (where the count includes the terminating null) followed
		 * by that many Unicode (16-bit) characters.  Note that the count
		 * is character count, not byte count.
		 */
		/* be anal and safe */
		g_return_val_if_fail (*data + 4 <= data_end, NULL);

		len = GSF_LE_GET_GUINT32 (*data);

		g_return_val_if_fail (len < 0x10000, NULL);
		g_return_val_if_fail (*data + 4 + len * 2 <= data_end, NULL);

		error = NULL;
		d (gsf_mem_dump (*data + 4, len*2););
		str = g_convert (*data + 4, len*2,
				 "UTF-8", "UTF-16LE", NULL, NULL, &error);

		g_value_init (res, G_TYPE_STRING);
		if (NULL != str) {
			g_value_set_string (res, str);
			g_free (str);
		} else if (NULL != error) {
			g_warning ("error: %s", error->message);
			g_error_free (error);
		} else {
			g_warning ("unknown error converting string property, using blank");
		}
		*data += 4 + len*2;
		break;

	case VT_FILETIME :
		/* 64-bit FILETIME structure, as defined by Win32. */
		g_return_val_if_fail (*data + 8 <= data_end, NULL);
	{
		/* ft * 100ns since Jan 1 1601 */
		guint64 ft = GSF_LE_GET_GUINT64 (*data);
		GsfTimestamp ts;

		ft /= 10000000; /* convert to seconds */
		ft -= G_GINT64_CONSTANT (11644473600); /* move to Jan 1 1970 */
		ts.timet = (time_t)ft;
		g_value_init (res, GSF_TIMESTAMP_TYPE);
		gsf_value_set_timestamp (res, &ts);
		*data += 8;
		break;
	}
	case VT_BLOB :
		/*
		 * A DWORD count of bytes, followed by that many bytes of data.
		 * The byte count does not include the four bytes for the length
		 * of the count itself:  An empty blob would have a count of
		 * zero, followed by zero bytes.  Thus the serialized represen-
		 * tation of a VT_BLOB is similar to that of a VT_BSTR but does
		 * not guarantee a null byte at the end of the data.
		 */
/* FIXME FIXME FIXME  TODO */
		g_free (res);
		res = NULL;
		break;

	case VT_STREAM :
		/*
		 * Indicates the value is stored in a stream that is sibling to
		 * the CONTENTS stream.  Following this type indicator is data
		 * in the format of a serialized VT_LPSTR, which names the stream
		 * containing the data.
		 */
/* FIXME FIXME FIXME  TODO */
		g_free (res);
		res = NULL;
		break;

	case VT_STORAGE :
		/*
		 * Indicates the value is stored in an IStorage that is sibling
		 * to the CONTENTS stream.  Following this type indicator is data
		 * in the format of a serialized VT_LPSTR, which names the
		 * IStorage containing the data.
		 */
/* FIXME FIXME FIXME  TODO */
		g_free (res);
		res = NULL;
		break;

	case VT_STREAMED_OBJECT:
		/*
		 * Same as VT_STREAM, but indicates that the stream contains a
		 * serialized object, which is a class ID followed by initiali-
		 * zation data for the class.
		 */
/* FIXME FIXME FIXME  TODO */
		g_free (res);
		res = NULL;
		break;

	case VT_STORED_OBJECT :
		/*
		 * Same as VT_STORAGE, but indicates that the designated IStorage
		 * contains a loadable object.
		 */
/* FIXME FIXME FIXME  TODO */
		g_free (res);
		res = NULL;
		break;

	case VT_BLOB_OBJECT :
		/*
		 * Contains a serialized object in the same representation as
		 * would appear in a VT_STREAMED_OBJECT.  That is, following the
		 * VT_BLOB_OBJECT tag is a DWORD byte count of the remaining data
		 * (where the byte count does not include the size of itself)
		 * which is in the format of a class ID followed by initialization
		 * data for that class
		 */
/* FIXME FIXME FIXME  TODO */

		g_free (res);
		res = NULL;
		break;

	case VT_CF :
		error = NULL;
		if (!parse_vt_cf (res, data, data_end, &error)) {
			/* suck, we can't propagate the error upwards */
			g_warning ("error: %s", error->message);
			g_error_free (error);
			g_free (res);
			res = NULL;
		}
		break;

	case VT_CLSID :
		/* A class ID (or other GUID) */
		*data += 16;
		g_free (res);
		res = NULL;
		break;

	case VT_ERROR :
		/* A DWORD containing a status code. */
	case VT_UNKNOWN :
	case VT_DECIMAL :
	case VT_I1 :
		/* 1-byte signed integer */
	case VT_INT :
	case VT_UINT :
	case VT_VOID :
	case VT_HRESULT :
	case VT_PTR :
	case VT_SAFEARRAY :
	case VT_CARRAY :
	case VT_USERDEFINED :
		g_warning ("type %s (0x%x) is not permitted in property sets",
			   msole_vt_name (type), type);
		g_free (res);
		res = NULL;
		break;

	default :
		g_warning ("Unknown property type %d (0x%x)", type, type);
		g_free (res);
		res = NULL;
	}

	if (res != NULL && G_IS_VALUE (res)) {
		d ( {
			char *val = g_strdup_value_contents (res);
			g_print ("%s\n", val);
			g_free (val);
		});
	} else {
		d ({
			char const *type_name = msole_vt_name (type);
			if (type_name) {
				g_print ("A '%s' property could not be parsed\n", type_name);
			} else {
				g_print ("A %d property could not be parsed\n", type);
			}
		});
	}
	return res;
}

static gboolean
msole_prop_read (GsfInput *in,
		 GsfMSOleMetaDataSection *section,
		 GsfMSOleMetaDataProp    *props,
		 unsigned		  i,
		 GsfDocMetaData		 *accum)
{
	guint32 type;
	guint8 const *data;
	/* FIXME : why size-4 ? I must be missing something */
	gsf_off_t size = ((i+1) >= section->num_props)
		? section->size-4 : props[i+1].offset;
	char   *name;
	GValue *val;

	g_return_val_if_fail (i < section->num_props, FALSE);
	g_return_val_if_fail (size >= props[i].offset + 4, FALSE);

	size -= props[i].offset; /* includes the type id */
	if (gsf_input_seek (in, section->offset+props[i].offset, G_SEEK_SET) ||
	    NULL == (data = gsf_input_read (in, size, NULL))) {
		g_warning ("failed to read prop #%d", i);
		return FALSE;
	}

	type = GSF_LE_GET_GUINT32 (data);
	data += 4;

	/* dictionary is magic */
	if (props[i].id == 0) {
		guint32 len, id, i, n;
		gsize gslen;
		char *name;
		guint8 const *start = data;

		g_return_val_if_fail (section->dict == NULL, FALSE);

		section->dict = g_hash_table_new_full (
			g_direct_hash, g_direct_equal,
			NULL, g_free);

		d ({ g_print ("Dictionary = \n"); gsf_mem_dump (data-4, size); });
		n = type;
		for (i = 0 ; i < n ; i++) {
			id = GSF_LE_GET_GUINT32 (data);
			len = GSF_LE_GET_GUINT32 (data + 4);

			g_return_val_if_fail (len < 0x10000, FALSE);

			gslen = 0;
			name = g_convert_with_iconv (data + 8,
				len * section->char_size,
				section->iconv_handle, &gslen, NULL, NULL);
			len = (guint32)gslen;
			data += 8 + len;

			d (g_print ("\t%u == %s\n", id, name););
			g_hash_table_replace (section->dict,
				GINT_TO_POINTER (id), name);

			/* MS documentation blows goats !
			 * The docs claim there are padding bytes in the dictionary.
			 * Their examples show padding bytes.
			 * In reality non-unicode strings do not see to have padding.
			 */
			if (section->char_size != 1 && (data - start) % 4)
				data += 4 - ((data - start) % 4);
		}
	} else {
		gboolean linked;
		d (g_print ("===> %u) ", i);
		   gsf_mem_dump (data-4, size););

		name = g_strdup (msole_prop_id_to_gsf (section, props[i].id, &linked));
		d (g_print (" @ %x %x = ", (unsigned)props[i].offset, (unsigned)size););
		val = msole_prop_parse (section, type, &data, data + size);

		if (NULL != name && NULL != val) {
			if (linked) {
				GsfDocProp *prop = gsf_doc_meta_data_lookup (accum, name);
				if (NULL == prop) {
					g_warning ("linking property '%s' before it\'s value is specified",
						   (name ? name : "<null>"));
				} else if (!G_VALUE_HOLDS_STRING (val)) {
					g_warning ("linking property '%s' before it\'s value is specified",
						   (name ? name : "<null>"));
				} else
					gsf_doc_prop_set_link (prop,
						g_value_dup_string (val));
			} else {
				gsf_doc_meta_data_insert (accum, name, val);
				val = NULL;
				name = NULL;
			}
		}

		if (NULL != val) {
			if (G_IS_VALUE (val))
				g_value_unset (val);
			g_free (val);
		}
		g_free (name);
	}

	return TRUE;
}

static int
msole_prop_cmp (gconstpointer a, gconstpointer b)
{
	GsfMSOleMetaDataProp const *prop_a = a;
	GsfMSOleMetaDataProp const *prop_b = b;

	if (prop_a->offset < prop_b->offset)
		return -1;
	else if (prop_a->offset > prop_b->offset)
		return +1;
	else
		return 0;
}

/**
 * gsf_msole_metadata_read :
 * @in    : #GsfInput
 * @accum : #GsfDocMetaData
 *
 * Read a stream formated as a set of MS OLE properties from @in and store the
 * results in @accum.
 *
 * Returns: GError which the caller must free on error.
 **/
GError *
gsf_msole_metadata_read	(GsfInput *in, GsfDocMetaData *accum)
{
	guint8 const *data;
	guint16 version;
	guint32 os, num_sections;
	unsigned i, j;
	GsfMSOleMetaDataSection *sections;
	GsfMSOleMetaDataProp	*props;
	GsfDocProp		*prop;
	
	/* http://bugzilla.gnome.org/show_bug.cgi?id=352055 
	 * psiwin generates files with empty property sections */
	if (gsf_input_size (in) <= 0)
		return NULL;

	data = gsf_input_read (in, 28, NULL);
	if (NULL == data)
		return g_error_new (gsf_input_error_id (), 0,
			"Unable to read MS property stream header");

	d ({g_print ("===================================\n"
		   "header class id ==\n");
	   gsf_mem_dump (data, 28);});
	/*
	 * Validate the Property Set Header.
	 * Format (bytes) :
	 *   00 - 01	Byte order		0xfffe
	 *   02 - 03	Format			0
	 *   04 - 05	OS Version		high word is the OS
	 *   06 - 07				low  word is the OS version
	 *					  0 = win16
	 *					  1 = mac
	 *					  2 = win32
	 *   08 - 23	Class Identifier	Usually Format ID
	 *   24 - 27	Section count		Should be at least 1
	 */
	os	     = GSF_LE_GET_GUINT16 (data + 6);
	version	     = GSF_LE_GET_GUINT16 (data + 2);
	num_sections = GSF_LE_GET_GUINT32 (data + 24);
	if (GSF_LE_GET_GUINT16 (data + 0) != 0xfffe
	    || (version != 0 && version != 1)
	    || os > 2
	    || num_sections > 100) /* arbitrary sanity check */
		return g_error_new (gsf_input_error_id (), 0,
			"Invalid MS property stream header");

	/* extract the section info */
	/*
	 * The Format ID/Offset list follows.
	 * Format:
	 *   00 - 16	Section Name		Format ID
	 *   16 - 19	Section Offset		The offset is the number of
	 *					bytes from the start of the
	 *					whole stream to where the
	 *					section begins.
	 */
	sections = (GsfMSOleMetaDataSection *)g_alloca (sizeof (GsfMSOleMetaDataSection)* num_sections);
	for (i = 0 ; i < num_sections ; i++) {
		data = gsf_input_read (in, 20, NULL);
		if (NULL == data)
			return g_error_new (gsf_input_error_id (), 0,
				"Unable to read MS property stream header");
		if (!memcmp (data, component_guid, sizeof (component_guid)))
			sections [i].type = COMPONENT_PROP;
		else if (!memcmp (data, document_guid, sizeof (document_guid)))
			sections [i].type = DOC_PROP;
		else if (!memcmp (data, user_guid, sizeof (user_guid)))
			sections [i].type = USER_PROP;
		else {
			sections [i].type = USER_PROP;
			g_warning ("Unknown property section type, treating it as USER");
			gsf_mem_dump (data, 16);
		}

		sections [i].offset = GSF_LE_GET_GUINT32 (data + 16);
	}

	/*
	 * A section is the third part of the property set stream.
	 * Format (bytes) :
	 *   00 - 03	Section size	A byte count for the section (which is inclusive
	 *				of the byte count itself and should always be a
	 *				multiple of 4);
	 *   04 - 07	Property count	A count of the number of properties
	 *   08 - xx   			An array of 32-bit Property ID/Offset pairs
	 *   yy - zz			An array of Property Type indicators/Value pairs
	 */
	for (i = 0 ; i < num_sections ; i++) {
		if (gsf_input_seek (in, sections[i].offset, G_SEEK_SET) ||
		    NULL == (data = gsf_input_read (in, 8, NULL)))
			return g_error_new (gsf_input_error_id (), 0,
				"Invalid MS property section");

		sections[i].iconv_handle = (GIConv)-1;
		sections[i].char_size    = 1;
		sections[i].dict      = NULL;
		sections[i].size      = GSF_LE_GET_GUINT32 (data); /* includes header */
		sections[i].num_props = GSF_LE_GET_GUINT32 (data + 4);

		d (g_print ("=============================================\n"
			   "===> section #%d : type %d at offset 0x%x, size 0x%x, numprops = %u\n",
			   i, (int)sections [i].type,
			   (guint32)sections [i].offset,
			   sections[i].size,
			   sections[i].num_props););

		if (sections[i].num_props <= 0)
			continue;

		if (sections[i].num_props > gsf_input_remaining(in) / 8)
			return g_error_new (gsf_input_error_id (), 0,
				"Invalid MS property stream header or file truncated");

		/*
		 * Get and save all the Property ID/Offset pairs.
		 * Format (bytes) :
		 *   00 - 03	id	Property ID
		 *   04 - 07	offset	The distance from the start of the section to the
		 *			start of the Property Type/Value pair.
		 */
		d (g_print ("Offsets\n"););
		props = g_new (GsfMSOleMetaDataProp, sections[i].num_props);
		for (j = 0; j < sections[i].num_props; j++) {
			if (NULL == (data = gsf_input_read (in, 8, NULL))) {
				g_free (props);
				return g_error_new (gsf_input_error_id (), 0,
					"Invalid MS property section");
			}

			props [j].id = GSF_LE_GET_GUINT32 (data);
			props [j].offset  = GSF_LE_GET_GUINT32 (data + 4);
			d (g_print ("%d) ID=%d, offset=0x%x\n", j,
				    props [j].id, (unsigned)props [j].offset););
		}

		/* order prop info by offset to facilitate bounds checking */
		qsort (props, sections[i].num_props,
		       sizeof (GsfMSOleMetaDataProp),
		       msole_prop_cmp);

		/*
		 * Find and process the code page.
		 * Property ID 1 is reserved as an indicator of the code page.
		 */
		sections[i].iconv_handle = (GIConv)-1;
		sections[i].char_size = 1;
		for (j = 0; j < sections[i].num_props; j++) /* first codepage */
			if (props[j].id == 1) {
				msole_prop_read (in, sections+i, props, j, accum);
				if (NULL != (prop = gsf_doc_meta_data_lookup (accum, GSF_META_NAME_CODEPAGE))) {
					GValue const *val = gsf_doc_prop_get_val (prop);
					if (NULL != val && G_VALUE_HOLDS_INT (val)) {
						int codepage = g_value_get_int (val);
						sections[i].iconv_handle =
							gsf_msole_iconv_open_for_import (codepage);
						if (codepage == 1200 || codepage == 1201)
							sections[i].char_size = 2;
					}
				}
			}

		if (sections[i].iconv_handle == (GIConv)-1)
			sections[i].iconv_handle = gsf_msole_iconv_open_for_import (1252);

		/*
		 * Find and process the Property Set Dictionary
		 * Property ID 0 is reserved as an indicator of the dictionary.
		 * For User Defined Sections, Property ID 0 is NOT a dictionary.
		 */
		for (j = 0; j < sections[i].num_props; j++) /* then dictionary */
			if (props[j].id == 0)
				msole_prop_read (in, sections+i, props, j, accum);

		/* Process all the properties */
		for (j = 0; j < sections[i].num_props; j++) /* the rest */
			if (props[j].id > 1)
				msole_prop_read (in, sections+i, props, j, accum);

		gsf_iconv_close (sections[i].iconv_handle);
		g_free (props);
		if (sections[i].dict != NULL)
			g_hash_table_destroy (sections[i].dict);
	}
	return NULL;
}

/****************************************************************************/

typedef struct {
	GsfOutput  *out;
	gboolean    doc_not_component;

	GHashTable *dict;
	struct {
		unsigned count;	 /* includes 2nd prop for links */
		GSList  *props;
	} builtin, user;

	unsigned codepage;
} WritePropState;

static GsfMSOleVariantType
gvalue_to_msole_vt (GValue const *value, GsfMSOleMetaDataPropMap const *map)
{
	g_return_val_if_fail (value != NULL, VT_EMPTY);

	switch (G_TYPE_FUNDAMENTAL (G_VALUE_TYPE (value))) {
	case G_TYPE_BOOLEAN:	return VT_BOOL;
	case G_TYPE_UCHAR:	return VT_UI1;
	case G_TYPE_FLOAT:	return VT_R4;
	case G_TYPE_DOUBLE:	return VT_R8;
	case G_TYPE_STRING: 	return VT_LPSTR;
	case G_TYPE_INT:
		return (NULL != map && map->prefered_type == VT_I2)
			? VT_I2 : VT_I4;
	case G_TYPE_UINT:
		return (NULL != map && map->prefered_type == VT_UI2)
			? VT_UI2 : VT_UI4;
	case G_TYPE_BOXED:
		if (VAL_IS_GSF_TIMESTAMP (value))
			return VT_FILETIME;
		return VT_UNKNOWN;
	case G_TYPE_OBJECT:
		if (VAL_IS_GSF_DOCPROP_VECTOR (value)) {
			GValueArray *vector = gsf_value_get_docprop_varray (value);
			unsigned i, n;
			GsfMSOleVariantType type, tmp;

			if (vector == NULL)
				return VT_UNKNOWN;

			if (map != NULL) {
				type = map->prefered_type & (~VT_VECTOR);
				if (type == VT_VARIANT)
					return VT_VECTOR | VT_VARIANT;
			} else
				type = VT_UNKNOWN;
			n = vector->n_values;
			for (i = 0; i < n; i++) {
				tmp = gvalue_to_msole_vt (
					g_value_array_get_nth (vector, i), NULL);
				if (type == VT_UNKNOWN)
					type = tmp;
				else if (type != tmp)
					return VT_VECTOR | VT_VARIANT;
			}
			return VT_VECTOR | type;
		}
		break;
	}
	return VT_UNKNOWN;
}

/* Returns: TRUE on success */
static gboolean
msole_metadata_write_prop (WritePropState *state,
			   char const *name,
			   GValue const *value,
			   gboolean suppress_type)
{
	static guint8 const zero[1] = { '\0' };
	GsfMSOleMetaDataPropMap const *map =
		(name != NULL) ? msole_gsf_name_to_prop (name) : NULL;
	GsfMSOleVariantType type;
	guint8 buf[8];

	g_return_val_if_fail (value != NULL, FALSE);

	type = gvalue_to_msole_vt (value, map);
	if (!suppress_type) {
		GSF_LE_SET_GUINT32 (buf, type);
		gsf_output_write (state->out, 4, buf);
	}
	if (NULL != map && map->prefered_type != type) {
		d(g_print ("Exporting property '%s' with type 0x%x rather than the usual 0x%x\n",
			   map->gsf_name, type, map->prefered_type););
	}

	if (type & VT_VECTOR) {
		GValueArray *vector = gsf_value_get_docprop_varray (value);
		unsigned i, n = vector->n_values;
		gboolean res;

		GSF_LE_SET_GINT32 (buf, n);
		res = gsf_output_write (state->out, 4, buf);
		for (i = 0; i < n; i++)
			res &= msole_metadata_write_prop (state, NULL,
				g_value_array_get_nth (vector, i),
				type != (VT_VECTOR | VT_VARIANT));
		return res;
	}

	switch (type) {
	case VT_BOOL:
		if (g_value_get_boolean (value))
			GSF_LE_SET_GINT32 (buf, 0xffffffff);
		else
			GSF_LE_SET_GINT32 (buf, 0);
		return gsf_output_write (state->out, 4, buf);
	case VT_UI1:
		GSF_LE_SET_GUINT32 (buf, g_value_get_uchar (value));
		return gsf_output_write (state->out, 4, buf);
	case VT_I2:
		GSF_LE_SET_GINT16 (buf, g_value_get_int (value));
		GSF_LE_SET_GUINT16 (buf+2, 0);
		return gsf_output_write (state->out, 4, buf);
	case VT_I4:
		GSF_LE_SET_GINT32 (buf, g_value_get_int (value));
		return gsf_output_write (state->out, 4, buf);
	case VT_UI2:
	case VT_UI4:
		GSF_LE_SET_GUINT32 (buf, g_value_get_uint (value));
		return gsf_output_write (state->out, 4, buf);
	case VT_R4:
		GSF_LE_SET_FLOAT (buf, g_value_get_float (value));
		return gsf_output_write (state->out, 4, buf);
	case VT_R8:
		GSF_LE_SET_DOUBLE (buf, g_value_get_double (value));
		return gsf_output_write (state->out, 8, buf);

	case VT_LPSTR : {
/* FIXME FIXME FIXME  TODO : use iconv from codepage */
		char const *txt = g_value_get_string (value);
		unsigned len = (NULL != txt) ? strlen (txt) : 0;
		GSF_LE_SET_GUINT32 (buf, len+1);
		return  gsf_output_write (state->out, 4, buf) &&
			gsf_output_write (state->out, len, txt) &&
			gsf_output_write (state->out, 1, zero);
	}

	case VT_FILETIME : {
		GsfTimestamp const *ts = g_value_get_boxed (value);
		gint32  timet_signed = (gint32) ts->timet;
		guint64 ft;

		ft = timet_signed + G_GINT64_CONSTANT (11644473600);
		ft *= 10000000;

		GSF_LE_SET_GUINT64 (buf, ft);

		return gsf_output_write (state->out, 8, buf);
	}

	default:
		break;
	}

	g_warning ("Ignoring property '%s', how do we export a property of type '%s'",
		name ? name : "<unnamed>",
		g_type_name (G_TYPE_FUNDAMENTAL (G_VALUE_TYPE (value))));
	return FALSE;
}

static void
cb_write_dict (char const *name, gpointer id, WritePropState *state)
{
	static guint8 const zero[1] = { '\0' };
	guint8	  buf [8];
	unsigned  len = strlen (name) + 1;
	GSF_LE_SET_GUINT32 (buf, GPOINTER_TO_UINT (id));
	GSF_LE_SET_GUINT32 (buf+4, len+1);
	gsf_output_write (state->out, 8, buf);
	gsf_output_write (state->out, len, name);
	gsf_output_write (state->out, 1, zero);
}

static gboolean
msole_metadata_write_section (WritePropState *state, gboolean user)
{
	char const *name;
	guint8	  buf [8];
	GSList   *ptr   = user ? state->user.props : state->builtin.props;
	unsigned  count = user ? state->user.count : state->builtin.count;
	gsf_off_t len, base  = gsf_output_tell (state->out);
	GsfMSOleMetaDataProp *offsets;
	GsfMSOleMetaDataPropMap const *map;
	GsfDocProp const *prop;
	gpointer tmp;
	unsigned i;
	GValue	 scratch;

	if (user && state->dict == NULL)
		return TRUE;

	/* Skip past the size and id/offset pairs */
	if (!gsf_output_seek (state->out,
			     4 /* length */ +
			     4 /* count */ +
			     8 * count /* id/offset pairs */,
			     G_SEEK_END))
		return FALSE;

	memset (&scratch,  0, sizeof (GValue));
	g_value_init (&scratch, G_TYPE_STRING);

	offsets = g_alloca (sizeof (GsfMSOleMetaDataProp) * count);

	/* 0) codepage */
	offsets[0].id = 1;
	offsets[0].offset = gsf_output_tell (state->out);
	GSF_LE_SET_GUINT32 (buf, VT_I2);
	GSF_LE_SET_GUINT32 (buf+4, state->codepage);
	gsf_output_write (state->out, 8, buf);

	/* 1) dictionary */
	if (user) {
		offsets[1].id = 0;
		offsets[1].offset = gsf_output_tell (state->out);
		GSF_LE_SET_GUINT32 (buf, g_hash_table_size (state->dict));
		gsf_output_write (state->out, 4, buf);
		g_hash_table_foreach (state->dict,
			(GHFunc) cb_write_dict, state);
		i = 2;
	} else
		i = 1;

	/* 2) props */
	for (; ptr != NULL && i < count ; ptr = ptr->next, i++) {
		prop = ptr->data;
		name = gsf_doc_prop_get_name (prop);
		if (user) {
			tmp = g_hash_table_lookup (state->dict, name);
			offsets[i].id = GPOINTER_TO_INT (tmp);
			if (offsets[i].id < 2) {
				g_warning ("Invalid ID (%d) for custom name '%s'", offsets[i].id, name);
				continue;
			}
		} else {
			map = msole_gsf_name_to_prop (name);
			if (map == NULL) {
				g_warning ("Missing map for builting property '%s'", name);
				continue;
			}
			offsets[i].id = map->id;
		}

		offsets[i].offset = gsf_output_tell (state->out);
		msole_metadata_write_prop (state, name,
			gsf_doc_prop_get_val  (prop), FALSE);
		if (gsf_doc_prop_get_link (prop)) {
			i++;
			offsets[i].id     = offsets[i-1].id | 0x1000000;
			offsets[i].offset = gsf_output_tell (state->out);
			g_value_set_static_string (&scratch, 
				gsf_doc_prop_get_link (prop));
			msole_metadata_write_prop (state, NULL, &scratch, FALSE);
		}
	}

	len = gsf_output_tell (state->out) - base;
	gsf_output_seek (state->out, base, G_SEEK_SET);
	GSF_LE_SET_GUINT32 (buf, len);
	GSF_LE_SET_GUINT32 (buf+4, count);
	gsf_output_write (state->out, 8, buf);
	for (i = 0 ; i < count ; i++) {
		GSF_LE_SET_GUINT32 (buf, offsets[i].id);
		GSF_LE_SET_GUINT32 (buf+4, offsets[i].offset - base);
		gsf_output_write (state->out, 8, buf);
	}

	return gsf_output_seek (state->out, 0, G_SEEK_END);
}

static void
cb_count_props (char const *name, GsfDocProp *prop, WritePropState *state)
{
	GsfMSOleMetaDataPropMap const *map = msole_gsf_name_to_prop (name);

	/* allocate predefined ids or add it to the dictionary */
	if (map != NULL) {
		if (map->id == 0) return; /* dictionary is handled elsewhere */
		if (map->section == (state->doc_not_component ? COMPONENT_PROP : DOC_PROP))
			return;
		if (map->id == 1) { /*codepage */
			GValue const *val = gsf_doc_prop_get_val (prop);
			if (NULL != val && G_VALUE_HOLDS_INT (val))
				state->codepage = g_value_get_int (val);
			return;
		}

		d (g_print ("%d) Adding builtin %s'\n",
			    state->builtin.count, map->gsf_name););
		state->builtin.count += gsf_doc_prop_get_link (prop) ? 2 : 1;
		state->builtin.props = g_slist_prepend (state->builtin.props, prop);
	} else if (state->doc_not_component) { /* keep user props in the document */
		d (g_print("user defined named '%s' assigned id = %d\n",
			   name, state->user.count););
		if (NULL == state->dict)
			state->dict = g_hash_table_new (g_str_hash, g_str_equal);
		g_hash_table_insert (state->dict,
			(gpointer) name, GINT_TO_POINTER (state->user.count));
		state->user.count += gsf_doc_prop_get_link (prop) ? 2 : 1;
		state->user.props = g_slist_prepend (state->user.props, prop);
	}
}

/**
 * gsf_msole_metadata_write :
 * @out : #GsfOutput
 * @meta_data : #GsfDocMetaData
 * @doc_not_component : a kludge to differentiate DocumentSummary from Summary
 *
 * Returns: %TRUE on success;
 **/
gboolean
gsf_msole_metadata_write (GsfOutput *out,
			  GsfDocMetaData const *meta_data,
			  gboolean doc_not_component)
{
	static guint8 const header[] = {
		0xfe, 0xff,	/* byte order */
		   0,    0,	/* Format */
		0x04, 0x0a,	/* OS : XP == 0xA04 */
		0x02, 0x00,	/* win32 == 2 */
		0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* clasid = 0 */
	};

	gboolean	success = FALSE;
	guint8		buf [4];
	WritePropState	state;

	state.codepage		= 1252;
	state.out		= out;
	state.dict		= NULL;
	state.builtin.count     = 1; /* codepage */
	state.user.count	= 2; /* codepage and dictionary */
	state.builtin.props     = state.user.props = NULL;
	state.doc_not_component = doc_not_component;
	d (g_print ("================================\nFinding props\n"););
	gsf_doc_meta_data_foreach (meta_data,
		(GHFunc) cb_count_props, &state);
	d (g_print ("Done\n"
		    "================================\n"););

	/* Write stream header */
	GSF_LE_SET_GUINT32 (buf, (state.dict != NULL) ? 2 : 1);
	if (!gsf_output_write (out, sizeof (header), header) ||
	    !gsf_output_write (out, 4, buf))
		goto err;

	/* Write section header(s) */
	GSF_LE_SET_GUINT32 (buf, (state.dict != NULL) ? 0x44 : 0x30);
	if (!gsf_output_write (out, 16,
		doc_not_component ? document_guid : component_guid) ||
	    !gsf_output_write (out, 4, buf))
		goto err;
	if (state.dict != NULL) {
		GSF_LE_SET_GUINT32 (buf, 0);
		if (!gsf_output_write (out, sizeof (user_guid), user_guid) ||
		    !gsf_output_write (out, 4, buf)) /* bogus position, fix it later */
			goto err;
	}

	/* Write section(s) */
	if (!msole_metadata_write_section (&state, FALSE))
		goto err;
	if (state.dict != NULL) {
		gsf_off_t base  = gsf_output_tell (state.out);
		GSF_LE_SET_GUINT32 (buf, base);
		if (!gsf_output_seek (state.out, 0x40, G_SEEK_SET) ||
		    !gsf_output_write (out, 4, buf) ||
		    !gsf_output_seek (state.out, 0, G_SEEK_END) ||
		    !msole_metadata_write_section (&state, TRUE))
			goto err;
	}

	success = TRUE;
err :
	g_slist_free (state.builtin.props);
	g_slist_free (state.user.props);
	if (state.dict != NULL)
		g_hash_table_destroy (state.dict);
	return success;
}

static struct {
	char const *tag;
	guint	lid;
} const gsf_msole_language_ids[] = {
	{ "-none-", 0x0000 }, /* none (language neutral) */
	{ "-none-", 0x0400 }, /* none */
	{ "af_ZA",  0x0436 }, /* Afrikaans */
	{ "am",     0x045e }, /* Amharic */
	{ "sq_AL",  0x041c }, /* Albanian */
	{ "ar_SA",  0x0401 }, /* Arabic (Saudi) */
	{ "ar_IQ",  0x0801 }, /* Arabic (Iraq) */
	{ "ar_EG",  0x0c01 }, /* Arabic (Egypt) */		
	{ "ar_LY",  0x1001 }, /* Arabic (Libya) */
	{ "ar_DZ",  0x1401 }, /* Arabic (Algeria) */
	{ "ar_MA",  0x1801 }, /* Arabic (Morocco) */
	{ "ar_TN",  0x1c01 }, /* Arabic (Tunisia) */
	{ "ar_OM",  0x2001 }, /* Arabic (Oman) */
	{ "ar_YE",  0x2401 }, /* Arabic (Yemen) */		
	{ "ar_SY",  0x2801 }, /* Arabic (Syria) */
	{ "ar_JO",  0x2c01 }, /* Arabic (Jordan) */
	{ "ar_LB",  0x3001 }, /* Arabic (Lebanon) */
	{ "ar_KW",  0x3401 }, /* Arabic (Kuwait) */
	{ "ar_AE",  0x3801 }, /* Arabic (United Arab Emirates) */
	{ "ar_BH",  0x3c01 }, /* Arabic (Bahrain) */		
	{ "ar_QA",  0x4001 }, /* Arabic (Qatar) */
	{ "as",     0x044d }, /* Assamese */
	{ "az",     0x042c }, /* Azerbaijani */
	{ "hy_AM",  0x042b }, /* Armenian */
	{ "az",     0x044c }, /* Azeri (Latin) az_ */
	{ "az",     0x082c }, /* Azeri (Cyrillic) az_ */
	{ "eu_ES",  0x042d }, /* Basque */
	{ "be_BY",  0x0423 }, /* Belarussian */		
	{ "bn",     0x0445 }, /* Bengali bn_ */
	{ "bg_BG",  0x0402 }, /* Bulgarian */
	{ "ca_ES",  0x0403 }, /* Catalan */
	{ "zh_TW",  0x0404 }, /* Chinese (Taiwan) */
	{ "zh_CN",  0x0804 }, /* Chinese (PRC) */
	{ "zh_HK",  0x0c04 }, /* Chinese (Hong Kong) */		
	{ "zh_SG",  0x1004 }, /* Chinese (Singapore) */
	{ "ch_MO",  0x1404 }, /* Chinese (Macau SAR) */
	{ "hr_HR",  0x041a }, /* Croatian */
	{ "cs_CZ",  0x0405 }, /* Czech */
	{ "da_DK",  0x0406 }, /* Danish */
	{ "div",    0x465 }, /* Divehi div_*/
	{ "nl_NL",  0x0413 }, /* Dutch (Netherlands) */		
	{ "nl_BE",  0x0813 }, /* Dutch (Belgium) */
	{ "en_US",  0x0409 }, /* English (USA) */
	{ "en_GB",  0x0809 }, /* English (UK) */
	{ "en_AU",  0x0c09 }, /* English (Australia) */
	{ "en_CA",  0x1009 }, /* English (Canada) */
	{ "en_NZ",  0x1409 }, /* English (New Zealand) */
	{ "en_IE",  0x1809 }, /* English (Ireland) */
	{ "en_ZA",  0x1c09 }, /* English (South Africa) */
	{ "en_JM",  0x2009 }, /* English (Jamaica) */
	{ "en",     0x2409 }, /* English (Caribbean) */
	{ "en_BZ",  0x2809 }, /* English (Belize) */
	{ "en_TT",  0x2c09 }, /* English (Trinidad) */		
	{ "en_ZW",  0x3009 }, /* English (Zimbabwe) */
	{ "en_PH",  0x3409 }, /* English (Phillipines) */
	{ "et_EE",  0x0425 }, /* Estonian */
	{ "fo",     0x0438 }, /* Faeroese fo_ */
	{ "fa_IR",  0x0429 }, /* Farsi */
	{ "fi_FI",  0x040b }, /* Finnish */		
	{ "fr_FR",  0x040c }, /* French (France) */
	{ "fr_BE",  0x080c }, /* French (Belgium) */
	{ "fr_CA",  0x0c0c }, /* French (Canada) */
	{ "fr_CH",  0x100c }, /* French (Switzerland) */
	{ "fr_LU",  0x140c }, /* French (Luxembourg) */
	{ "fr_MC",  0x180c }, /* French (Monaco) */		
	{ "gl",     0x0456 }, /* Galician gl_ */
	{ "ga_IE",  0x083c }, /* Irish Gaelic */
	{ "gd_GB",  0x100c }, /* Scottish Gaelic */
	{ "ka_GE",  0x0437 }, /* Georgian */
	{ "de_DE",  0x0407 }, /* German (Germany) */
	{ "de_CH",  0x0807 }, /* German (Switzerland) */
	{ "de_AT",  0x0c07 }, /* German (Austria) */
	{ "de_LU",  0x1007 }, /* German (Luxembourg) */
	{ "de_LI",  0x1407 }, /* German (Liechtenstein) */
	{ "el_GR",  0x0408 }, /* Greek */
	{ "gu",     0x0447 }, /* Gujarati gu_ */
	{ "ha",     0x0468 }, /* Hausa */
	{ "he_IL",  0x040d }, /* Hebrew */
	{ "hi_IN",  0x0439 }, /* Hindi */
	{ "hu_HU",  0x040e }, /* Hungarian */
	{ "is_IS",  0x040f }, /* Icelandic */		
	{ "id_ID",  0x0421 }, /* Indonesian */
	{ "iu",     0x045d }, /* Inkutitut */
	{ "it_IT",  0x0410 }, /* Italian (Italy) */
	{ "it_CH",  0x0810 }, /* Italian (Switzerland) */
	{ "ja_JP",  0x0411}, /* Japanese */
	{ "kn",     0x044b }, /* Kannada kn_ */
	{ "ks",     0x0860 }, /* Kashmiri (India) ks_ */
	{ "kk",     0x043f }, /* Kazakh kk_ */
	{ "kok",    0x0457 }, /* Konkani kok_ */
	{ "ko_KR",  0x0412 }, /* Korean */
	{ "ko",     0x0812 }, /* Korean (Johab) ko_ */
	{ "kir",    0x0440 }, /* Kyrgyz */
	{ "la",     0x0476 }, /* Latin */
	{ "lo",     0x0454 }, /* Laothian */
	{ "lv_LV",  0x0426 }, /* Latvian */
	{ "lt_LT",  0x0427 }, /* Lithuanian */		
	{ "lt_LT",  0x0827 }, /* Lithuanian (Classic) */
	{ "mk",     0x042f }, /* FYRO Macedonian */
	{ "my_MY",  0x043e }, /* Malaysian */
	{ "my_BN",  0x083e }, /* Malay Brunei Darussalam */
	{ "ml",     0x044c }, /* Malayalam ml_ */
	{ "mr",     0x044e }, /* Marathi mr_ */
	{ "mt",     0x043a }, /* Maltese */
	{ "mo",     0x0450 }, /* Mongolian */
	{ "ne_NP",  0x0461 }, /* Napali (Nepal) */
	{ "ne_IN",  0x0861 }, /* Nepali (India) */
	{ "nb_NO",  0x0414 }, /* Norwegian (Bokmaal) */
	{ "nn_NO",  0x0814 }, /* Norwegian (Nynorsk) */
	{ "or",     0x0448 }, /* Oriya or_ */
	{ "om",     0x0472 }, /* Oromo (Afan, Galla) */
	{ "pl_PL",  0x0415 }, /* Polish */		
	{ "pt_BR",  0x0416 }, /* Portuguese (Brazil) */
	{ "pt_PT",  0x0816 }, /* Portuguese (Portugal) */
	{ "pa",     0x0446 }, /* Punjabi pa_ */
	{ "ps",     0x0463 }, /* Pashto (Pushto) */
	{ "rm",     0x0417 }, /* Rhaeto_Romanic rm_ */
	{ "ro_RO",  0x0418 }, /* Romanian */
	{ "ro_MD",  0x0818 }, /* Romanian (Moldova) */		
	{ "ru_RU",  0x0419 }, /* Russian */
	{ "ru_MD",  0x0819 }, /* Russian (Moldova) */
	{ "se",     0x043b }, /* Sami (Lappish) se_ */
	{ "sa",     0x044f }, /* Sanskrit sa_ */
	{ "sr",     0x0c1a }, /* Serbian (Cyrillic) sr_ */
	{ "sr",     0x081a }, /* Serbian (Latin) sr_ */		
	{ "sd",     0x0459 }, /* Sindhi sd_ */
	{ "sk_SK",  0x041b }, /* Slovak */
	{ "sl_SI",  0x0424 }, /* Slovenian */
	{ "wen",    0x042e }, /* Sorbian wen_ */
	{ "so",     0x0477 }, /* Somali */
	{ "es_ES",  0x040a }, /* Spanish (Spain, Traditional) */
	{ "es_MX",  0x080a }, /* Spanish (Mexico) */		
	{ "es_ES",  0x0c0a }, /* Spanish (Modern) */
	{ "es_GT",  0x100a }, /* Spanish (Guatemala) */
	{ "es_CR",  0x140a }, /* Spanish (Costa Rica) */
	{ "es_PA",  0x180a }, /* Spanish (Panama) */
	{ "es_DO",  0x1c0a }, /* Spanish (Dominican Republic) */
	{ "es_VE",  0x200a }, /* Spanish (Venezuela) */		
	{ "es_CO",  0x240a }, /* Spanish (Colombia) */
	{ "es_PE",  0x280a }, /* Spanish (Peru) */
	{ "es_AR",  0x2c0a }, /* Spanish (Argentina) */
	{ "es_EC",  0x300a }, /* Spanish (Ecuador) */
	{ "es_CL",  0x340a }, /* Spanish (Chile) */
	{ "es_UY",  0x380a }, /* Spanish (Uruguay) */		
	{ "es_PY",  0x3c0a }, /* Spanish (Paraguay) */
	{ "es_BO",  0x400a }, /* Spanish (Bolivia) */
	{ "es_SV",  0x440a }, /* Spanish (El Salvador) */
	{ "es_HN",  0x480a }, /* Spanish (Honduras) */
	{ "es_NI",  0x4c0a }, /* Spanish (Nicaragua) */
	{ "es_PR",  0x500a }, /* Spanish (Puerto Rico) */
	{ "sx",     0x0430 }, /* Sutu */
	{ "sw",     0x0441 }, /* Swahili (Kiswahili/Kenya) */
	{ "sv_SE",  0x041d }, /* Swedish */
	{ "sv_FI",  0x081d }, /* Swedish (Finland) */
	{ "ta",     0x0449 }, /* Tamil ta_ */
	{ "tt",     0x0444 }, /* Tatar (Tatarstan) tt_ */
	{ "te",     0x044a }, /* Telugu te_ */
	{ "th_TH",  0x041e }, /* Thai */
	{ "ts",     0x0431 }, /* Tsonga ts_ */
	{ "tn",     0x0432 }, /* Tswana tn_ */
	{ "tr_TR",  0x041f }, /* Turkish */
	{ "tl",     0x0464 }, /* Tagalog */
	{ "tg",     0x0428 }, /* Tajik */
	{ "bo",     0x0451 }, /* Tibetan */
	{ "ti",     0x0473 }, /* Tigrinya */
	{ "uk_UA",  0x0422 }, /* Ukrainian */		
	{ "ur_PK",  0x0420 }, /* Urdu (Pakistan) */
	{ "ur_IN",  0x0820 }, /* Urdu (India) */
	{ "uz",     0x0443 }, /* Uzbek (Latin) uz_ */
	{ "uz",     0x0843 }, /* Uzbek (Cyrillic) uz_ */
	{ "ven",    0x0433 }, /* Venda ven_ */
	{ "vi_VN",  0x042a }, /* Vietnamese */
	{ "cy_GB",  0x0452 }, /* Welsh */
	{ "xh",     0x0434 }, /* Xhosa xh */
	{ "yi",     0x043d }, /* Yiddish yi_ */
	{ "yo",     0x046a }, /* Yoruba */
	{ "zu",     0x0435 }, /* Zulu zu_ */
	{ "en_US",  0x0800 } /* Default */
};

/**
 * gsf_msole_lid_for_language
 * @lang :
 *
 * Returns: the LID (Language Identifier) for the input language.
 * 	If lang is %null, return 0x0400 ("-none-"), and not 0x0000 ("no proofing")
 **/
guint
gsf_msole_lid_for_language (char const *lang)
{
	guint i = 0 ;
	size_t len;

	if (lang == NULL)
		return 0x0400;   /* return -none- */

	/* Allow lang to match as a prefix (eg fr == fr_FR@euro) */
	len = strlen (lang);
	for (i = 0 ; i < G_N_ELEMENTS(gsf_msole_language_ids); i++)
		if (!strncmp (lang, gsf_msole_language_ids[i].tag, len))
			return gsf_msole_language_ids[i].lid;
	
	return 0x0400 ;   /* return -none- */
}

/**
 * gsf_msole_language_for_lid :
 * @lid :
 *
 * Returns: the xx_YY style string (can be just xx or xxx) for the given LID.
 * 	Return value must not be freed. If the LID is not found, is set to
 * 	0x0400, or is set to 0x0000, will return "-none-"
 **/
char const *
gsf_msole_language_for_lid (guint lid)
{
	guint i = 0 ;
	
	for (i = 0 ; i < G_N_ELEMENTS(gsf_msole_language_ids); i++)
		if (gsf_msole_language_ids[i].lid == lid)
			return gsf_msole_language_ids[i].tag;
	
	return "-none-"; /* default */
}

/**
 * gsf_msole_locale_to_lid :
 *
 * Covert the the codepage into an applicable LID
 **/
guint
gsf_msole_codepage_to_lid (int codepage)
{
	switch (codepage) {
	case 77:		/* MAC_CHARSET */
		return 0xFFF;	/* This number is a hack */
	case 128:		/* SHIFTJIS_CHARSET */
		return 0x411;	/* Japanese */
	case 129:		/* HANGEUL_CHARSET */
		return 0x412;	/* Korean */
	case 130:		/* JOHAB_CHARSET */
		return 0x812;	/* Korean (Johab) */
	case 134:		/* GB2312_CHARSET - Chinese Simplified */
		return 0x804;	/* China PRC - And others!! */
	case 136:		/* CHINESEBIG5_CHARSET - Chinese Traditional */
		return 0x404;	/* Taiwan - And others!! */
	case 161:		/* GREEK_CHARSET */
		return 0x408;	/* Greek */
	case 162:		/* TURKISH_CHARSET */
		return 0x41f;	/* Turkish */
	case 163:		/* VIETNAMESE_CHARSET */
		return 0x42a;	/* Vietnamese */
	case 177:		/* HEBREW_CHARSET */
		return 0x40d;	/* Hebrew */
	case 178:		/* ARABIC_CHARSET */
		return 0x01;	/* Arabic */
	case 186:		/* BALTIC_CHARSET */
		return 0x425;	/* Estonian - And others!! */
	case 204:		/* RUSSIAN_CHARSET */
		return 0x419;	/* Russian - And others!! */
	case 222:		/* THAI_CHARSET */
		return 0x41e;	/* Thai */
	case 238:		/* EASTEUROPE_CHARSET */
		return 0x405;	/* Czech - And many others!! */
	}

	/* default */
	return 0x0;
}

/**
 * gsf_msole_lid_to_codepage
 * @lid :
 *
 * Returns: our best guess at the codepage for the given language id
 **/
int
gsf_msole_lid_to_codepage (guint lid)
{
	if (lid == 0x0FFF) /* Macintosh Hack */
		return 0x0FFF;

	switch (lid & 0xff) {
	case 0x01:		/* Arabic */
		return 1256;
	case 0x02:		/* Bulgarian */
		return 1251;
	case 0x03:		/* Catalan */
		return 1252;
	case 0x04:		/* Chinese */
		switch (lid) {
		case 0x1004:		/* Chinese (Singapore) */
		case 0x0404:		/* Chinese (Taiwan) */
		case 0x1404:		/* Chinese (Macau SAR) */
		case 0x0c04:		/* Chinese (Hong Kong SAR, PRC) */
			return 950;
			
		case 0x0804:		/* Chinese (PRC) */
			return 936;
		default :
			break;
		}
		break;
	case 0x05:		/* Czech */
		return 1250;
	case 0x06:		/* Danish */
		return 1252;
	case 0x07:		/* German */
		return 1252;
	case 0x08:		/* Greek */
		return 1253;
	case 0x09:		/* English */
		return 1252;
	case 0x0a:		/* Spanish */
		return 1252;
	case 0x0b:		/* Finnish */
		return 1252;
	case 0x0c:		/* French */
		return 1252;
	case 0x0d:		/* Hebrew */
		return 1255;
	case 0x0e:		/* Hungarian */
		return 1250;
	case 0x0f:		/* Icelandic */
		return 1252;
	case 0x10:		/* Italian */
		return 1252;
	case 0x11:		/* Japanese */
		return 932;
	case 0x12:		/* Korean */
		switch (lid) {
		case 0x0812:		/* Korean (Johab) */
			return 1361;
		case 0x0412:		/* Korean */
			return 949;
		default :
			break;
		}
		break;
	case 0x13:		/* Dutch */
		return 1252;
	case 0x14:		/* Norwegian */
		return 1252;
	case 0x15:		/* Polish */
		return 1250;
	case 0x16:		/* Portuguese */
		return 1252;
	case 0x17:		/* Rhaeto-Romanic */
		return 1252;
	case 0x18:		/* Romanian */
		return 1250;
	case 0x19:		/* Russian */
		return 1251;
	case 0x1a:		/* Serbian, Croatian, (Bosnian?) */
		switch (lid) {
		case 0x041a:		/* Croatian */
			return 1252;
		case 0x0c1a:		/* Serbian (Cyrillic) */
			return 1251;
		case 0x081a:		/* Serbian (Latin) */
			return 1252;
		default :
			break;
		}
		break;
	case 0x1b:		/* Slovak */
		return 1250;
	case 0x1c:		/* Albanian */
		return 1251;
	case 0x1d:		/* Swedish */
		return 1252;
	case 0x1e:		/* Thai */
		return 874;
	case 0x1f:		/* Turkish */
		return 1254;
	case 0x20:		/* Urdu. This is Unicode only. */
		return 0;
	case 0x21:		/* Bahasa Indonesian */
		return 1252;
	case 0x22:		/* Ukrainian */
		return 1251;
	case 0x23:		/* Byelorussian / Belarusian */
		return 1251;
	case 0x24:		/* Slovenian */
		return 1250;
	case 0x25:		/* Estonian */
		return 1257;
	case 0x26:		/* Latvian */
		return 1257;
	case 0x27:		/* Lithuanian */
		return 1257;
	case 0x29:		/* Farsi / Persian. This is Unicode only. */
		return 0;
	case 0x2a:		/* Vietnamese */
		return 1258;
	case 0x2b:		/* Windows 2000: Armenian. This is Unicode only. */
		return 0;
	case 0x2c:		/* Azeri */
		switch (lid) {
		case 0x082c:		/* Azeri (Cyrillic) */
			return 1251;
		default :
			break;
		}
		break;
	case 0x2d:		/* Basque */
		return 1252;
	case 0x2f:		/* Macedonian */
		return 1251;
	case 0x36:		/* Afrikaans */
		return 1252;
	case 0x37:		/* Windows 2000: Georgian. This is Unicode only. */
		return 0;
	case 0x38:		/* Faeroese */
		return 1252;
	case 0x39:		/* Windows 2000: Hindi. This is Unicode only. */
		return 0;
	case 0x3E:		/* Malaysian / Malay */
		return 1252;
	case 0x41:		/* Swahili */
		return 1252;
	case 0x43:		/* Uzbek */
		switch (lid) {
		case 0x0843:		/* Uzbek (Cyrillic) */
			return 1251;
		default :
			break;
		}
		break;
	case 0x45:		/* Windows 2000: Bengali. This is Unicode only. */
	case 0x46:		/* Windows 2000: Punjabi. This is Unicode only. */
	case 0x47:		/* Windows 2000: Gujarati. This is Unicode only. */
	case 0x48:		/* Windows 2000: Oriya. This is Unicode only. */
	case 0x49:		/* Windows 2000: Tamil. This is Unicode only. */
	case 0x4a:		/* Windows 2000: Telugu. This is Unicode only. */
	case 0x4b:		/* Windows 2000: Kannada. This is Unicode only. */
	case 0x4c:		/* Windows 2000: Malayalam. This is Unicode only. */
	case 0x4d:		/* Windows 2000: Assamese. This is Unicode only. */
	case 0x4e:		/* Windows 2000: Marathi. This is Unicode only. */
	case 0x4f:		/* Windows 2000: Sanskrit. This is Unicode only. */
	case 0x55:		/* Myanmar / Burmese. This is Unicode only. */
	case 0x57:		/* Windows 2000: Konkani. This is Unicode only. */
	case 0x61:		/* Windows 2000: Nepali (India). This is Unicode only. */
		return 0;

#if 0
		/****************************************************************** 
		 * Below this line is untested, unproven, and are just guesses.   *
		 * Insert above and use at your own risk                          *
		 ******************************************************************/

	case 0x042c:		/* Azeri (Latin) */
	case 0x0443:		/* Uzbek (Latin) */
	case 0x30:		/* Sutu */
		return 1252; /* UNKNOWN, believed to be CP1252 */

	case 0x3f:		/* Kazakh */
		return 1251; /* JUST UNKNOWN, probably CP1251 */

	case 0x44:		/* Tatar */
	case 0x58:		/* Manipuri */
	case 0x59:		/* Sindhi */
	case 0x60:		/* Kashmiri (India) */
		return 0; /* UNKNOWN, believed to be Unicode only */
#endif
	};
	
	/* This is just a guess, but it will be a frequent guess */
	return 1252;
}

/**
 * gsf_msole_lid_to_codepage_str
 * @lid :
 * 
 * Returns: the Iconv codepage string for the given LID.
 * 	Return value must be g_free ()'d
 **/
gchar *
gsf_msole_lid_to_codepage_str (guint lid)
{
	guint cp = 0;

	if (lid == 0x0FFF)	/* Macintosh Hack */
		return g_strdup ("MACINTOSH");

	cp = gsf_msole_lid_to_codepage (lid);
	return g_strdup_printf ("CP%d", cp);
}

/**
 * gsf_msole_iconv_win_codepage :
 *
 * Returns: our best guess at the applicable windows code page based on an
 * 	environment variable or the current locale.
 **/
int
gsf_msole_iconv_win_codepage (void)
{
	char *lang;

	if ((lang = getenv("WINDOWS_LANGUAGE")) == NULL) {
		char const *locale = setlocale (LC_CTYPE, NULL);
		if (locale != NULL) {
			char const *lang_sep = strchr (locale, '.');
			if (lang_sep)
				lang = g_strndup (locale, (unsigned)(lang_sep - locale));
			else
				lang = g_strdup (locale); /* simplifies exit */
		}
	}

	if (lang != NULL) {
		guint lid = gsf_msole_lid_for_language (lang);
		g_free (lang);
		return gsf_msole_lid_to_codepage (lid);
	}
	return 1252; /* default ansi */
}

static GSList *
gsf_msole_iconv_get_codepage_string_list (int codepage)
{
	GSList *cp_list = NULL;

	switch (codepage) {
		case 1200:
			cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-16LE"));
			break;
		case 1201:
			cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-16BE"));
			break;
		case 0x8000:
		case 10000:
			cp_list = g_slist_prepend (cp_list, g_strdup ("MACROMAN"));
			cp_list = g_slist_prepend (cp_list, g_strdup ("MACINTOSH"));
			break;
		case -535:
		case 65001:
			cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-8"));
			break;
		case 0x8001:
			/* according to OOo docs 8001 is a synonym CP1252 */
			codepage = 1252;
		default:
			cp_list = g_slist_prepend (cp_list, g_strdup_printf ("CP%u", codepage));
	}
	
	return cp_list;
}

/**
 * gsf_msole_iconv_open_codepage_for_import :
 * @to:
 * @codepage :
 *
 * Returns: an iconv converter for @codepage -> utf8.
 **/
GIConv
gsf_msole_iconv_open_codepage_for_import (char const *to, int codepage)
{
	GIConv iconv_handle = (GIConv)(-1);
	gchar *codepage_str;
	GSList *codepage_list, *cp;
	g_return_val_if_fail (to != NULL, (GIConv)(-1));

	cp = codepage_list = gsf_msole_iconv_get_codepage_string_list (codepage);
	while (cp) {
		codepage_str = cp->data;
		if (iconv_handle == (GIConv)(-1))
			iconv_handle = g_iconv_open (to, codepage_str);
		g_free (codepage_str);
		cp = cp->next;
	}
	g_slist_free (codepage_list);

	if (iconv_handle == (GIConv)(-1))
		g_warning ("Unable to open an iconv handle from codepage %d -> %s",
			   codepage, to);
	return iconv_handle;
}

/**
 * gsf_msole_iconv_open_for_import :
 * @codepage :
 *
 * Returns: an iconv converter for single byte encodings @codepage -> utf8.
 * 	Attempt to handle the semantics of a specification for multibyte encodings
 * 	since this is only supposed to be used for single bytes.
 **/
GIConv
gsf_msole_iconv_open_for_import (int codepage)
{
	return gsf_msole_iconv_open_codepage_for_import ("UTF-8", codepage);
}

/**
 * gsf_msole_iconv_open_codepages_for_export :
 * @codepage_to :
 * @from :
 *
 * Returns: an iconv converter to go from utf8 -> to our best guess at a useful
 * 	windows codepage.
 **/
GIConv
gsf_msole_iconv_open_codepages_for_export (int codepage_to, char const *from)
{
	GIConv iconv_handle = (GIConv)(-1);
	gchar *codepage_str;
	GSList *codepage_list, *cp;
	g_return_val_if_fail (from != NULL, (GIConv)(-1));

	cp = codepage_list = gsf_msole_iconv_get_codepage_string_list (codepage_to);
	while (cp) {
		codepage_str = cp->data;
		if (iconv_handle == (GIConv)(-1))
			iconv_handle = g_iconv_open (codepage_str, from);
		g_free (codepage_str);
		cp = cp->next;
	}
	g_slist_free (codepage_list);

	if (iconv_handle == (GIConv)(-1))
		g_warning ("Unable to open an iconv handle from %s -> codepage %u",
			   from, codepage_to);
	return iconv_handle;
}

/**
 * gsf_msole_iconv_open_codepage_for_export :
 * @codepage_to:
 *
 * Returns: an iconv converter to go from utf8 -> to our best guess at a useful
 * 	windows codepage.
 **/
GIConv
gsf_msole_iconv_open_codepage_for_export (int codepage_to)
{
	return gsf_msole_iconv_open_codepages_for_export (codepage_to, "UTF-8");
}

/**
 * gsf_msole_iconv_open_for_export :
 *
 * Returns: an iconv convert to go from utf8 -> to our best guess at a useful
 * 	windows codepage.
 **/
GIConv
gsf_msole_iconv_open_for_export (void)
{
	return gsf_msole_iconv_open_codepage_for_export (gsf_msole_iconv_win_codepage ());
}

#define VBA_COMPRESSION_WINDOW 4096

/**
 * gsf_msole_inflate:
 * @input: stream to read from
 * @offset: offset into it for start byte of compresse stream
 * 
 * Decompresses an LZ compressed stream.
 * 
 * Return value: A GByteArray that the caller is responsible for freeing
 **/
GByteArray *
gsf_msole_inflate (GsfInput *input, gsf_off_t offset)
{
	GByteArray *res;
	unsigned	i, win_pos, pos = 0;
	unsigned	mask, shift, distance;
	guint8		flag, buffer [VBA_COMPRESSION_WINDOW];
	guint8 const   *tmp;
	guint16		token, len;
	gboolean	clean = TRUE;

	if (gsf_input_seek (input, offset, G_SEEK_SET))
		return NULL;

	res = g_byte_array_new ();

	/* explaination from libole2/ms-ole-vba.c */
	/* The first byte is a flag byte.  Each bit in this byte
	 * determines what the next byte is.  If the bit is zero,
	 * the next byte is a character.  Otherwise the  next two
	 * bytes contain the number of characters to copy from the
	 * umcompresed buffer and where to copy them from (offset,
	 * length).
	 */
	while (NULL != gsf_input_read (input, 1, &flag))
		for (mask = 1; mask < 0x100 ; mask <<= 1)
			if (flag & mask) {
				if (NULL == (tmp = gsf_input_read (input, 2, NULL)))
					break;
				win_pos = pos % VBA_COMPRESSION_WINDOW;
				if (win_pos <= 0x80) {
					if (win_pos <= 0x20)
						shift = (win_pos <= 0x10) ? 12 : 11;
					else
						shift = (win_pos <= 0x40) ? 10 : 9;
				} else {
					if (win_pos <= 0x200)
						shift = (win_pos <= 0x100) ? 8 : 7;
					else if (win_pos <= 0x800)
						shift = (win_pos <= 0x400) ? 6 : 5;
					else
						shift = 4;
				}

				token = GSF_LE_GET_GUINT16 (tmp);
				len = (token & ((1 << shift) - 1)) + 3;
				distance = token >> shift;
				clean = TRUE;
/*				fprintf (stderr, "Shift %d, token len %d, distance %d bytes %.2x %.2x\n",
				shift, len, distance, (token & 0xff), (token >> 8)); */

				for (i = 0; i < len; i++) {
					unsigned srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW;
					guint8 c = buffer [srcpos];
					buffer [pos++ % VBA_COMPRESSION_WINDOW] = c;
				}
			} else {
				if ((pos != 0) && ((pos % VBA_COMPRESSION_WINDOW) == 0) && clean) {
					(void) gsf_input_read (input, 2, NULL);
					clean = FALSE;
					g_byte_array_append (res, buffer, VBA_COMPRESSION_WINDOW);
					break;
				}
				if (NULL != gsf_input_read (input, 1, buffer + (pos % VBA_COMPRESSION_WINDOW)))
					pos++;
				clean = TRUE;
			}

	if (pos % VBA_COMPRESSION_WINDOW)
		g_byte_array_append (res, buffer, pos % VBA_COMPRESSION_WINDOW);
	return res;
}


syntax highlighted by Code2HTML, v. 0.9.1