/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * gsf-msole-utils.c: * * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org) * Copyright (C) 2002-2006 Dom Lachowicz (cinamod@hotmail.com) * excel_iconv* family of functions (C) 2001 by Vlad Harchev * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2.1 of the GNU Lesser General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 * USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NO_DEBUG_OLE_PROPS #ifndef NO_DEBUG_OLE_PROPS #define d(code) do { code } while (0) #else #define d(code) #endif /* * The Format Identifier for Summary Information * F29F85E0-4FF9-1068-AB91-08002B27B3D9 */ static guint8 const component_guid [] = { 0xe0, 0x85, 0x9f, 0xf2, 0xf9, 0x4f, 0x68, 0x10, 0xab, 0x91, 0x08, 0x00, 0x2b, 0x27, 0xb3, 0xd9 }; /* * The Format Identifier for Document Summary Information * D5CDD502-2E9C-101B-9397-08002B2CF9AE */ static guint8 const document_guid [] = { 0x02, 0xd5, 0xcd, 0xd5, 0x9c, 0x2e, 0x1b, 0x10, 0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae }; /* * The Format Identifier for User-Defined Properties * D5CDD505-2E9C-101B-9397-08002B2CF9AE */ static guint8 const user_guid [] = { 0x05, 0xd5, 0xcd, 0xd5, 0x9c, 0x2e, 0x1b, 0x10, 0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae }; typedef enum { COMMON_PROP, /* in either summary or docsummary */ 
COMPONENT_PROP, /* SummaryInformation properties */ DOC_PROP, /* DocumentSummaryInformation properties */ USER_PROP } GsfMSOleMetaDataType; typedef enum { VT_EMPTY = 0, VT_NULL = 1, VT_I2 = 2, VT_I4 = 3, VT_R4 = 4, VT_R8 = 5, VT_CY = 6, VT_DATE = 7, VT_BSTR = 8, VT_DISPATCH = 9, VT_ERROR = 10, VT_BOOL = 11, VT_VARIANT = 12, VT_UNKNOWN = 13, VT_DECIMAL = 14, VT_I1 = 16, VT_UI1 = 17, VT_UI2 = 18, VT_UI4 = 19, VT_I8 = 20, VT_UI8 = 21, VT_INT = 22, VT_UINT = 23, VT_VOID = 24, VT_HRESULT = 25, VT_PTR = 26, VT_SAFEARRAY = 27, VT_CARRAY = 28, VT_USERDEFINED = 29, VT_LPSTR = 30, VT_LPWSTR = 31, VT_FILETIME = 64, VT_BLOB = 65, VT_STREAM = 66, VT_STORAGE = 67, VT_STREAMED_OBJECT = 68, VT_STORED_OBJECT = 69, VT_BLOB_OBJECT = 70, VT_CF = 71, VT_CLSID = 72, VT_VECTOR = 0x1000 } GsfMSOleVariantType; typedef struct { char const *ms_name; GsfMSOleMetaDataType section; char const *gsf_name; guint32 id; GsfMSOleVariantType prefered_type; } GsfMSOleMetaDataPropMap; typedef struct { guint32 id; gsf_off_t offset; } GsfMSOleMetaDataProp; typedef struct { GsfMSOleMetaDataType type; gsf_off_t offset; guint32 size, num_props; GIConv iconv_handle; unsigned char_size; GHashTable *dict; } GsfMSOleMetaDataSection; static GsfMSOleMetaDataPropMap const builtin_props [] = { { "Dictionary", COMMON_PROP, GSF_META_NAME_DICTIONARY, 0, 0, /* magic */}, { "CodePage", COMMON_PROP, GSF_META_NAME_CODEPAGE, 1, VT_I2 }, { "LOCALE_SYSTEM_DEFAULT",COMMON_PROP, GSF_META_NAME_LOCALE_SYSTEM_DEFAULT, 0x80000000, VT_UI4}, { "CASE_SENSITIVE", COMMON_PROP, GSF_META_NAME_CASE_SENSITIVE, 0x80000003, VT_UI4}, { "Category", DOC_PROP, GSF_META_NAME_CATEGORY, 2, VT_LPSTR }, { "PresentationFormat", DOC_PROP, GSF_META_NAME_PRESENTATION_FORMAT, 3, VT_LPSTR }, { "NumBytes", DOC_PROP, GSF_META_NAME_BYTE_COUNT, 4, VT_I4 }, { "NumLines", DOC_PROP, GSF_META_NAME_LINE_COUNT, 5, VT_I4 }, { "NumParagraphs", DOC_PROP, GSF_META_NAME_PARAGRAPH_COUNT, 6, VT_I4 }, { "NumSlides", DOC_PROP, GSF_META_NAME_SLIDE_COUNT, 7, VT_I4 }, { 
"NumNotes", DOC_PROP, GSF_META_NAME_NOTE_COUNT, 8, VT_I4 }, { "NumHiddenSlides", DOC_PROP, GSF_META_NAME_HIDDEN_SLIDE_COUNT, 9, VT_I4 }, { "NumMMClips", DOC_PROP, GSF_META_NAME_MM_CLIP_COUNT, 10, VT_I4 }, { "Scale", DOC_PROP, GSF_META_NAME_SCALE, 11, VT_BOOL }, { "HeadingPairs", DOC_PROP, GSF_META_NAME_HEADING_PAIRS, 12, VT_VECTOR | VT_VARIANT }, { "DocumentParts", DOC_PROP, GSF_META_NAME_DOCUMENT_PARTS, 13, VT_VECTOR | VT_LPSTR }, { "Manager", DOC_PROP, GSF_META_NAME_MANAGER, 14, VT_LPSTR }, { "Company", DOC_PROP, GSF_META_NAME_COMPANY, 15, VT_LPSTR }, { "LinksDirty", DOC_PROP, GSF_META_NAME_LINKS_DIRTY, 16, VT_BOOL }, { "DocSumInfo_17", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_17, 17, VT_UNKNOWN }, { "DocSumInfo_18", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_18, 18, VT_UNKNOWN }, { "DocSumInfo_19", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_19, 19, VT_BOOL }, { "DocSumInfo_20", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_20, 20, VT_UNKNOWN }, { "DocSumInfo_21", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_21, 21, VT_UNKNOWN }, { "DocSumInfo_22", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_22, 22, VT_BOOL }, { "DocSumInfo_23", DOC_PROP, GSF_META_NAME_MSOLE_UNKNOWN_23, 23, VT_I4 }, { "Title", COMPONENT_PROP, GSF_META_NAME_TITLE, 2, VT_LPSTR }, { "Subject", COMPONENT_PROP, GSF_META_NAME_SUBJECT, 3, VT_LPSTR }, { "Author", COMPONENT_PROP, GSF_META_NAME_CREATOR, 4, VT_LPSTR }, { "Keywords", COMPONENT_PROP, GSF_META_NAME_KEYWORDS, 5, VT_LPSTR }, { "Comments", COMPONENT_PROP, GSF_META_NAME_DESCRIPTION, 6, VT_LPSTR }, { "Template", COMPONENT_PROP, GSF_META_NAME_TEMPLATE, 7, VT_LPSTR }, { "LastSavedBy", COMPONENT_PROP, GSF_META_NAME_LAST_SAVED_BY, 8, VT_LPSTR }, { "RevisionNumber", COMPONENT_PROP, GSF_META_NAME_REVISION_COUNT, 9, VT_LPSTR }, { "TotalEditingTime", COMPONENT_PROP, GSF_META_NAME_EDITING_DURATION, 10, VT_FILETIME }, { "LastPrinted", COMPONENT_PROP, GSF_META_NAME_LAST_PRINTED, 11, VT_FILETIME }, { "CreateTime", COMPONENT_PROP, GSF_META_NAME_DATE_CREATED, 12, VT_FILETIME }, { 
"LastSavedTime", COMPONENT_PROP, GSF_META_NAME_DATE_MODIFIED, 13, VT_FILETIME }, { "NumPages", COMPONENT_PROP, GSF_META_NAME_PAGE_COUNT, 14, VT_I4 }, { "NumWords", COMPONENT_PROP, GSF_META_NAME_WORD_COUNT, 15, VT_I4 }, { "NumCharacters", COMPONENT_PROP, GSF_META_NAME_CHARACTER_COUNT, 16, VT_I4 }, { "Thumbnail", COMPONENT_PROP, GSF_META_NAME_THUMBNAIL, 17, VT_CF }, { "AppName", COMPONENT_PROP, GSF_META_NAME_GENERATOR, 18, VT_LPSTR }, { "Security", COMPONENT_PROP, GSF_META_NAME_SECURITY, 19, VT_I4 } }; static GHashTable *name_to_prop_hash = NULL; static char const * msole_vt_name (GsfMSOleVariantType type) { static char const *names[] = { "VT_EMPTY", "VT_NULL", "VT_I2", "VT_I4", "VT_R4", "VT_R8", "VT_CY", "VT_DATE", "VT_BSTR", "VT_DISPATCH", "VT_ERROR", "VT_BOOL", "VT_VARIANT", "VT_UNKNOWN", "VT_DECIMAL", NULL, "VT_I1", "VT_UI1", "VT_UI2", "VT_UI4", "VT_I8", "VT_UI8", "VT_INT", "VT_UINT", "VT_VOID", "VT_HRESULT", "VT_PTR", "VT_SAFEARRAY", "VT_CARRAY", "VT_USERDEFINED", "VT_LPSTR", "VT_LPWSTR", }; static char const *names2[] = { "VT_FILETIME", "VT_BLOB", "VT_STREAM", "VT_STORAGE", "VT_STREAMED_OBJECT", "VT_STORED_OBJECT", "VT_BLOB_OBJECT", "VT_CF", "VT_CLSID" }; type &= ~VT_VECTOR; if (type <= VT_LPWSTR) return names[type]; g_return_val_if_fail (type >= VT_FILETIME, "_UNKNOWN_"); g_return_val_if_fail (type <= VT_CLSID, "_UNKNOWN_"); return names2[type-VT_FILETIME]; } static char const * msole_prop_id_to_gsf (GsfMSOleMetaDataSection *section, guint32 id, gboolean *linked) { char const *res = NULL; GsfMSOleMetaDataPropMap const *map = NULL; unsigned i = 0; *linked = FALSE; if (section->dict != NULL) { if (id & 0x1000000) { *linked = TRUE; id &= ~0x1000000; d (g_print ("LINKED ");); } res = g_hash_table_lookup (section->dict, GINT_TO_POINTER (id)); if (res != NULL) { d (g_print (res);); return res; } } map = builtin_props ; i = G_N_ELEMENTS (builtin_props); while (i-- > 0) if (map[i].id == id && (map[i].section == COMMON_PROP || map[i].section == section->type)) { d 
(g_print (map[i].gsf_name);); return map[i].gsf_name; } d (g_print ("_UNKNOWN_(0x%x %d)", id, id);); return NULL; } static GsfMSOleMetaDataPropMap const * msole_gsf_name_to_prop (char const *name) { if (NULL == name_to_prop_hash) { int i; name_to_prop_hash = g_hash_table_new (g_str_hash, g_str_equal); for (i = G_N_ELEMENTS (builtin_props); i-- > 0; ) g_hash_table_replace (name_to_prop_hash, (gpointer) builtin_props[i].gsf_name, (gpointer) (builtin_props+i)); } return g_hash_table_lookup (name_to_prop_hash, (gpointer)name); } static void set_error_missing_data (GError **error, const char *property_name, gsize size_needed, gsize size_gotten) { g_set_error (error, GSF_ERROR, GSF_ERROR_INVALID_DATA, _("Missing data when reading the %s property; got %" G_GSIZE_FORMAT "bytes, " "but %" G_GSIZE_FORMAT " bytes at least are needed."), property_name, size_needed, size_gotten); } /* Can return errors from gsf_blob_new() and GSF_ERROR_INVALID_DATA */ static gboolean parse_vt_cf (GValue *res, guint8 const **data, guint8 const *data_end, GError **error) { /* clipboard size uint32 sizeof (clipboard format tag) + sizeof (clipboard data) * clipboard format tag int32 see below * clipboard data byte[] see below * * Clipboard format tag: * -1 - Windows clipboard format * -2 - Macintosh clipboard format * -3 - GUID that contains a format identifier (FMTID) * >0 - custom clipboard format name plus data (see msdn site below) * 0 - No data * * References: * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/propvariant.asp * http://jakarta.apache.org/poi/hpsf/thumbnails.html * http://linux.com.hk/docs/poi/org/apache/poi/hpsf/Thumbnail.html * http://sparks.discreet.com/knowledgebase/public/solutions/ExtractThumbnailImg.htm */ guint32 clip_size, clip_data_size; gint32 clip_format; GsfBlob *blob; GsfClipData *clip_data; /* Clipboard size field */ if (data_end < *data + 4) { set_error_missing_data (error, "VT_CF", 4, data_end - *data); return FALSE; } clip_size = 
GSF_LE_GET_GUINT32 (*data); if (clip_size < 4) { /* must emcompass int32 format plus data size */ g_set_error (error, GSF_ERROR, GSF_ERROR_INVALID_DATA, _("Corrupt data in the VT_CF property; clipboard data length must be at least 4 bytes, " "but the data says it only has %" G_GSIZE_FORMAT " bytes available."), (gsize) clip_size); return FALSE; } *data += 4; /* Check clipboard format plus data size */ if (data_end < *data + clip_size) { set_error_missing_data (error, "VT_CF", clip_size, data_end - *data); return FALSE; } clip_format = GSF_LE_GET_GINT32 (*data); *data += 4; switch (clip_format) { case GSF_CLIP_FORMAT_WINDOWS_CLIPBOARD: case GSF_CLIP_FORMAT_MACINTOSH_CLIPBOARD: case GSF_CLIP_FORMAT_GUID: case GSF_CLIP_FORMAT_NO_DATA: /* everything is ok */ break; default: if (clip_format > 0) clip_format = GSF_CLIP_FORMAT_CLIPBOARD_FORMAT_NAME; else clip_format = GSF_CLIP_FORMAT_UNKNOWN; break; } clip_data_size = clip_size - 4; blob = gsf_blob_new (clip_data_size, *data, error); *data += clip_data_size; if (!blob) return FALSE; clip_data = gsf_clip_data_new (clip_format, blob); g_object_unref (blob); g_value_init (res, GSF_TYPE_CLIP_DATA); g_value_set_object (res, clip_data); g_object_unref (clip_data); return TRUE; } static GValue * msole_prop_parse (GsfMSOleMetaDataSection *section, guint32 type, guint8 const **data, guint8 const *data_end) { GValue *res; char *str; guint32 len; gboolean const is_vector = type & VT_VECTOR; GError *error; g_return_val_if_fail (!(type & (unsigned)(~0x1fff)), NULL); /* not valid in a prop set */ type &= 0xfff; if (is_vector) { /* * A vector is basically an array. If the type associated with * it is a variant, then each element can have a different * variant type. Otherwise, each element has the same variant * type associated with the vector. 
*/ unsigned i, n; GsfDocPropVector *vector; g_return_val_if_fail (*data + 4 <= data_end, NULL); n = GSF_LE_GET_GUINT32 (*data); *data += 4; d (g_print (" array with %d elem\n", n); gsf_mem_dump (*data, (unsigned)(data_end - *data));); vector = gsf_docprop_vector_new (); for (i = 0 ; i < n ; i++) { GValue *v; d (g_print ("\t[%d] ", i);); v = msole_prop_parse (section, type, data, data_end); if (v) { if (G_IS_VALUE (v)) { gsf_docprop_vector_append (vector, v); g_value_unset (v); } g_free (v); } } res = g_new0 (GValue, 1); g_value_init (res, GSF_DOCPROP_VECTOR_TYPE); g_value_set_object (res, vector); g_object_unref (vector); return res; } res = g_new0 (GValue, 1); d (g_print ("%s\n", msole_vt_name (type));); switch (type) { case VT_EMPTY : /* * A property with a type indicator of VT_EMPTY has no data * associated with it; that is, the size of the value is zero. */ /* value::unset == empty */ break; case VT_NULL : /* This is like a pointer to NULL */ /* value::unset == null too :-) do we need to distinguish ? 
*/ break; case VT_I2 : /* 2-byte signed integer */ g_return_val_if_fail (*data + 2 <= data_end, NULL); g_value_init (res, G_TYPE_INT); g_value_set_int (res, GSF_LE_GET_GINT16 (*data)); *data += 2; break; case VT_I4 : /* 4-byte signed integer */ g_return_val_if_fail (*data + 4 <= data_end, NULL); g_value_init (res, G_TYPE_INT); g_value_set_int (res, GSF_LE_GET_GINT32 (*data)); *data += 4; break; case VT_R4 : /* 32-bit IEEE floating-point value */ g_return_val_if_fail (*data + 4 <= data_end, NULL); g_value_init (res, G_TYPE_FLOAT); g_value_set_float (res, GSF_LE_GET_FLOAT (*data)); *data += 4; break; case VT_R8 : /* 64-bit IEEE floating-point value */ g_return_val_if_fail (*data + 8 <= data_end, NULL); g_value_init (res, G_TYPE_DOUBLE); g_value_set_double (res, GSF_LE_GET_DOUBLE (*data)); *data += 8; break; case VT_CY : /* 8-byte two's complement integer (scaled by 10,000) */ /* CHEAT : just store as an int64 for now */ g_return_val_if_fail (*data + 8 <= data_end, NULL); g_value_init (res, G_TYPE_INT64); g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data)); break; case VT_DATE : /* * 64-bit floating-point number representing the number of days * (not seconds) since December 31, 1899. */ /* FIXME FIXME FIXME TODO */ break; case VT_BSTR : /* * Pointer to null-terminated Unicode string; the string is pre- * ceeded by a DWORD representing the byte count of the number * of bytes in the string (including the terminating null). */ /* FIXME FIXME FIXME TODO */ break; case VT_DISPATCH : /* FIXME FIXME FIXME TODO */ break; case VT_BOOL : /* A boolean (WORD) value containg 0 (false) or -1 (true). */ g_return_val_if_fail (*data + 1 <= data_end, NULL); g_value_init (res, G_TYPE_BOOLEAN); g_value_set_boolean (res, **data ? TRUE : FALSE); *data += 1; break; case VT_VARIANT : d (g_print ("\tcontaining a ");); /* * A type indicator (a DWORD) followed by the corresponding * value. VT_VARIANT is only used in conjunction with * VT_VECTOR. 
*/ g_free (res); type = GSF_LE_GET_GUINT32 (*data); *data += 4; return msole_prop_parse (section, type, data, data_end); case VT_UI1 : /* 1-byte unsigned integer */ g_return_val_if_fail (*data + 1 <= data_end, NULL); g_value_init (res, G_TYPE_UCHAR); g_value_set_uchar (res, (guchar)(**data)); *data += 1; break; case VT_UI2 : /* 2-byte unsigned integer */ g_return_val_if_fail (*data + 2 <= data_end, NULL); g_value_init (res, G_TYPE_UINT); g_value_set_uint (res, GSF_LE_GET_GUINT16 (*data)); *data += 2; break; case VT_UI4 : /* 4-type unsigned integer */ g_return_val_if_fail (*data + 4 <= data_end, NULL); g_value_init (res, G_TYPE_UINT); g_value_set_uint (res, GSF_LE_GET_GUINT32 (*data)); *data += 4; break; case VT_I8 : d (g_print ("VT_I8\n");); /* 8-byte signed integer */ g_return_val_if_fail (*data + 8 <= data_end, NULL); g_value_init (res, G_TYPE_INT64); g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data)); *data += 8; break; case VT_UI8 : /* 8-byte unsigned integer */ g_return_val_if_fail (*data + 8 <= data_end, NULL); g_value_init (res, G_TYPE_UINT64); g_value_set_uint64 (res, GSF_LE_GET_GUINT64 (*data)); *data += 8; break; case VT_LPSTR : /* * This is the representation of many strings. It is stored in * the same representation as VT_BSTR. Note that the serialized * representation of VP_LPSTR has a preceding byte count, whereas * the in-memory representation does not. 
*/ /* be anal and safe */ g_return_val_if_fail (*data + 4 <= data_end, NULL); len = GSF_LE_GET_GUINT32 (*data); g_return_val_if_fail (len < 0x10000, NULL); g_return_val_if_fail (*data + 4 + len*section->char_size <= data_end, NULL); error = NULL; d (gsf_mem_dump (*data + 4, len * section->char_size);); str = g_convert_with_iconv (*data + 4, len * section->char_size, section->iconv_handle, NULL, NULL, &error); g_value_init (res, G_TYPE_STRING); if (NULL != str) { g_value_set_string (res, str); g_free (str); } else if (NULL != error) { g_warning ("error: %s", error->message); g_error_free (error); } else { g_warning ("unknown error converting string property, using blank"); } *data += 4 + len * section->char_size; break; case VT_LPWSTR : /* * A counted and null-terminated Unicode string; a DWORD character * count (where the count includes the terminating null) followed * by that many Unicode (16-bit) characters. Note that the count * is character count, not byte count. */ /* be anal and safe */ g_return_val_if_fail (*data + 4 <= data_end, NULL); len = GSF_LE_GET_GUINT32 (*data); g_return_val_if_fail (len < 0x10000, NULL); g_return_val_if_fail (*data + 4 + len * 2 <= data_end, NULL); error = NULL; d (gsf_mem_dump (*data + 4, len*2);); str = g_convert (*data + 4, len*2, "UTF-8", "UTF-16LE", NULL, NULL, &error); g_value_init (res, G_TYPE_STRING); if (NULL != str) { g_value_set_string (res, str); g_free (str); } else if (NULL != error) { g_warning ("error: %s", error->message); g_error_free (error); } else { g_warning ("unknown error converting string property, using blank"); } *data += 4 + len*2; break; case VT_FILETIME : /* 64-bit FILETIME structure, as defined by Win32. 
*/ g_return_val_if_fail (*data + 8 <= data_end, NULL); { /* ft * 100ns since Jan 1 1601 */ guint64 ft = GSF_LE_GET_GUINT64 (*data); GsfTimestamp ts; ft /= 10000000; /* convert to seconds */ ft -= G_GINT64_CONSTANT (11644473600); /* move to Jan 1 1970 */ ts.timet = (time_t)ft; g_value_init (res, GSF_TIMESTAMP_TYPE); gsf_value_set_timestamp (res, &ts); *data += 8; break; } case VT_BLOB : /* * A DWORD count of bytes, followed by that many bytes of data. * The byte count does not include the four bytes for the length * of the count itself: An empty blob would have a count of * zero, followed by zero bytes. Thus the serialized represen- * tation of a VT_BLOB is similar to that of a VT_BSTR but does * not guarantee a null byte at the end of the data. */ /* FIXME FIXME FIXME TODO */ g_free (res); res = NULL; break; case VT_STREAM : /* * Indicates the value is stored in a stream that is sibling to * the CONTENTS stream. Following this type indicator is data * in the format of a serialized VT_LPSTR, which names the stream * containing the data. */ /* FIXME FIXME FIXME TODO */ g_free (res); res = NULL; break; case VT_STORAGE : /* * Indicates the value is stored in an IStorage that is sibling * to the CONTENTS stream. Following this type indicator is data * in the format of a serialized VT_LPSTR, which names the * IStorage containing the data. */ /* FIXME FIXME FIXME TODO */ g_free (res); res = NULL; break; case VT_STREAMED_OBJECT: /* * Same as VT_STREAM, but indicates that the stream contains a * serialized object, which is a class ID followed by initiali- * zation data for the class. */ /* FIXME FIXME FIXME TODO */ g_free (res); res = NULL; break; case VT_STORED_OBJECT : /* * Same as VT_STORAGE, but indicates that the designated IStorage * contains a loadable object. */ /* FIXME FIXME FIXME TODO */ g_free (res); res = NULL; break; case VT_BLOB_OBJECT : /* * Contains a serialized object in the same representation as * would appear in a VT_STREAMED_OBJECT. 
That is, following the * VT_BLOB_OBJECT tag is a DWORD byte count of the remaining data * (where the byte count does not include the size of itself) * which is in the format of a class ID followed by initialization * data for that class */ /* FIXME FIXME FIXME TODO */ g_free (res); res = NULL; break; case VT_CF : error = NULL; if (!parse_vt_cf (res, data, data_end, &error)) { /* suck, we can't propagate the error upwards */ g_warning ("error: %s", error->message); g_error_free (error); g_free (res); res = NULL; } break; case VT_CLSID : /* A class ID (or other GUID) */ *data += 16; g_free (res); res = NULL; break; case VT_ERROR : /* A DWORD containing a status code. */ case VT_UNKNOWN : case VT_DECIMAL : case VT_I1 : /* 1-byte signed integer */ case VT_INT : case VT_UINT : case VT_VOID : case VT_HRESULT : case VT_PTR : case VT_SAFEARRAY : case VT_CARRAY : case VT_USERDEFINED : g_warning ("type %s (0x%x) is not permitted in property sets", msole_vt_name (type), type); g_free (res); res = NULL; break; default : g_warning ("Unknown property type %d (0x%x)", type, type); g_free (res); res = NULL; } if (res != NULL && G_IS_VALUE (res)) { d ( { char *val = g_strdup_value_contents (res); g_print ("%s\n", val); g_free (val); }); } else { d ({ char const *type_name = msole_vt_name (type); if (type_name) { g_print ("A '%s' property could not be parsed\n", type_name); } else { g_print ("A %d property could not be parsed\n", type); } }); } return res; } static gboolean msole_prop_read (GsfInput *in, GsfMSOleMetaDataSection *section, GsfMSOleMetaDataProp *props, unsigned i, GsfDocMetaData *accum) { guint32 type; guint8 const *data; /* FIXME : why size-4 ? I must be missing something */ gsf_off_t size = ((i+1) >= section->num_props) ? 
section->size-4 : props[i+1].offset; char *name; GValue *val; g_return_val_if_fail (i < section->num_props, FALSE); g_return_val_if_fail (size >= props[i].offset + 4, FALSE); size -= props[i].offset; /* includes the type id */ if (gsf_input_seek (in, section->offset+props[i].offset, G_SEEK_SET) || NULL == (data = gsf_input_read (in, size, NULL))) { g_warning ("failed to read prop #%d", i); return FALSE; } type = GSF_LE_GET_GUINT32 (data); data += 4; /* dictionary is magic */ if (props[i].id == 0) { guint32 len, id, i, n; gsize gslen; char *name; guint8 const *start = data; g_return_val_if_fail (section->dict == NULL, FALSE); section->dict = g_hash_table_new_full ( g_direct_hash, g_direct_equal, NULL, g_free); d ({ g_print ("Dictionary = \n"); gsf_mem_dump (data-4, size); }); n = type; for (i = 0 ; i < n ; i++) { id = GSF_LE_GET_GUINT32 (data); len = GSF_LE_GET_GUINT32 (data + 4); g_return_val_if_fail (len < 0x10000, FALSE); gslen = 0; name = g_convert_with_iconv (data + 8, len * section->char_size, section->iconv_handle, &gslen, NULL, NULL); len = (guint32)gslen; data += 8 + len; d (g_print ("\t%u == %s\n", id, name);); g_hash_table_replace (section->dict, GINT_TO_POINTER (id), name); /* MS documentation blows goats ! * The docs claim there are padding bytes in the dictionary. * Their examples show padding bytes. * In reality non-unicode strings do not see to have padding. */ if (section->char_size != 1 && (data - start) % 4) data += 4 - ((data - start) % 4); } } else { gboolean linked; d (g_print ("===> %u) ", i); gsf_mem_dump (data-4, size);); name = g_strdup (msole_prop_id_to_gsf (section, props[i].id, &linked)); d (g_print (" @ %x %x = ", (unsigned)props[i].offset, (unsigned)size);); val = msole_prop_parse (section, type, &data, data + size); if (NULL != name && NULL != val) { if (linked) { GsfDocProp *prop = gsf_doc_meta_data_lookup (accum, name); if (NULL == prop) { g_warning ("linking property '%s' before it\'s value is specified", (name ? 
name : "")); } else if (!G_VALUE_HOLDS_STRING (val)) { g_warning ("linking property '%s' before it\'s value is specified", (name ? name : "")); } else gsf_doc_prop_set_link (prop, g_value_dup_string (val)); } else { gsf_doc_meta_data_insert (accum, name, val); val = NULL; name = NULL; } } if (NULL != val) { if (G_IS_VALUE (val)) g_value_unset (val); g_free (val); } g_free (name); } return TRUE; } static int msole_prop_cmp (gconstpointer a, gconstpointer b) { GsfMSOleMetaDataProp const *prop_a = a; GsfMSOleMetaDataProp const *prop_b = b; if (prop_a->offset < prop_b->offset) return -1; else if (prop_a->offset > prop_b->offset) return +1; else return 0; } /** * gsf_msole_metadata_read : * @in : #GsfInput * @accum : #GsfDocMetaData * * Read a stream formated as a set of MS OLE properties from @in and store the * results in @accum. * * Returns: GError which the caller must free on error. **/ GError * gsf_msole_metadata_read (GsfInput *in, GsfDocMetaData *accum) { guint8 const *data; guint16 version; guint32 os, num_sections; unsigned i, j; GsfMSOleMetaDataSection *sections; GsfMSOleMetaDataProp *props; GsfDocProp *prop; /* http://bugzilla.gnome.org/show_bug.cgi?id=352055 * psiwin generates files with empty property sections */ if (gsf_input_size (in) <= 0) return NULL; data = gsf_input_read (in, 28, NULL); if (NULL == data) return g_error_new (gsf_input_error_id (), 0, "Unable to read MS property stream header"); d ({g_print ("===================================\n" "header class id ==\n"); gsf_mem_dump (data, 28);}); /* * Validate the Property Set Header. 
* Format (bytes) : * 00 - 01 Byte order 0xfffe * 02 - 03 Format 0 * 04 - 05 OS Version high word is the OS * 06 - 07 low word is the OS version * 0 = win16 * 1 = mac * 2 = win32 * 08 - 23 Class Identifier Usually Format ID * 24 - 27 Section count Should be at least 1 */ os = GSF_LE_GET_GUINT16 (data + 6); version = GSF_LE_GET_GUINT16 (data + 2); num_sections = GSF_LE_GET_GUINT32 (data + 24); if (GSF_LE_GET_GUINT16 (data + 0) != 0xfffe || (version != 0 && version != 1) || os > 2 || num_sections > 100) /* arbitrary sanity check */ return g_error_new (gsf_input_error_id (), 0, "Invalid MS property stream header"); /* extract the section info */ /* * The Format ID/Offset list follows. * Format: * 00 - 16 Section Name Format ID * 16 - 19 Section Offset The offset is the number of * bytes from the start of the * whole stream to where the * section begins. */ sections = (GsfMSOleMetaDataSection *)g_alloca (sizeof (GsfMSOleMetaDataSection)* num_sections); for (i = 0 ; i < num_sections ; i++) { data = gsf_input_read (in, 20, NULL); if (NULL == data) return g_error_new (gsf_input_error_id (), 0, "Unable to read MS property stream header"); if (!memcmp (data, component_guid, sizeof (component_guid))) sections [i].type = COMPONENT_PROP; else if (!memcmp (data, document_guid, sizeof (document_guid))) sections [i].type = DOC_PROP; else if (!memcmp (data, user_guid, sizeof (user_guid))) sections [i].type = USER_PROP; else { sections [i].type = USER_PROP; g_warning ("Unknown property section type, treating it as USER"); gsf_mem_dump (data, 16); } sections [i].offset = GSF_LE_GET_GUINT32 (data + 16); } /* * A section is the third part of the property set stream. 
* Format (bytes) : * 00 - 03 Section size A byte count for the section (which is inclusive * of the byte count itself and should always be a * multiple of 4); * 04 - 07 Property count A count of the number of properties * 08 - xx An array of 32-bit Property ID/Offset pairs * yy - zz An array of Property Type indicators/Value pairs */ for (i = 0 ; i < num_sections ; i++) { if (gsf_input_seek (in, sections[i].offset, G_SEEK_SET) || NULL == (data = gsf_input_read (in, 8, NULL))) return g_error_new (gsf_input_error_id (), 0, "Invalid MS property section"); sections[i].iconv_handle = (GIConv)-1; sections[i].char_size = 1; sections[i].dict = NULL; sections[i].size = GSF_LE_GET_GUINT32 (data); /* includes header */ sections[i].num_props = GSF_LE_GET_GUINT32 (data + 4); d (g_print ("=============================================\n" "===> section #%d : type %d at offset 0x%x, size 0x%x, numprops = %u\n", i, (int)sections [i].type, (guint32)sections [i].offset, sections[i].size, sections[i].num_props);); if (sections[i].num_props <= 0) continue; if (sections[i].num_props > gsf_input_remaining(in) / 8) return g_error_new (gsf_input_error_id (), 0, "Invalid MS property stream header or file truncated"); /* * Get and save all the Property ID/Offset pairs. * Format (bytes) : * 00 - 03 id Property ID * 04 - 07 offset The distance from the start of the section to the * start of the Property Type/Value pair. 
*/ d (g_print ("Offsets\n");); props = g_new (GsfMSOleMetaDataProp, sections[i].num_props); for (j = 0; j < sections[i].num_props; j++) { if (NULL == (data = gsf_input_read (in, 8, NULL))) { g_free (props); return g_error_new (gsf_input_error_id (), 0, "Invalid MS property section"); } props [j].id = GSF_LE_GET_GUINT32 (data); props [j].offset = GSF_LE_GET_GUINT32 (data + 4); d (g_print ("%d) ID=%d, offset=0x%x\n", j, props [j].id, (unsigned)props [j].offset);); } /* order prop info by offset to facilitate bounds checking */ qsort (props, sections[i].num_props, sizeof (GsfMSOleMetaDataProp), msole_prop_cmp); /* * Find and process the code page. * Property ID 1 is reserved as an indicator of the code page. */ sections[i].iconv_handle = (GIConv)-1; sections[i].char_size = 1; for (j = 0; j < sections[i].num_props; j++) /* first codepage */ if (props[j].id == 1) { msole_prop_read (in, sections+i, props, j, accum); if (NULL != (prop = gsf_doc_meta_data_lookup (accum, GSF_META_NAME_CODEPAGE))) { GValue const *val = gsf_doc_prop_get_val (prop); if (NULL != val && G_VALUE_HOLDS_INT (val)) { int codepage = g_value_get_int (val); sections[i].iconv_handle = gsf_msole_iconv_open_for_import (codepage); if (codepage == 1200 || codepage == 1201) sections[i].char_size = 2; } } } if (sections[i].iconv_handle == (GIConv)-1) sections[i].iconv_handle = gsf_msole_iconv_open_for_import (1252); /* * Find and process the Property Set Dictionary * Property ID 0 is reserved as an indicator of the dictionary. * For User Defined Sections, Property ID 0 is NOT a dictionary. 
*/ for (j = 0; j < sections[i].num_props; j++) /* then dictionary */ if (props[j].id == 0) msole_prop_read (in, sections+i, props, j, accum); /* Process all the properties */ for (j = 0; j < sections[i].num_props; j++) /* the rest */ if (props[j].id > 1) msole_prop_read (in, sections+i, props, j, accum); gsf_iconv_close (sections[i].iconv_handle); g_free (props); if (sections[i].dict != NULL) g_hash_table_destroy (sections[i].dict); } return NULL; } /****************************************************************************/ typedef struct { GsfOutput *out; gboolean doc_not_component; GHashTable *dict; struct { unsigned count; /* includes 2nd prop for links */ GSList *props; } builtin, user; unsigned codepage; } WritePropState; static GsfMSOleVariantType gvalue_to_msole_vt (GValue const *value, GsfMSOleMetaDataPropMap const *map) { g_return_val_if_fail (value != NULL, VT_EMPTY); switch (G_TYPE_FUNDAMENTAL (G_VALUE_TYPE (value))) { case G_TYPE_BOOLEAN: return VT_BOOL; case G_TYPE_UCHAR: return VT_UI1; case G_TYPE_FLOAT: return VT_R4; case G_TYPE_DOUBLE: return VT_R8; case G_TYPE_STRING: return VT_LPSTR; case G_TYPE_INT: return (NULL != map && map->prefered_type == VT_I2) ? VT_I2 : VT_I4; case G_TYPE_UINT: return (NULL != map && map->prefered_type == VT_UI2) ? 
VT_UI2 : VT_UI4; case G_TYPE_BOXED: if (VAL_IS_GSF_TIMESTAMP (value)) return VT_FILETIME; return VT_UNKNOWN; case G_TYPE_OBJECT: if (VAL_IS_GSF_DOCPROP_VECTOR (value)) { GValueArray *vector = gsf_value_get_docprop_varray (value); unsigned i, n; GsfMSOleVariantType type, tmp; if (vector == NULL) return VT_UNKNOWN; if (map != NULL) { type = map->prefered_type & (~VT_VECTOR); if (type == VT_VARIANT) return VT_VECTOR | VT_VARIANT; } else type = VT_UNKNOWN; n = vector->n_values; for (i = 0; i < n; i++) { tmp = gvalue_to_msole_vt ( g_value_array_get_nth (vector, i), NULL); if (type == VT_UNKNOWN) type = tmp; else if (type != tmp) return VT_VECTOR | VT_VARIANT; } return VT_VECTOR | type; } break; } return VT_UNKNOWN; } /* Returns: TRUE on success */ static gboolean msole_metadata_write_prop (WritePropState *state, char const *name, GValue const *value, gboolean suppress_type) { static guint8 const zero[1] = { '\0' }; GsfMSOleMetaDataPropMap const *map = (name != NULL) ? msole_gsf_name_to_prop (name) : NULL; GsfMSOleVariantType type; guint8 buf[8]; g_return_val_if_fail (value != NULL, FALSE); type = gvalue_to_msole_vt (value, map); if (!suppress_type) { GSF_LE_SET_GUINT32 (buf, type); gsf_output_write (state->out, 4, buf); } if (NULL != map && map->prefered_type != type) { d(g_print ("Exporting property '%s' with type 0x%x rather than the usual 0x%x\n", map->gsf_name, type, map->prefered_type);); } if (type & VT_VECTOR) { GValueArray *vector = gsf_value_get_docprop_varray (value); unsigned i, n = vector->n_values; gboolean res; GSF_LE_SET_GINT32 (buf, n); res = gsf_output_write (state->out, 4, buf); for (i = 0; i < n; i++) res &= msole_metadata_write_prop (state, NULL, g_value_array_get_nth (vector, i), type != (VT_VECTOR | VT_VARIANT)); return res; } switch (type) { case VT_BOOL: if (g_value_get_boolean (value)) GSF_LE_SET_GINT32 (buf, 0xffffffff); else GSF_LE_SET_GINT32 (buf, 0); return gsf_output_write (state->out, 4, buf); case VT_UI1: GSF_LE_SET_GUINT32 (buf, 
g_value_get_uchar (value)); return gsf_output_write (state->out, 4, buf); case VT_I2: GSF_LE_SET_GINT16 (buf, g_value_get_int (value)); GSF_LE_SET_GUINT16 (buf+2, 0); return gsf_output_write (state->out, 4, buf); case VT_I4: GSF_LE_SET_GINT32 (buf, g_value_get_int (value)); return gsf_output_write (state->out, 4, buf); case VT_UI2: case VT_UI4: GSF_LE_SET_GUINT32 (buf, g_value_get_uint (value)); return gsf_output_write (state->out, 4, buf); case VT_R4: GSF_LE_SET_FLOAT (buf, g_value_get_float (value)); return gsf_output_write (state->out, 4, buf); case VT_R8: GSF_LE_SET_DOUBLE (buf, g_value_get_double (value)); return gsf_output_write (state->out, 8, buf); case VT_LPSTR : { /* FIXME FIXME FIXME TODO : use iconv from codepage */ char const *txt = g_value_get_string (value); unsigned len = (NULL != txt) ? strlen (txt) : 0; GSF_LE_SET_GUINT32 (buf, len+1); return gsf_output_write (state->out, 4, buf) && gsf_output_write (state->out, len, txt) && gsf_output_write (state->out, 1, zero); } case VT_FILETIME : { GsfTimestamp const *ts = g_value_get_boxed (value); gint32 timet_signed = (gint32) ts->timet; guint64 ft; ft = timet_signed + G_GINT64_CONSTANT (11644473600); ft *= 10000000; GSF_LE_SET_GUINT64 (buf, ft); return gsf_output_write (state->out, 8, buf); } default: break; } g_warning ("Ignoring property '%s', how do we export a property of type '%s'", name ? name : "", g_type_name (G_TYPE_FUNDAMENTAL (G_VALUE_TYPE (value)))); return FALSE; } static void cb_write_dict (char const *name, gpointer id, WritePropState *state) { static guint8 const zero[1] = { '\0' }; guint8 buf [8]; unsigned len = strlen (name) + 1; GSF_LE_SET_GUINT32 (buf, GPOINTER_TO_UINT (id)); GSF_LE_SET_GUINT32 (buf+4, len+1); gsf_output_write (state->out, 8, buf); gsf_output_write (state->out, len, name); gsf_output_write (state->out, 1, zero); } static gboolean msole_metadata_write_section (WritePropState *state, gboolean user) { char const *name; guint8 buf [8]; GSList *ptr = user ? 
state->user.props : state->builtin.props; unsigned count = user ? state->user.count : state->builtin.count; gsf_off_t len, base = gsf_output_tell (state->out); GsfMSOleMetaDataProp *offsets; GsfMSOleMetaDataPropMap const *map; GsfDocProp const *prop; gpointer tmp; unsigned i; GValue scratch; if (user && state->dict == NULL) return TRUE; /* Skip past the size and id/offset pairs */ if (!gsf_output_seek (state->out, 4 /* length */ + 4 /* count */ + 8 * count /* id/offset pairs */, G_SEEK_END)) return FALSE; memset (&scratch, 0, sizeof (GValue)); g_value_init (&scratch, G_TYPE_STRING); offsets = g_alloca (sizeof (GsfMSOleMetaDataProp) * count); /* 0) codepage */ offsets[0].id = 1; offsets[0].offset = gsf_output_tell (state->out); GSF_LE_SET_GUINT32 (buf, VT_I2); GSF_LE_SET_GUINT32 (buf+4, state->codepage); gsf_output_write (state->out, 8, buf); /* 1) dictionary */ if (user) { offsets[1].id = 0; offsets[1].offset = gsf_output_tell (state->out); GSF_LE_SET_GUINT32 (buf, g_hash_table_size (state->dict)); gsf_output_write (state->out, 4, buf); g_hash_table_foreach (state->dict, (GHFunc) cb_write_dict, state); i = 2; } else i = 1; /* 2) props */ for (; ptr != NULL && i < count ; ptr = ptr->next, i++) { prop = ptr->data; name = gsf_doc_prop_get_name (prop); if (user) { tmp = g_hash_table_lookup (state->dict, name); offsets[i].id = GPOINTER_TO_INT (tmp); if (offsets[i].id < 2) { g_warning ("Invalid ID (%d) for custom name '%s'", offsets[i].id, name); continue; } } else { map = msole_gsf_name_to_prop (name); if (map == NULL) { g_warning ("Missing map for builting property '%s'", name); continue; } offsets[i].id = map->id; } offsets[i].offset = gsf_output_tell (state->out); msole_metadata_write_prop (state, name, gsf_doc_prop_get_val (prop), FALSE); if (gsf_doc_prop_get_link (prop)) { i++; offsets[i].id = offsets[i-1].id | 0x1000000; offsets[i].offset = gsf_output_tell (state->out); g_value_set_static_string (&scratch, gsf_doc_prop_get_link (prop)); msole_metadata_write_prop 
(state, NULL, &scratch, FALSE); } } len = gsf_output_tell (state->out) - base; gsf_output_seek (state->out, base, G_SEEK_SET); GSF_LE_SET_GUINT32 (buf, len); GSF_LE_SET_GUINT32 (buf+4, count); gsf_output_write (state->out, 8, buf); for (i = 0 ; i < count ; i++) { GSF_LE_SET_GUINT32 (buf, offsets[i].id); GSF_LE_SET_GUINT32 (buf+4, offsets[i].offset - base); gsf_output_write (state->out, 8, buf); } return gsf_output_seek (state->out, 0, G_SEEK_END); } static void cb_count_props (char const *name, GsfDocProp *prop, WritePropState *state) { GsfMSOleMetaDataPropMap const *map = msole_gsf_name_to_prop (name); /* allocate predefined ids or add it to the dictionary */ if (map != NULL) { if (map->id == 0) return; /* dictionary is handled elsewhere */ if (map->section == (state->doc_not_component ? COMPONENT_PROP : DOC_PROP)) return; if (map->id == 1) { /*codepage */ GValue const *val = gsf_doc_prop_get_val (prop); if (NULL != val && G_VALUE_HOLDS_INT (val)) state->codepage = g_value_get_int (val); return; } d (g_print ("%d) Adding builtin %s'\n", state->builtin.count, map->gsf_name);); state->builtin.count += gsf_doc_prop_get_link (prop) ? 2 : 1; state->builtin.props = g_slist_prepend (state->builtin.props, prop); } else if (state->doc_not_component) { /* keep user props in the document */ d (g_print("user defined named '%s' assigned id = %d\n", name, state->user.count);); if (NULL == state->dict) state->dict = g_hash_table_new (g_str_hash, g_str_equal); g_hash_table_insert (state->dict, (gpointer) name, GINT_TO_POINTER (state->user.count)); state->user.count += gsf_doc_prop_get_link (prop) ? 
2 : 1;
		state->user.props = g_slist_prepend (state->user.props, prop);
	}
}

/**
 * gsf_msole_metadata_write :
 * @out : #GsfOutput
 * @meta_data : #GsfDocMetaData
 * @doc_not_component : a kludge to differentiate DocumentSummary from Summary
 *
 * Writes @meta_data to @out as a property set stream: a fixed header, one
 * section descriptor (format id + offset) per section, then the builtin
 * section and, if any user defined properties were found, a user section.
 *
 * Returns: %TRUE on success;
 **/
gboolean
gsf_msole_metadata_write (GsfOutput *out,
			  GsfDocMetaData const *meta_data,
			  gboolean doc_not_component)
{
	/* 24 bytes; followed in the stream by the 4 byte section count */
	static guint8 const header[] = {
		0xfe, 0xff, /* byte order */
		0, 0, /* Format */
		0x04, 0x0a, /* OS : XP == 0xA04 */
		0x02, 0x00, /* win32 == 2 */
		0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* clasid = 0 */
	};

	gboolean success = FALSE;
	guint8 buf [4];
	WritePropState state;

	state.codepage = 1252;
	state.out = out;
	state.dict = NULL;
	state.builtin.count = 1; /* codepage */
	state.user.count = 2; /* codepage and dictionary */
	state.builtin.props = state.user.props = NULL;
	state.doc_not_component = doc_not_component;
	/* first pass: sort the properties into builtin / user lists */
	d (g_print ("================================\nFinding props\n"););
	gsf_doc_meta_data_foreach (meta_data, (GHFunc) cb_count_props, &state);
	d (g_print ("Done\n" "================================\n"););

	/* Write stream header */
	/* number of sections: 2 when there are user properties */
	GSF_LE_SET_GUINT32 (buf, (state.dict != NULL) ? 2 : 1);
	if (!gsf_output_write (out, sizeof (header), header) ||
	    !gsf_output_write (out, 4, buf))
		goto err;

	/* Write section header(s) */
	/* offset of the first section = 28 bytes of header and count plus
	 * 20 bytes (16 id + 4 offset) per section descriptor : 0x30 or 0x44 */
	GSF_LE_SET_GUINT32 (buf, (state.dict != NULL) ? 0x44 : 0x30);
	if (!gsf_output_write (out, 16,
			       doc_not_component ? document_guid : component_guid) ||
	    !gsf_output_write (out, 4, buf))
		goto err;
	if (state.dict != NULL) {
		GSF_LE_SET_GUINT32 (buf, 0);
		if (!gsf_output_write (out, sizeof (user_guid), user_guid) ||
		    !gsf_output_write (out, 4, buf)) /* bogus position, fix it later */
			goto err;
	}

	/* Write section(s) */
	if (!msole_metadata_write_section (&state, FALSE))
		goto err;
	if (state.dict != NULL) {
		/* the user section starts here; patch its descriptor, whose
		 * offset field sits at 28 + 20 + 16 = 0x40 */
		gsf_off_t base = gsf_output_tell (state.out);
		GSF_LE_SET_GUINT32 (buf, base);
		if (!gsf_output_seek (state.out, 0x40, G_SEEK_SET) ||
		    !gsf_output_write (out, 4, buf) ||
		    !gsf_output_seek (state.out, 0, G_SEEK_END) ||
		    !msole_metadata_write_section (&state, TRUE))
			goto err;
	}

	success = TRUE;
err :
	/* the lists only borrow the GsfDocProp pointers */
	g_slist_free (state.builtin.props);
	g_slist_free (state.user.props);
	if (state.dict != NULL)
		g_hash_table_destroy (state.dict);
	return success;
}

/*
 * Locale tag <-> windows language id.  Searched linearly from the top in
 * both directions, so when a tag or an id occurs more than once the first
 * row wins.
 */
static struct {
	char const *tag;
	guint lid;
} const gsf_msole_language_ids[] = {
	{ "-none-", 0x0000 }, /* none (language neutral) */
	{ "-none-", 0x0400 }, /* none */
	{ "af_ZA", 0x0436 }, /* Afrikaans */
	{ "am", 0x045e }, /* Amharic */
	{ "sq_AL", 0x041c }, /* Albanian */
	{ "ar_SA", 0x0401 }, /* Arabic (Saudi) */
	{ "ar_IQ", 0x0801 }, /* Arabic (Iraq) */
	{ "ar_EG", 0x0c01 }, /* Arabic (Egypt) */
	{ "ar_LY", 0x1001 }, /* Arabic (Libya) */
	{ "ar_DZ", 0x1401 }, /* Arabic (Algeria) */
	{ "ar_MA", 0x1801 }, /* Arabic (Morocco) */
	{ "ar_TN", 0x1c01 }, /* Arabic (Tunisia) */
	{ "ar_OM", 0x2001 }, /* Arabic (Oman) */
	{ "ar_YE", 0x2401 }, /* Arabic (Yemen) */
	{ "ar_SY", 0x2801 }, /* Arabic (Syria) */
	{ "ar_JO", 0x2c01 }, /* Arabic (Jordan) */
	{ "ar_LB", 0x3001 }, /* Arabic (Lebanon) */
	{ "ar_KW", 0x3401 }, /* Arabic (Kuwait) */
	{ "ar_AE", 0x3801 }, /* Arabic (United Arab Emirates) */
	{ "ar_BH", 0x3c01 }, /* Arabic (Bahrain) */
	{ "ar_QA", 0x4001 }, /* Arabic (Qatar) */
	{ "as", 0x044d }, /* Assamese */
	{ "az", 0x042c }, /* Azerbaijani */
	{ "hy_AM", 0x042b }, /* Armenian */
	{ "az", 0x044c }, /* Azeri (Latin) az_ */
	{ "az", 0x082c }, /* Azeri (Cyrillic) az_ */
	{ "eu_ES",
0x042d }, /* Basque */ { "be_BY", 0x0423 }, /* Belarussian */ { "bn", 0x0445 }, /* Bengali bn_ */ { "bg_BG", 0x0402 }, /* Bulgarian */ { "ca_ES", 0x0403 }, /* Catalan */ { "zh_TW", 0x0404 }, /* Chinese (Taiwan) */ { "zh_CN", 0x0804 }, /* Chinese (PRC) */ { "zh_HK", 0x0c04 }, /* Chinese (Hong Kong) */ { "zh_SG", 0x1004 }, /* Chinese (Singapore) */ { "ch_MO", 0x1404 }, /* Chinese (Macau SAR) */ { "hr_HR", 0x041a }, /* Croatian */ { "cs_CZ", 0x0405 }, /* Czech */ { "da_DK", 0x0406 }, /* Danish */ { "div", 0x465 }, /* Divehi div_*/ { "nl_NL", 0x0413 }, /* Dutch (Netherlands) */ { "nl_BE", 0x0813 }, /* Dutch (Belgium) */ { "en_US", 0x0409 }, /* English (USA) */ { "en_GB", 0x0809 }, /* English (UK) */ { "en_AU", 0x0c09 }, /* English (Australia) */ { "en_CA", 0x1009 }, /* English (Canada) */ { "en_NZ", 0x1409 }, /* English (New Zealand) */ { "en_IE", 0x1809 }, /* English (Ireland) */ { "en_ZA", 0x1c09 }, /* English (South Africa) */ { "en_JM", 0x2009 }, /* English (Jamaica) */ { "en", 0x2409 }, /* English (Caribbean) */ { "en_BZ", 0x2809 }, /* English (Belize) */ { "en_TT", 0x2c09 }, /* English (Trinidad) */ { "en_ZW", 0x3009 }, /* English (Zimbabwe) */ { "en_PH", 0x3409 }, /* English (Phillipines) */ { "et_EE", 0x0425 }, /* Estonian */ { "fo", 0x0438 }, /* Faeroese fo_ */ { "fa_IR", 0x0429 }, /* Farsi */ { "fi_FI", 0x040b }, /* Finnish */ { "fr_FR", 0x040c }, /* French (France) */ { "fr_BE", 0x080c }, /* French (Belgium) */ { "fr_CA", 0x0c0c }, /* French (Canada) */ { "fr_CH", 0x100c }, /* French (Switzerland) */ { "fr_LU", 0x140c }, /* French (Luxembourg) */ { "fr_MC", 0x180c }, /* French (Monaco) */ { "gl", 0x0456 }, /* Galician gl_ */ { "ga_IE", 0x083c }, /* Irish Gaelic */ { "gd_GB", 0x100c }, /* Scottish Gaelic */ { "ka_GE", 0x0437 }, /* Georgian */ { "de_DE", 0x0407 }, /* German (Germany) */ { "de_CH", 0x0807 }, /* German (Switzerland) */ { "de_AT", 0x0c07 }, /* German (Austria) */ { "de_LU", 0x1007 }, /* German (Luxembourg) */ { "de_LI", 0x1407 }, /* German 
(Liechtenstein) */ { "el_GR", 0x0408 }, /* Greek */ { "gu", 0x0447 }, /* Gujarati gu_ */ { "ha", 0x0468 }, /* Hausa */ { "he_IL", 0x040d }, /* Hebrew */ { "hi_IN", 0x0439 }, /* Hindi */ { "hu_HU", 0x040e }, /* Hungarian */ { "is_IS", 0x040f }, /* Icelandic */ { "id_ID", 0x0421 }, /* Indonesian */ { "iu", 0x045d }, /* Inkutitut */ { "it_IT", 0x0410 }, /* Italian (Italy) */ { "it_CH", 0x0810 }, /* Italian (Switzerland) */ { "ja_JP", 0x0411}, /* Japanese */ { "kn", 0x044b }, /* Kannada kn_ */ { "ks", 0x0860 }, /* Kashmiri (India) ks_ */ { "kk", 0x043f }, /* Kazakh kk_ */ { "kok", 0x0457 }, /* Konkani kok_ */ { "ko_KR", 0x0412 }, /* Korean */ { "ko", 0x0812 }, /* Korean (Johab) ko_ */ { "kir", 0x0440 }, /* Kyrgyz */ { "la", 0x0476 }, /* Latin */ { "lo", 0x0454 }, /* Laothian */ { "lv_LV", 0x0426 }, /* Latvian */ { "lt_LT", 0x0427 }, /* Lithuanian */ { "lt_LT", 0x0827 }, /* Lithuanian (Classic) */ { "mk", 0x042f }, /* FYRO Macedonian */ { "my_MY", 0x043e }, /* Malaysian */ { "my_BN", 0x083e }, /* Malay Brunei Darussalam */ { "ml", 0x044c }, /* Malayalam ml_ */ { "mr", 0x044e }, /* Marathi mr_ */ { "mt", 0x043a }, /* Maltese */ { "mo", 0x0450 }, /* Mongolian */ { "ne_NP", 0x0461 }, /* Napali (Nepal) */ { "ne_IN", 0x0861 }, /* Nepali (India) */ { "nb_NO", 0x0414 }, /* Norwegian (Bokmaal) */ { "nn_NO", 0x0814 }, /* Norwegian (Nynorsk) */ { "or", 0x0448 }, /* Oriya or_ */ { "om", 0x0472 }, /* Oromo (Afan, Galla) */ { "pl_PL", 0x0415 }, /* Polish */ { "pt_BR", 0x0416 }, /* Portuguese (Brazil) */ { "pt_PT", 0x0816 }, /* Portuguese (Portugal) */ { "pa", 0x0446 }, /* Punjabi pa_ */ { "ps", 0x0463 }, /* Pashto (Pushto) */ { "rm", 0x0417 }, /* Rhaeto_Romanic rm_ */ { "ro_RO", 0x0418 }, /* Romanian */ { "ro_MD", 0x0818 }, /* Romanian (Moldova) */ { "ru_RU", 0x0419 }, /* Russian */ { "ru_MD", 0x0819 }, /* Russian (Moldova) */ { "se", 0x043b }, /* Sami (Lappish) se_ */ { "sa", 0x044f }, /* Sanskrit sa_ */ { "sr", 0x0c1a }, /* Serbian (Cyrillic) sr_ */ { "sr", 0x081a }, /* Serbian 
(Latin) sr_ */ { "sd", 0x0459 }, /* Sindhi sd_ */ { "sk_SK", 0x041b }, /* Slovak */ { "sl_SI", 0x0424 }, /* Slovenian */ { "wen", 0x042e }, /* Sorbian wen_ */ { "so", 0x0477 }, /* Somali */ { "es_ES", 0x040a }, /* Spanish (Spain, Traditional) */ { "es_MX", 0x080a }, /* Spanish (Mexico) */ { "es_ES", 0x0c0a }, /* Spanish (Modern) */ { "es_GT", 0x100a }, /* Spanish (Guatemala) */ { "es_CR", 0x140a }, /* Spanish (Costa Rica) */ { "es_PA", 0x180a }, /* Spanish (Panama) */ { "es_DO", 0x1c0a }, /* Spanish (Dominican Republic) */ { "es_VE", 0x200a }, /* Spanish (Venezuela) */ { "es_CO", 0x240a }, /* Spanish (Colombia) */ { "es_PE", 0x280a }, /* Spanish (Peru) */ { "es_AR", 0x2c0a }, /* Spanish (Argentina) */ { "es_EC", 0x300a }, /* Spanish (Ecuador) */ { "es_CL", 0x340a }, /* Spanish (Chile) */ { "es_UY", 0x380a }, /* Spanish (Uruguay) */ { "es_PY", 0x3c0a }, /* Spanish (Paraguay) */ { "es_BO", 0x400a }, /* Spanish (Bolivia) */ { "es_SV", 0x440a }, /* Spanish (El Salvador) */ { "es_HN", 0x480a }, /* Spanish (Honduras) */ { "es_NI", 0x4c0a }, /* Spanish (Nicaragua) */ { "es_PR", 0x500a }, /* Spanish (Puerto Rico) */ { "sx", 0x0430 }, /* Sutu */ { "sw", 0x0441 }, /* Swahili (Kiswahili/Kenya) */ { "sv_SE", 0x041d }, /* Swedish */ { "sv_FI", 0x081d }, /* Swedish (Finland) */ { "ta", 0x0449 }, /* Tamil ta_ */ { "tt", 0x0444 }, /* Tatar (Tatarstan) tt_ */ { "te", 0x044a }, /* Telugu te_ */ { "th_TH", 0x041e }, /* Thai */ { "ts", 0x0431 }, /* Tsonga ts_ */ { "tn", 0x0432 }, /* Tswana tn_ */ { "tr_TR", 0x041f }, /* Turkish */ { "tl", 0x0464 }, /* Tagalog */ { "tg", 0x0428 }, /* Tajik */ { "bo", 0x0451 }, /* Tibetan */ { "ti", 0x0473 }, /* Tigrinya */ { "uk_UA", 0x0422 }, /* Ukrainian */ { "ur_PK", 0x0420 }, /* Urdu (Pakistan) */ { "ur_IN", 0x0820 }, /* Urdu (India) */ { "uz", 0x0443 }, /* Uzbek (Latin) uz_ */ { "uz", 0x0843 }, /* Uzbek (Cyrillic) uz_ */ { "ven", 0x0433 }, /* Venda ven_ */ { "vi_VN", 0x042a }, /* Vietnamese */ { "cy_GB", 0x0452 }, /* Welsh */ { "xh", 0x0434 }, /* 
Xhosa xh */ { "yi", 0x043d }, /* Yiddish yi_ */ { "yo", 0x046a }, /* Yoruba */ { "zu", 0x0435 }, /* Zulu zu_ */ { "en_US", 0x0800 } /* Default */ }; /** * gsf_msole_lid_for_language * @lang : * * Returns: the LID (Language Identifier) for the input language. * If lang is %null, return 0x0400 ("-none-"), and not 0x0000 ("no proofing") **/ guint gsf_msole_lid_for_language (char const *lang) { guint i = 0 ; size_t len; if (lang == NULL) return 0x0400; /* return -none- */ /* Allow lang to match as a prefix (eg fr == fr_FR@euro) */ len = strlen (lang); for (i = 0 ; i < G_N_ELEMENTS(gsf_msole_language_ids); i++) if (!strncmp (lang, gsf_msole_language_ids[i].tag, len)) return gsf_msole_language_ids[i].lid; return 0x0400 ; /* return -none- */ } /** * gsf_msole_language_for_lid : * @lid : * * Returns: the xx_YY style string (can be just xx or xxx) for the given LID. * Return value must not be freed. If the LID is not found, is set to * 0x0400, or is set to 0x0000, will return "-none-" **/ char const * gsf_msole_language_for_lid (guint lid) { guint i = 0 ; for (i = 0 ; i < G_N_ELEMENTS(gsf_msole_language_ids); i++) if (gsf_msole_language_ids[i].lid == lid) return gsf_msole_language_ids[i].tag; return "-none-"; /* default */ } /** * gsf_msole_locale_to_lid : * * Covert the the codepage into an applicable LID **/ guint gsf_msole_codepage_to_lid (int codepage) { switch (codepage) { case 77: /* MAC_CHARSET */ return 0xFFF; /* This number is a hack */ case 128: /* SHIFTJIS_CHARSET */ return 0x411; /* Japanese */ case 129: /* HANGEUL_CHARSET */ return 0x412; /* Korean */ case 130: /* JOHAB_CHARSET */ return 0x812; /* Korean (Johab) */ case 134: /* GB2312_CHARSET - Chinese Simplified */ return 0x804; /* China PRC - And others!! */ case 136: /* CHINESEBIG5_CHARSET - Chinese Traditional */ return 0x404; /* Taiwan - And others!! 
*/ case 161: /* GREEK_CHARSET */ return 0x408; /* Greek */ case 162: /* TURKISH_CHARSET */ return 0x41f; /* Turkish */ case 163: /* VIETNAMESE_CHARSET */ return 0x42a; /* Vietnamese */ case 177: /* HEBREW_CHARSET */ return 0x40d; /* Hebrew */ case 178: /* ARABIC_CHARSET */ return 0x01; /* Arabic */ case 186: /* BALTIC_CHARSET */ return 0x425; /* Estonian - And others!! */ case 204: /* RUSSIAN_CHARSET */ return 0x419; /* Russian - And others!! */ case 222: /* THAI_CHARSET */ return 0x41e; /* Thai */ case 238: /* EASTEUROPE_CHARSET */ return 0x405; /* Czech - And many others!! */ } /* default */ return 0x0; } /** * gsf_msole_lid_to_codepage * @lid : * * Returns: our best guess at the codepage for the given language id **/ int gsf_msole_lid_to_codepage (guint lid) { if (lid == 0x0FFF) /* Macintosh Hack */ return 0x0FFF; switch (lid & 0xff) { case 0x01: /* Arabic */ return 1256; case 0x02: /* Bulgarian */ return 1251; case 0x03: /* Catalan */ return 1252; case 0x04: /* Chinese */ switch (lid) { case 0x1004: /* Chinese (Singapore) */ case 0x0404: /* Chinese (Taiwan) */ case 0x1404: /* Chinese (Macau SAR) */ case 0x0c04: /* Chinese (Hong Kong SAR, PRC) */ return 950; case 0x0804: /* Chinese (PRC) */ return 936; default : break; } break; case 0x05: /* Czech */ return 1250; case 0x06: /* Danish */ return 1252; case 0x07: /* German */ return 1252; case 0x08: /* Greek */ return 1253; case 0x09: /* English */ return 1252; case 0x0a: /* Spanish */ return 1252; case 0x0b: /* Finnish */ return 1252; case 0x0c: /* French */ return 1252; case 0x0d: /* Hebrew */ return 1255; case 0x0e: /* Hungarian */ return 1250; case 0x0f: /* Icelandic */ return 1252; case 0x10: /* Italian */ return 1252; case 0x11: /* Japanese */ return 932; case 0x12: /* Korean */ switch (lid) { case 0x0812: /* Korean (Johab) */ return 1361; case 0x0412: /* Korean */ return 949; default : break; } break; case 0x13: /* Dutch */ return 1252; case 0x14: /* Norwegian */ return 1252; case 0x15: /* Polish */ return 
1250; case 0x16: /* Portuguese */ return 1252; case 0x17: /* Rhaeto-Romanic */ return 1252; case 0x18: /* Romanian */ return 1250; case 0x19: /* Russian */ return 1251; case 0x1a: /* Serbian, Croatian, (Bosnian?) */ switch (lid) { case 0x041a: /* Croatian */ return 1252; case 0x0c1a: /* Serbian (Cyrillic) */ return 1251; case 0x081a: /* Serbian (Latin) */ return 1252; default : break; } break; case 0x1b: /* Slovak */ return 1250; case 0x1c: /* Albanian */ return 1251; case 0x1d: /* Swedish */ return 1252; case 0x1e: /* Thai */ return 874; case 0x1f: /* Turkish */ return 1254; case 0x20: /* Urdu. This is Unicode only. */ return 0; case 0x21: /* Bahasa Indonesian */ return 1252; case 0x22: /* Ukrainian */ return 1251; case 0x23: /* Byelorussian / Belarusian */ return 1251; case 0x24: /* Slovenian */ return 1250; case 0x25: /* Estonian */ return 1257; case 0x26: /* Latvian */ return 1257; case 0x27: /* Lithuanian */ return 1257; case 0x29: /* Farsi / Persian. This is Unicode only. */ return 0; case 0x2a: /* Vietnamese */ return 1258; case 0x2b: /* Windows 2000: Armenian. This is Unicode only. */ return 0; case 0x2c: /* Azeri */ switch (lid) { case 0x082c: /* Azeri (Cyrillic) */ return 1251; default : break; } break; case 0x2d: /* Basque */ return 1252; case 0x2f: /* Macedonian */ return 1251; case 0x36: /* Afrikaans */ return 1252; case 0x37: /* Windows 2000: Georgian. This is Unicode only. */ return 0; case 0x38: /* Faeroese */ return 1252; case 0x39: /* Windows 2000: Hindi. This is Unicode only. */ return 0; case 0x3E: /* Malaysian / Malay */ return 1252; case 0x41: /* Swahili */ return 1252; case 0x43: /* Uzbek */ switch (lid) { case 0x0843: /* Uzbek (Cyrillic) */ return 1251; default : break; } break; case 0x45: /* Windows 2000: Bengali. This is Unicode only. */ case 0x46: /* Windows 2000: Punjabi. This is Unicode only. */ case 0x47: /* Windows 2000: Gujarati. This is Unicode only. */ case 0x48: /* Windows 2000: Oriya. This is Unicode only. 
*/ case 0x49: /* Windows 2000: Tamil. This is Unicode only. */ case 0x4a: /* Windows 2000: Telugu. This is Unicode only. */ case 0x4b: /* Windows 2000: Kannada. This is Unicode only. */ case 0x4c: /* Windows 2000: Malayalam. This is Unicode only. */ case 0x4d: /* Windows 2000: Assamese. This is Unicode only. */ case 0x4e: /* Windows 2000: Marathi. This is Unicode only. */ case 0x4f: /* Windows 2000: Sanskrit. This is Unicode only. */ case 0x55: /* Myanmar / Burmese. This is Unicode only. */ case 0x57: /* Windows 2000: Konkani. This is Unicode only. */ case 0x61: /* Windows 2000: Nepali (India). This is Unicode only. */ return 0; #if 0 /****************************************************************** * Below this line is untested, unproven, and are just guesses. * * Insert above and use at your own risk * ******************************************************************/ case 0x042c: /* Azeri (Latin) */ case 0x0443: /* Uzbek (Latin) */ case 0x30: /* Sutu */ return 1252; /* UNKNOWN, believed to be CP1252 */ case 0x3f: /* Kazakh */ return 1251; /* JUST UNKNOWN, probably CP1251 */ case 0x44: /* Tatar */ case 0x58: /* Manipuri */ case 0x59: /* Sindhi */ case 0x60: /* Kashmiri (India) */ return 0; /* UNKNOWN, believed to be Unicode only */ #endif }; /* This is just a guess, but it will be a frequent guess */ return 1252; } /** * gsf_msole_lid_to_codepage_str * @lid : * * Returns: the Iconv codepage string for the given LID. * Return value must be g_free ()'d **/ gchar * gsf_msole_lid_to_codepage_str (guint lid) { guint cp = 0; if (lid == 0x0FFF) /* Macintosh Hack */ return g_strdup ("MACINTOSH"); cp = gsf_msole_lid_to_codepage (lid); return g_strdup_printf ("CP%d", cp); } /** * gsf_msole_iconv_win_codepage : * * Returns: our best guess at the applicable windows code page based on an * environment variable or the current locale. 
**/ int gsf_msole_iconv_win_codepage (void) { char *lang; if ((lang = getenv("WINDOWS_LANGUAGE")) == NULL) { char const *locale = setlocale (LC_CTYPE, NULL); if (locale != NULL) { char const *lang_sep = strchr (locale, '.'); if (lang_sep) lang = g_strndup (locale, (unsigned)(lang_sep - locale)); else lang = g_strdup (locale); /* simplifies exit */ } } if (lang != NULL) { guint lid = gsf_msole_lid_for_language (lang); g_free (lang); return gsf_msole_lid_to_codepage (lid); } return 1252; /* default ansi */ } static GSList * gsf_msole_iconv_get_codepage_string_list (int codepage) { GSList *cp_list = NULL; switch (codepage) { case 1200: cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-16LE")); break; case 1201: cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-16BE")); break; case 0x8000: case 10000: cp_list = g_slist_prepend (cp_list, g_strdup ("MACROMAN")); cp_list = g_slist_prepend (cp_list, g_strdup ("MACINTOSH")); break; case -535: case 65001: cp_list = g_slist_prepend (cp_list, g_strdup ("UTF-8")); break; case 0x8001: /* according to OOo docs 8001 is a synonym CP1252 */ codepage = 1252; default: cp_list = g_slist_prepend (cp_list, g_strdup_printf ("CP%u", codepage)); } return cp_list; } /** * gsf_msole_iconv_open_codepage_for_import : * @to: * @codepage : * * Returns: an iconv converter for @codepage -> utf8. 
**/
GIConv
gsf_msole_iconv_open_codepage_for_import (char const *to, int codepage)
{
	GIConv converter = (GIConv)(-1);
	GSList *names, *node;

	g_return_val_if_fail (to != NULL, (GIConv)(-1));

	/* Try the candidate names in order until one opens.  Every name is
	 * freed along the way, whether or not it was used. */
	names = gsf_msole_iconv_get_codepage_string_list (codepage);
	for (node = names; node != NULL; node = node->next) {
		gchar *candidate = node->data;

		if (converter == (GIConv)(-1))
			converter = g_iconv_open (to, candidate);
		g_free (candidate);
	}
	g_slist_free (names);

	if (converter == (GIConv)(-1))
		g_warning ("Unable to open an iconv handle from codepage %d -> %s",
			   codepage, to);
	return converter;
}

/**
 * gsf_msole_iconv_open_for_import :
 * @codepage : the windows codepage to convert from
 *
 * Returns: an iconv converter for single byte encodings @codepage -> utf8.
 * 	Attempt to handle the semantics of a specification for multibyte encodings
 * 	since this is only supposed to be used for single bytes.
 **/
GIConv
gsf_msole_iconv_open_for_import (int codepage)
{
	/* same as the general importer with the target fixed to UTF-8 */
	return gsf_msole_iconv_open_codepage_for_import ("UTF-8", codepage);
}

/**
 * gsf_msole_iconv_open_codepages_for_export :
 * @codepage_to : the windows codepage to convert to
 * @from : name of the encoding to convert from
 *
 * Returns: an iconv converter to go from @from -> to our best guess at a useful
 * 	windows codepage.
**/ GIConv gsf_msole_iconv_open_codepages_for_export (int codepage_to, char const *from) { GIConv iconv_handle = (GIConv)(-1); gchar *codepage_str; GSList *codepage_list, *cp; g_return_val_if_fail (from != NULL, (GIConv)(-1)); cp = codepage_list = gsf_msole_iconv_get_codepage_string_list (codepage_to); while (cp) { codepage_str = cp->data; if (iconv_handle == (GIConv)(-1)) iconv_handle = g_iconv_open (codepage_str, from); g_free (codepage_str); cp = cp->next; } g_slist_free (codepage_list); if (iconv_handle == (GIConv)(-1)) g_warning ("Unable to open an iconv handle from %s -> codepage %u", from, codepage_to); return iconv_handle; } /** * gsf_msole_iconv_open_codepage_for_export : * @codepage_to: * * Returns: an iconv converter to go from utf8 -> to our best guess at a useful * windows codepage. **/ GIConv gsf_msole_iconv_open_codepage_for_export (int codepage_to) { return gsf_msole_iconv_open_codepages_for_export (codepage_to, "UTF-8"); } /** * gsf_msole_iconv_open_for_export : * * Returns: an iconv convert to go from utf8 -> to our best guess at a useful * windows codepage. **/ GIConv gsf_msole_iconv_open_for_export (void) { return gsf_msole_iconv_open_codepage_for_export (gsf_msole_iconv_win_codepage ()); } #define VBA_COMPRESSION_WINDOW 4096 /** * gsf_msole_inflate: * @input: stream to read from * @offset: offset into it for start byte of compresse stream * * Decompresses an LZ compressed stream. * * Return value: A GByteArray that the caller is responsible for freeing **/ GByteArray * gsf_msole_inflate (GsfInput *input, gsf_off_t offset) { GByteArray *res; unsigned i, win_pos, pos = 0; unsigned mask, shift, distance; guint8 flag, buffer [VBA_COMPRESSION_WINDOW]; guint8 const *tmp; guint16 token, len; gboolean clean = TRUE; if (gsf_input_seek (input, offset, G_SEEK_SET)) return NULL; res = g_byte_array_new (); /* explaination from libole2/ms-ole-vba.c */ /* The first byte is a flag byte. Each bit in this byte * determines what the next byte is. 
If the bit is zero, * the next byte is a character. Otherwise the next two * bytes contain the number of characters to copy from the * umcompresed buffer and where to copy them from (offset, * length). */ while (NULL != gsf_input_read (input, 1, &flag)) for (mask = 1; mask < 0x100 ; mask <<= 1) if (flag & mask) { if (NULL == (tmp = gsf_input_read (input, 2, NULL))) break; win_pos = pos % VBA_COMPRESSION_WINDOW; if (win_pos <= 0x80) { if (win_pos <= 0x20) shift = (win_pos <= 0x10) ? 12 : 11; else shift = (win_pos <= 0x40) ? 10 : 9; } else { if (win_pos <= 0x200) shift = (win_pos <= 0x100) ? 8 : 7; else if (win_pos <= 0x800) shift = (win_pos <= 0x400) ? 6 : 5; else shift = 4; } token = GSF_LE_GET_GUINT16 (tmp); len = (token & ((1 << shift) - 1)) + 3; distance = token >> shift; clean = TRUE; /* fprintf (stderr, "Shift %d, token len %d, distance %d bytes %.2x %.2x\n", shift, len, distance, (token & 0xff), (token >> 8)); */ for (i = 0; i < len; i++) { unsigned srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW; guint8 c = buffer [srcpos]; buffer [pos++ % VBA_COMPRESSION_WINDOW] = c; } } else { if ((pos != 0) && ((pos % VBA_COMPRESSION_WINDOW) == 0) && clean) { (void) gsf_input_read (input, 2, NULL); clean = FALSE; g_byte_array_append (res, buffer, VBA_COMPRESSION_WINDOW); break; } if (NULL != gsf_input_read (input, 1, buffer + (pos % VBA_COMPRESSION_WINDOW))) pos++; clean = TRUE; } if (pos % VBA_COMPRESSION_WINDOW) g_byte_array_append (res, buffer, pos % VBA_COMPRESSION_WINDOW); return res; }