/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* gsf-infile-msvba.c :
*
* Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2.1 of the GNU Lesser General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
/* Info extracted from
* svx/source/msfilter/msvbasic.cxx
* Costin Raiu, Kaspersky Labs, 'Apple of Discord'
* Virus bulletin's bontchev.pdf, svajcer.pdf
*
* and lots and lots of reading. There are lots of pieces missing still
* but the structure seems to hold together.
*/
#include <gsf-config.h>
#include <gsf/gsf-infile-msvba.h>
#include <gsf/gsf-infile-impl.h>
#include <gsf/gsf-input-memory.h>
#include <gsf/gsf-impl-utils.h>
#include <gsf/gsf-msole-utils.h>
#include <gsf/gsf-utils.h>
#include <stdio.h>
#include <string.h>
static GObjectClass *parent_class;
struct _GsfInfileMSVBA {
GsfInfile parent;
GsfInfile *source;
GList *children;
};
typedef GsfInfileClass GsfInfileMSVBAClass;
#define GSF_INFILE_MSVBA_CLASS(k) (G_TYPE_CHECK_CLASS_CAST ((k), GSF_INFILE_MSVBA_TYPE, GsfInfileMSVBAClass))
#define GSF_IS_INFILE_MSVBA_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), GSF_INFILE_MSVBA_TYPE))
static guint8 *
gsf_vba_inflate (GsfInput *input, gsf_off_t offset, int *size, gboolean add_null_terminator)
{
GByteArray *res = gsf_msole_inflate (input, offset + 3);
if (res == NULL)
return NULL;
*size = res->len;
if (add_null_terminator)
g_byte_array_append (res, "", 1);
return g_byte_array_free (res, FALSE);
}
static void
vba_extract_module_source (GsfInfileMSVBA *vba, char const *name, guint32 src_offset)
{
GsfInput *module;
guint8 *src_code;
int inflated_size;
g_return_if_fail (name != NULL);
module = gsf_infile_child_by_name (vba->source, name);
if (module == NULL)
return;
src_code = gsf_vba_inflate (module, (gsf_off_t) src_offset, &inflated_size, TRUE);
if (src_code != NULL) {
printf ("<module name=\"%s\">\n<![CDATA[%s]]>\n</module>\n", name, src_code);
g_free (src_code);
} else
g_warning ("Problems extracting the source for %s @ %u", name, src_offset);
g_object_unref (module);
module = NULL;
}
/**
* vba_dir_read :
* @vba :
* @err : optionally NULL
*
* Read an VBA dirctory and its project file.
* along the way.
*
* Return value: FALSE on error setting @err if it is supplied.
**/
static gboolean
vba_dir_read (GsfInfileMSVBA *vba, GError **err)
{
int inflated_size, element_count = -1;
char const *msg = NULL;
char *name, *elem_stream = NULL;
guint32 len;
guint16 tag;
guint8 *inflated_data, *end, *ptr;
GsfInput *dir;
gboolean failed = TRUE;
/* 0. get the stream */
dir = gsf_infile_child_by_name (vba->source, "dir");
if (dir == NULL) {
msg = "Can't find the VBA directory stream.";
goto fail_stream;
}
/* 1. decompress it */
ptr = inflated_data = gsf_vba_inflate (dir, 0, &inflated_size, FALSE);
if (inflated_data == NULL)
goto fail_compression;
end = inflated_data + inflated_size;
/* 2. GUESS : based on several xls with macros and XL8GARY this looks like a
* series of sized records. Be _extra_ careful */
do {
/* I have seen
* type len data
* 1 4 1 0 0 0
* 2 4 9 4 0 0
* 3 2 4 e4
* 4 <var> project name
* 5 0
* 6 0
* 7 4
* 8 4
* 0x3d 0
* 0x40 0
* 0x14 4 9 4 0 0
*
* 0x0f == number of elements
* 0x1c == (Size 0)
* 0x1e == (Size 4)
* 0x48 == (Size 0)
* 0x31 == stream offset of the compressed source !
*
* 0x16 == an ascii dependency name
* 0x3e == a unicode dependency name
* 0x33 == a classid for a dependency with no trialing data
*
* 0x2f == a dummy classid
* 0x30 == a classid
* 0x0d == the classid
* 0x2f, and 0x0d appear contain
* uint32 classid_size;
* <classid>
* 00 00 00 00 00 00
* and sometimes some trailing junk
**/
if ((ptr + 6) > end) {
msg = "vba project header problem";
goto fail_content;
}
tag = GSF_LE_GET_GUINT16 (ptr);
len = GSF_LE_GET_GUINT32 (ptr + 2);
ptr += 6;
if ((ptr + len) > end) {
msg = "vba project header problem";
goto fail_content;
}
switch (tag) {
case 4:
name = g_strndup (ptr, len);
puts ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
printf ("<project name=\"%s\">", name);
g_free (name);
break;
case 9:
/* this seems to have an extra two bytes that are not
* part of the length ..?? */
len += 2;
break;
case 0xf :
if (len != 2) {
g_warning ("element count is not what we expected");
break;
}
if (element_count >= 0) {
g_warning ("More than one element count ??");
break;
}
element_count = GSF_LE_GET_GUINT16 (ptr);
break;
/* dependencies */
case 0x0d : break;
case 0x2f : break;
case 0x30 : break;
case 0x33 : break;
case 0x3e : break;
case 0x16:
#if 0
name = g_strndup (ptr, len);
g_print ("Depend Name : '%s'\n", name);
g_free (name);
#endif
break;
/* elements */
case 0x47 : break;
case 0x32 : break;
case 0x1a:
#if 0
name = g_strndup (ptr, len);
g_print ("Element Name : '%s'\n", name);
g_free (name);
#endif
break;
case 0x19: elem_stream = g_strndup (ptr, len); break;
case 0x31:
if (len != 4) {
g_warning ("source offset property is not what we expected");
break;
}
vba_extract_module_source (vba, elem_stream,
GSF_LE_GET_GUINT32 (ptr));
g_free (elem_stream); elem_stream = NULL;
element_count--;
break;
default :
#if 0
g_print ("tag %hx : len %u\n", tag, len);
gsf_mem_dump (ptr, len);
#endif
break;
}
ptr += len;
} while (tag != 0x10);
g_free (elem_stream);
if (element_count != 0)
g_warning ("Number of elements differs from expectations");
failed = FALSE;
fail_content :
g_free (inflated_data);
puts ("</project>");
fail_compression :
g_object_unref (G_OBJECT (dir));
fail_stream :
if (failed) {
if (err != NULL)
*err = g_error_new (gsf_input_error_id (), 0, msg);
return FALSE;
}
return TRUE;
}
#define VBA56_DIRENT_RECORD_COUNT (2 + /* magic */ \
4 + /* version */ \
2 + /* 0x00 0xff */ \
22) /* unknown */
#define VBA56_DIRENT_HEADER_SIZE (VBA56_DIRENT_RECORD_COUNT + \
2 + /* type1 record count */ \
2) /* unknown */
#if 0
/**
* vba_project_read :
* @vba :
* @err : optionally NULL
*
* Read an VBA dirctory and its project file.
* along the way.
*
* Return value: FALSE on error setting @err if it is supplied.
**/
static gboolean
vba_project_read (GsfInfileMSVBA *vba, GError **err)
{
/* NOTE : This seems constant, find some confirmation */
static guint8 const signature[] = { 0xcc, 0x61 };
static struct {
guint8 const signature[4];
char const * const name;
int const vba_version;
gboolean const is_mac;
} const versions [] = {
{ { 0x5e, 0x00, 0x00, 0x01 }, "Office 97", 5, FALSE},
{ { 0x5f, 0x00, 0x00, 0x01 }, "Office 97 SR1", 5, FALSE },
{ { 0x65, 0x00, 0x00, 0x01 }, "Office 2000 alpha?", 6, FALSE },
{ { 0x6b, 0x00, 0x00, 0x01 }, "Office 2000 beta?", 6, FALSE },
{ { 0x6d, 0x00, 0x00, 0x01 }, "Office 2000", 6, FALSE },
{ { 0x6f, 0x00, 0x00, 0x01 }, "Office 2000", 6, FALSE },
{ { 0x70, 0x00, 0x00, 0x01 }, "Office XP beta 1/2", 6, FALSE },
{ { 0x73, 0x00, 0x00, 0x01 }, "Office XP", 6, FALSE },
{ { 0x76, 0x00, 0x00, 0x01 }, "Office 2003", 6, FALSE },
{ { 0x79, 0x00, 0x00, 0x01 }, "Office 2003", 6, FALSE },
{ { 0x60, 0x00, 0x00, 0x0e }, "MacOffice 98", 5, TRUE },
{ { 0x62, 0x00, 0x00, 0x0e }, "MacOffice 2001", 5, TRUE },
{ { 0x63, 0x00, 0x00, 0x0e }, "MacOffice X", 6, TRUE },
{ { 0x64, 0x00, 0x00, 0x0e }, "MacOffice 2004", 6, TRUE },
};
guint8 const *data;
unsigned i, count, len;
gunichar2 *uni_name;
char *name;
GsfInput *dir;
dir = gsf_infile_child_by_name (vba->source, "dir");
if (dir == NULL) {
if (err != NULL)
*err = g_error_new (gsf_input_error_id (), 0,
"Can't find the VBA directory stream.");
return FALSE;
}
if (gsf_input_seek (dir, 0, G_SEEK_SET) ||
NULL == (data = gsf_input_read (dir, VBA56_DIRENT_HEADER_SIZE, NULL)) ||
0 != memcmp (data, signature, sizeof (signature))) {
if (err != NULL)
*err = g_error_new (gsf_input_error_id (), 0,
"No VBA signature");
return FALSE;
}
for (i = 0 ; i < G_N_ELEMENTS (versions); i++)
if (!memcmp (data+2, versions[i].signature, 4))
break;
if (i >= G_N_ELEMENTS (versions)) {
if (err != NULL)
*err = g_error_new (gsf_input_error_id (), 0,
"Unknown VBA version signature 0x%x%x%x%x",
data[2], data[3], data[4], data[5]);
return FALSE;
}
puts (versions[i].name);
/* these depend strings seem to come in 2 blocks */
count = GSF_LE_GET_GUINT16 (data + VBA56_DIRENT_RECORD_COUNT);
for (; count > 0 ; count--) {
if (NULL == ((data = gsf_input_read (dir, 2, NULL))))
break;
len = GSF_LE_GET_GUINT16 (data);
if (NULL == ((data = gsf_input_read (dir, len, NULL)))) {
printf ("len == 0x%x ??\n", len);
break;
}
uni_name = g_new0 (gunichar2, len/2 + 1);
/* be wary about endianness */
for (i = 0 ; i < len ; i += 2)
uni_name [i/2] = GSF_LE_GET_GUINT16 (data + i);
name = g_utf16_to_utf8 (uni_name, -1, NULL, NULL, NULL);
g_free (uni_name);
printf ("%d %s\n", count, name);
/* ignore this blob ???? */
if (!strncmp ("*\\G", name, 3)) {
if (NULL == ((data = gsf_input_read (dir, 12, NULL)))) {
printf ("len == 0x%x ??\n", len);
break;
}
}
g_free (name);
}
g_return_val_if_fail (count == 0, FALSE);
return TRUE;
}
#endif
static void
gsf_infile_msvba_finalize (GObject *obj)
{
GsfInfileMSVBA *vba = GSF_INFILE_MSVBA (obj);
if (vba->source != NULL) {
g_object_unref (G_OBJECT (vba->source));
vba->source = NULL;
}
parent_class->finalize (obj);
}
static void
gsf_infile_msvba_init (GObject *obj)
{
GsfInfileMSVBA *vba = GSF_INFILE_MSVBA (obj);
vba->source = NULL;
vba->children = NULL;
}
static void
gsf_infile_msvba_class_init (GObjectClass *gobject_class)
{
gobject_class->finalize = gsf_infile_msvba_finalize;
parent_class = g_type_class_peek_parent (gobject_class);
}
GSF_CLASS (GsfInfileMSVBA, gsf_infile_msvba,
gsf_infile_msvba_class_init, gsf_infile_msvba_init,
GSF_INFILE_TYPE)
GsfInfile *
gsf_infile_msvba_new (GsfInfile *source, GError **err)
{
GsfInfileMSVBA *vba;
g_return_val_if_fail (GSF_IS_INFILE (source), NULL);
vba = g_object_new (GSF_INFILE_MSVBA_TYPE, NULL);
if (G_UNLIKELY (NULL == vba)) return NULL;
g_object_ref (G_OBJECT (source));
vba->source = source;
/* vba_project_read (vba, err); */
/* find the name offset pairs */
if (vba_dir_read (vba, err))
return GSF_INFILE (vba);
if (err != NULL && *err == NULL)
*err = g_error_new (gsf_input_error_id (), 0,
"Unable to parse VBA header");
g_object_unref (G_OBJECT (vba));
return NULL;
}
syntax highlighted by Code2HTML, v. 0.9.1