/*
 *  libzvbi - Unicode conversion helper functions
 *
 *  Copyright (C) 2003-2006 Michael H. Schimek
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* $Id: conv.c,v 1.3 2006/10/08 06:19:48 mschimek Exp $ */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <errno.h>
#include <langinfo.h>

#include "misc.h"
#include "version.h"
#include "intl-priv.h"
#include "conv.h"

/* Defined in another translation unit. NOTE(review): presumably the
   gettext message domain name of the library -- confirm against the
   definition. */
extern const char _zvbi_intl_domainname[];

#ifdef HAVE_ICONV

#  include <iconv.h>

/* Conversion object handed out by _vbi_iconv_open() and released by
   _vbi_iconv_close(). */
struct _vbi_iconv_t {
	/* iconv() conversion descriptor, (iconv_t) -1 after it has
	   been closed. */
	iconv_t			icd;
	/* Element 0 holds the UCS-2 replacement for unrepresentable
	   characters; zero means "do not replace". Kept as an array
	   so it can be passed to iconv() as a two byte source. */
	uint16_t		ucs2_repl[1];
};

#else

/* Placeholder used when the library is built without iconv support;
   _vbi_iconv_open() always returns NULL in that configuration. */
struct _vbi_iconv_t {
	int			dummy;
};

#endif

#ifdef HAVE_ICONV

/**
 * @internal
 * @param cd Conversion object returned by _vbi_iconv_open().
 * @param dst Address of the output pointer; it is advanced past the
 *   bytes written.
 * @param dst_left Address of the number of bytes still free in the
 *   output buffer.
 * @param src Address of the input pointer; it is advanced past the
 *   bytes consumed.
 * @param src_left Address of the number of input bytes remaining.
 *
 * Wrapper around iconv() for UCS-2 input. Whenever iconv() rejects a
 * character with EILSEQ the replacement character @a cd->ucs2_repl is
 * converted in its place, the two offending input bytes are skipped
 * and the conversion resumes. When the replacement character is zero
 * nothing is substituted and the function fails instead.
 *
 * @returns
 * The result of the last iconv() call, (size_t) -1 on failure, or
 * zero when @a *src_left is zero on entry. See iconv().
 *
 * @since 0.2.23
 */
static size_t
iconv_ucs2			(vbi_iconv_t *		cd,
				 char **		dst,
				 size_t *		dst_left,
				 const char **		src,
				 size_t *		src_left)
{
	size_t result = 0;

	assert (NULL != cd);
	assert (NULL != dst);
	assert (NULL != dst_left);
	assert (NULL != src);
	assert (NULL != src_left);

	while (*src_left > 0) {
		const char *repl;
		size_t repl_left;

		/* Some iconv() prototypes take char ** instead of
		   const char ** for the source, hence the cast. */
		result = iconv (cd->icd, (void *) src, src_left,
				dst, dst_left);

		/* Done on success, and on every error other than an
		   unrepresentable character. */
		if (likely ((size_t) -1 != result))
			return result;

		if (EILSEQ != errno)
			return result;

		/* Replacement disabled by the caller. */
		if (0 == cd->ucs2_repl[0])
			return -1;

		/* Convert the replacement character instead. */
		repl = (const char *) cd->ucs2_repl;
		repl_left = 2;

		result = iconv (cd->icd, (void *) &repl, &repl_left,
				dst, dst_left);
		if (unlikely ((size_t) -1 == result))
			return result;

		/* Skip the rejected character, which occupies two
		   bytes in UCS-2 format. */
		*src += 2;
		*src_left -= 2;
	}

	return result;
}

#endif /* HAVE_ICONV */

/**
 * @internal
 * @param cd Conversion object returned by _vbi_iconv_open().
 * @param dst Address of the output pointer; it is advanced past the
 *   bytes written. Neither @a dst nor @a *dst may be @c NULL.
 * @param dst_size Space available in the output buffer, in bytes.
 * @param src Source string in UCS-2 format, can be @c NULL.
 * @param src_length Number of characters (not bytes) to convert.
 *   When negative the string is taken to be NUL terminated and the
 *   terminating NUL is converted as well.
 *
 * Converts UCS-2 text with iconv_ucs2(), replacing unrepresentable
 * characters as configured in @a cd.
 *
 * @returns
 * @c TRUE if there was nothing to convert (@a src is @c NULL or
 * @a src_length is zero) or if the whole source was converted.
 * @c FALSE if the conversion failed, stopped early, or the library
 * was built without iconv support.
 */
vbi_bool
_vbi_iconv_ucs2			(vbi_iconv_t *		cd,
				 char **		dst,
				 unsigned long		dst_size,
				 const uint16_t *	src,
				 long			src_length)
{
	assert (NULL != cd);
	assert (NULL != dst);
	assert (NULL != *dst);

	if (NULL == src || 0 == src_length)
		return TRUE;

#ifdef HAVE_ICONV
	{
		const char *in;
		size_t in_left;
		size_t out_left;
		size_t result;

		if (src_length < 0) {
			/* NUL terminated: convert the NUL too. */
			src_length = vbi_strlen_ucs2 (src) + 1;
		}

		in = (const char *) src;
		in_left = src_length * 2;

		out_left = dst_size;

		result = iconv_ucs2 (cd, dst, &out_left, &in, &in_left);

		return ((size_t) -1 != result && 0 == in_left);
	}
#else
	return FALSE;
#endif
}

/**
 * @internal
 * @param cd Conversion object returned by _vbi_iconv_open(),
 *   can be @c NULL.
 *
 * Closes the iconv descriptor, if it is open, and frees the
 * conversion object. Does nothing when @a cd is @c NULL or when the
 * library was built without iconv support.
 *
 * @since 0.2.23
 */
void
_vbi_iconv_close		(vbi_iconv_t *		cd)
{
	if (NULL != cd) {
#ifdef HAVE_ICONV
		if ((iconv_t) -1 != cd->icd) {
			iconv_close (cd->icd);
			cd->icd = (iconv_t) -1;
		}

		free (cd);
#endif
	}
}

/**
 * @internal
 * @param dst_codeset Target character set name for iconv(), for
 *   example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src_codeset Source character set name for iconv(), for
 *   example "ISO-8859-1". When @c NULL the default is UCS-2.
 * @param dst Address of the output pointer, which is advanced past
 *   any bytes written. Can be @c NULL, then nothing is written.
 * @param dst_size Space available in the output buffer, in bytes.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero _vbi_iconv_ucs2()
 *   will return @c FALSE instead of replacing.
 *
 * Allocates a conversion object and opens an iconv descriptor. When
 * @a dst is given, iconv() is asked to store the byte sequence which
 * puts the output into its initial state, if one is necessary.
 *
 * @returns
 * The new conversion object, to be released with _vbi_iconv_close().
 * @c NULL when memory is exhausted, the conversion is not available,
 * the initial sequence could not be written, or the library was
 * built without iconv support.
 *
 * @since 0.2.23
 */
vbi_iconv_t *
_vbi_iconv_open			(const char *		dst_codeset,
				 const char *		src_codeset,
				 char **		dst,
				 unsigned long		dst_size,
				 int			repl_char)
{
#ifdef HAVE_ICONV
	vbi_iconv_t *conv;
	size_t space;

	conv = malloc (sizeof (*conv));
	if (NULL == conv)
		return NULL;

	conv->icd = iconv_open ((NULL != dst_codeset) ?
				dst_codeset : "UTF-8",
				(NULL != src_codeset) ?
				src_codeset : "UCS-2");
	if ((iconv_t) -1 == conv->icd) {
		free (conv);
		return NULL;
	}

	conv->ucs2_repl[0] = repl_char;

	if (NULL == dst)
		return conv;

	/* Emit the byte sequence which enters the initial state,
	   if the target encoding needs one. */
	space = dst_size;

	if ((size_t) -1 == iconv (conv->icd, NULL, NULL, dst, &space)) {
		_vbi_iconv_close (conv);
		return NULL;
	}

	return conv;

#else /* !HAVE_ICONV */
	return NULL;
#endif
}

/**
 * @internal
 * @param dst_codeset First character set name, must not be @c NULL.
 * @param src_codeset Second character set name, must not be @c NULL.
 *
 * Compares two character set names byte by byte, tolerating '-' and
 * '_' characters which appear in only one of them, so that "UTF-8"
 * matches "UTF8". The comparison is case sensitive.
 *
 * @returns
 * @c TRUE if the names match, @c FALSE otherwise.
 */
static vbi_bool
same_codeset			(const char *		dst_codeset,
				 const char *		src_codeset)
{
	const char *a = dst_codeset;
	const char *b = src_codeset;

	assert (NULL != dst_codeset);
	assert (NULL != src_codeset);

	/* Runs until both names end at the same time. */
	while (*a != *b || 0 != *a) {
		if (*a == *b) {
			++a;
			++b;
		} else if ('-' == *a || '_' == *a) {
			/* Separator only in the first name. */
			++a;
		} else if ('-' == *b || '_' == *b) {
			/* Separator only in the second name. */
			++b;
		} else {
			return FALSE;
		}
	}

	return TRUE;
}

/**
 * @ingroup Conv
 * @param src NUL-terminated UCS-2 string, can be @c NULL.
 *
 * Counts the characters in the string, up to and excluding
 * the terminating NUL.
 *
 * @returns
 * The number of characters (not bytes), zero if @a src is @c NULL.
 *
 * @since 0.2.23
 */
unsigned long
vbi_strlen_ucs2			(const uint16_t *	src)
{
	unsigned long count = 0;

	if (NULL != src) {
		while (0 != src[count])
			++count;
	}

	return count;
}

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here,
 *   zero on failure.
 * @param src Source string, can be @c NULL.
 * @param src_size Number of bytes in the source string (excluding
 *   the terminating NUL, if any).
 *
 * Copies a string into a newly allocated buffer, with a terminating
 * NUL (4 bytes).
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * it runs out of memory, when @a src_size is too large to add the
 * terminator, or when @a src is @c NULL.
 *
 * @since 0.2.23
 */
static char *
strndup_identity		(unsigned long *	out_size,
				 const char *		src,
				 unsigned long		src_size)
{
	char *buffer;

	if (NULL != out_size)
		*out_size = 0;

	/* Honor the documented contract instead of passing a
	   NULL pointer to memcpy(). */
	if (NULL == src)
		return NULL;

	/* src_size + 4 must not wrap around, or the buffer would
	   be smaller than the amount copied into it. */
	if (src_size > (unsigned long) -1 - 4) {
		errno = ENOMEM;
		return NULL;
	}

	buffer = vbi_malloc (src_size + 4);
	if (NULL == buffer)
		return NULL;

	memcpy (buffer, src, src_size);
	memset (buffer + src_size, 0, 4);

	if (NULL != out_size)
		*out_size = src_size;

	return buffer;
}

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here,
 *   zero on failure.
 * @param src Source string in UCS-2 format, can be @c NULL.
 * @param src_length Number of characters (not bytes) in the source
 *   string. Can be negative if the string is NUL terminated.
 *
 * Encodes a UCS-2 string as UTF-8 without the help of iconv() and
 * stores the result, followed by a single NUL byte, in a newly
 * allocated buffer. The buffer is sized for the worst case of three
 * bytes per character and may be larger than necessary.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * it runs out of memory, or when @a src is @c NULL.
 *
 * @since 0.2.23
 */
static char *
strndup_utf8_ucs2		(unsigned long *	out_size,
				 const uint16_t *	src,
				 long			src_length)
{
	char *buffer;
	char *out;
	long i;

	if (NULL != out_size)
		*out_size = 0;

	if (unlikely (NULL == src))
		return NULL;

	if (src_length < 0)
		src_length = vbi_strlen_ucs2 (src);

	/* At most three bytes per character plus the NUL. */
	buffer = vbi_malloc (src_length * 3 + 1);
	if (NULL == buffer)
		return NULL;

	out = buffer;

	for (i = 0; i < src_length; ++i) {
		unsigned int c = src[i];

		if (c >= 0x800) {
			/* Three byte sequence: 4 + 6 + 6 bits. */
			*out++ = 0xE0 | (c >> 12);
			*out++ = 0x80 | ((c >> 6) & 0x3F);
			*out++ = 0x80 | (c & 0x3F);
		} else if (c >= 0x80) {
			/* Two byte sequence: 5 + 6 bits. */
			*out++ = 0xC0 | (c >> 6);
			*out++ = 0x80 | (c & 0x3F);
		} else {
			/* Plain ASCII. */
			*out++ = c;
		}
	}

	*out = 0;

	if (NULL != out_size)
		*out_size = out - buffer;

	return buffer;
}

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here,
 *   zero on failure.
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src Source string in UCS-2 format, can be @c NULL.
 * @param src_length Number of characters (not bytes) in the source
 *   string. Can be -1 if the string is NUL terminated.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a UCS-2 string and writes the result with a terminating
 * NUL character (4 bytes, except for the built-in UTF-8 encoder
 * which appends a single NUL byte) into a newly allocated buffer.
 * UTF-8 and UCS-2 targets are handled without iconv(). Note the
 * buffer may be larger than necessary.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails, when it runs out of memory, when @a src
 * is @c NULL, or when iconv() is required but the library was built
 * without it. After a failed iconv() conversion errno holds the
 * error code of iconv().
 *
 * @since 0.2.23
 */
static char *
strndup_iconv_from_ucs2		(unsigned long *	out_size,
				 const char *		dst_codeset,
				 const uint16_t *	src,
				 long			src_length,
				 int			repl_char)
{
	char *d;
	char *buffer;
	unsigned long buffer_size;

	if (NULL != out_size)
		*out_size = 0;

	/* Check the source and resolve a negative length before
	   dispatching, so that no path sees a NULL pointer or
	   multiplies a negative length. */
	if (unlikely (NULL == src))
		return NULL;

	if (src_length < 0)
		src_length = vbi_strlen_ucs2 (src);

	if (NULL == dst_codeset || same_codeset (dst_codeset, "UTF8")) {
		return strndup_utf8_ucs2 (out_size, src, src_length);
	} else if (same_codeset (dst_codeset, "UCS2")) {
		return strndup_identity (out_size, (const char *) src,
					 (unsigned long) src_length * 2);
	}

	buffer = NULL;
	buffer_size = 0;

#ifdef HAVE_ICONV

	for (;;) {
		vbi_iconv_t *cd;
		const char *s;
		size_t d_left;
		size_t s_left;
		size_t r;
		int saved_errno;

		/* First guess: four bytes per character. The extra
		   four bytes keep room for the terminator even when
		   the source is empty. */
		if (buffer_size > 0)
			d_left = buffer_size * 2;
		else
			d_left = (size_t) src_length * 4 + 4;

		d = vbi_malloc (d_left);
		if (unlikely (NULL == d)) {
			errno = ENOMEM;
			return NULL;
		}

		buffer = d;
		buffer_size = d_left;

		cd = _vbi_iconv_open (dst_codeset, "UCS-2",
				      &d, d_left, repl_char);
		if (NULL == cd) {
			free (buffer);
			buffer = NULL;

			return NULL;
		}

		if ((size_t)(d - buffer) + 4 > buffer_size) {
			/* The initial sequence left no room for the
			   terminator, try again with more space. */
			_vbi_iconv_close (cd);
			cd = NULL;

			free (buffer);
			buffer = NULL;

			continue;
		}

		d_left = buffer_size - (d - buffer)
			- 4 /* room for a UCS-4 NUL */;

		s = (const char *) src;
		s_left = (size_t) src_length * 2;

		r = iconv_ucs2 (cd, &d, &d_left, &s, &s_left);

		/* The cleanup below must not disturb the error code
		   we are about to examine. */
		saved_errno = errno;

		_vbi_iconv_close (cd);
		cd = NULL;

		if (likely ((size_t) -1 != r))
			break;

		free (buffer);
		buffer = NULL;

		errno = saved_errno;

		if (E2BIG != saved_errno)
			return NULL;

		/* Buffer was too small, try again. */
	}

	if (NULL != out_size)
		*out_size = d - buffer;

	/* d has no particular alignment, so the terminator is
	   stored byte-wise. */
	memset (d, 0, 4);

#endif /* HAVE_ICONV */

	return buffer;
}

/**
 * @ingroup Conv
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src Source string in UCS-2 format, can be @c NULL.
 * @param src_length Number of characters (not bytes) in the source
 *   string. Can be -1 if the string is NUL terminated.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a UCS-2 string to @a dst_codeset and returns the result,
 * with a terminating NUL character, in a newly allocated buffer.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails, when it runs out of memory or when @a src
 * is @c NULL.
 *
 * @since 0.2.23
 */
char *
vbi_strndup_iconv_ucs2		(const char *		dst_codeset,
				 const uint16_t *	src,
				 long			src_length,
				 int			repl_char)
{
	unsigned long size;
	char *buffer;
	char *shrunk;

	buffer = strndup_iconv_from_ucs2 (&size, dst_codeset,
					  src, src_length, repl_char);
	if (NULL == buffer)
		return NULL;

	/* Resize the buffer to the converted text plus four bytes
	   for the terminator. If that fails the original buffer is
	   still valid and returned as is. */
	shrunk = realloc (buffer, size + 4);

	return (NULL != shrunk) ? shrunk : buffer;
}

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here,
 *   zero on failure.
 * @param src Source string in EIA 608 (Closed Caption) format, can
 *   be @c NULL.
 * @param src_length Number of characters (= bytes) in the source
 *   string. Can be -1 if the string is NUL terminated.
 * @param to_upper Convert the string to upper case.
 *
 * Converts a string from EIA 608 to UCS-2 format and writes the
 * result with a terminating NUL character into a newly allocated
 * buffer. The function ignores parity bits and the bytes 0x00 ...
 * 0x1F except two byte special and extended characters (e.g.
 * music note 0x11 0x37). Note the buffer may be larger than necessary.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when the
 * source buffer contains invalid or truncated two byte characters
 * (errno is set to EILSEQ), when it runs out of memory, or when
 * @a src is @c NULL.
 *
 * @since 0.2.23
 */
static char *
strndup_ucs2_eia608		(unsigned long *	out_size,
				 const char *		src,
				 long			src_length,
				 vbi_bool		to_upper)
{
	uint16_t *d16;
	char *buffer;
	long i;

	if (NULL != out_size)
		*out_size = 0;

	if (unlikely (NULL == src))
		return NULL;

	if (src_length < 0)
		src_length = strlen (src);

	/* One UCS-2 character per source byte at most, plus NUL. */
	buffer = vbi_malloc (src_length * 2 + 2);
	if (unlikely (NULL == buffer))
		return NULL;

	d16 = (uint16_t *) buffer;

	for (i = 0; i < src_length; ++i) {
		unsigned int c = src[i] & 0x7F;

		if ((c >= 0x11 && c <= 0x13)
		    || (c >= 0x19 && c <= 0x1B)) {
			/* First byte of a two byte special or
			   extended character. */
			if (unlikely (i + 1 >= src_length))
				goto ilseq;

			/* The parity bit of the second byte is
			   removed before combining: where char is
			   signed a set parity bit would otherwise
			   sign-extend and corrupt the first byte.
			   0x777F also clears bit 3 of the first
			   byte, folding 0x19 ... 0x1B onto
			   0x11 ... 0x13. */
			c = ((c << 8) | (src[++i] & 0x7F)) & 0x777F;
			c = vbi_caption_unicode (c, to_upper);

			if (unlikely (0 == c))
				goto ilseq;

			*d16++ = c;
		} else if (c >= 0x20) {
			/* Single byte character. */
			*d16++ = vbi_caption_unicode (c, to_upper);
		}

		/* Everything else is a control code and ignored. */
	}

	if (NULL != out_size)
		*out_size = (char *) d16 - buffer;

	*d16 = 0;

	return buffer;

ilseq:
	free (buffer);
	buffer = NULL;

	errno = EILSEQ;

	return NULL;
}

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here,
 *   zero on failure.
 * @param src_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 *   "UCS-2" and "EIA-608" are handled without iconv().
 * @param src Source string, can be @c NULL.
 * @param src_size Number of bytes in the source string (excluding
 *   the terminating NUL, if any).
 *
 * Converts a string from @a src_codeset to UCS-2 and writes the result
 * with a terminating NUL character into a newly allocated buffer. Note
 * the buffer may be larger than necessary.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails, when it runs out of memory, when @a src
 * is @c NULL, or when iconv() is required but the library was built
 * without it. After a failed iconv() conversion errno holds the
 * error code of iconv().
 *
 * @since 0.2.23
 */
static char *
strndup_iconv_to_ucs2		(unsigned long *	out_size,
				 const char *		src_codeset,
				 const char *		src,
				 unsigned long		src_size)
{
	char *d;
	char *buffer;
	unsigned long buffer_size;

	if (NULL != out_size)
		*out_size = 0;

	/* Checked before dispatching so that no conversion path
	   is handed a NULL source. */
	if (unlikely (NULL == src))
		return NULL;

	if (NULL == src_codeset) {
		src_codeset = "UTF-8";
	} else if (same_codeset (src_codeset, "UCS2")) {
		return strndup_identity (out_size, src, src_size);
	} else if (same_codeset (src_codeset, "EIA608")) {
		return strndup_ucs2_eia608 (out_size, src, src_size, FALSE);
	}

	buffer = NULL;
	buffer_size = 0;

#ifdef HAVE_ICONV

	for (;;) {
		vbi_iconv_t *cd;
		const char *s;
		size_t d_left;
		size_t s_left;
		size_t r;
		int saved_errno;

		/* Start with 16 KiB and double on each retry. */
		d_left = 16384;
		if (buffer_size > 0)
			d_left = buffer_size * 2;

		d = vbi_malloc (d_left);
		if (NULL == d) {
			errno = ENOMEM;
			return NULL;
		}

		buffer = d;
		buffer_size = d_left;

		cd = _vbi_iconv_open ("UCS-2", src_codeset,
				      &d, d_left,
				      /* repl_char */ 0);
		if (NULL == cd) {
			free (buffer);
			buffer = NULL;

			return NULL;
		}

		d_left = buffer_size - (d - buffer)
			- 2 /* room for a UCS-2 NUL */;

		s = src;
		s_left = src_size;

		/* iconv() source pointer may be defined as char **,
		   should be const char ** or const void **. */
		r = iconv (cd->icd, (void *) &s, &s_left, &d, &d_left);

		/* The cleanup below must not disturb the error code
		   we are about to examine. */
		saved_errno = errno;

		_vbi_iconv_close (cd);
		cd = NULL;

		if ((size_t) -1 != r)
			break;

		free (buffer);
		buffer = NULL;

		errno = saved_errno;

		if (E2BIG != saved_errno)
			return NULL;

		/* Buffer was too small, try again. */
	}

	if (NULL != out_size)
		*out_size = d - buffer;

	/* UCS-2 NUL terminator. */
	memset (d, 0, 2);

#endif /* HAVE_ICONV */

	return buffer;
}

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here,
 *   zero on failure.
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src Source string, can be @c NULL.
 * @param src_size Number of bytes in the source string (excluding
 *   the terminating NUL, if any).
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a string and writes the result with a terminating NUL
 * character into a newly allocated buffer. Identical codesets are
 * copied, everything else is converted via UCS-2. Note the buffer
 * may be larger than necessary.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails (errno is EILSEQ if a UCS-2 source has an
 * odd number of bytes), when it runs out of memory or when @a src
 * is @c NULL.
 *
 * @since 0.2.23
 */
static char *
strndup_iconv			(unsigned long *	out_size,
				 const char *		dst_codeset,
				 const char *		src_codeset,
				 const char *		src,
				 unsigned long		src_size,
				 int			repl_char)
{
	if (NULL != out_size)
		*out_size = 0;

	if (NULL == src)
		return NULL;

	/* Apply the documented defaults; same_codeset() does not
	   accept NULL pointers. */
	if (NULL == dst_codeset)
		dst_codeset = "UTF-8";

	if (NULL == src_codeset)
		src_codeset = "UTF-8";

	if (same_codeset (dst_codeset, src_codeset)) {
		return strndup_identity (out_size, src, src_size);
	} else if (same_codeset (src_codeset, "UCS2")) {
		/* UCS-2 characters occupy two bytes each. */
		if (0 != (src_size & 1)) {
			errno = EILSEQ;
			return NULL;
		}

		return strndup_iconv_from_ucs2 (out_size,
						dst_codeset,
						(const uint16_t *) src,
						src_size / 2,
						repl_char);
	} else {
		char *buffer;
		char *result;
		unsigned long size;
		int saved_errno;

		buffer = strndup_iconv_to_ucs2 (&size,
						src_codeset,
						src,
						src_size);
		if (NULL == buffer)
			return NULL;

		if (same_codeset (dst_codeset, "UCS2")) {
			/* The intermediate result is the final one;
			   the caller still needs its size. */
			if (NULL != out_size)
				*out_size = size;

			return buffer;
		}

		result = strndup_iconv_from_ucs2 (out_size,
						  dst_codeset,
						  (const uint16_t *) buffer,
						  size / 2,
						  repl_char);

		/* Keep the error code of a failed conversion. */
		saved_errno = errno;

		free (buffer);
		buffer = NULL;

		errno = saved_errno;

		return result;
	}
}

/**
 * @ingroup Conv
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src Source buffer, can be @c NULL.
 * @param src_size Number of bytes in the source string (excluding
 *   the terminating NUL, if any).
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a string from @a src_codeset to @a dst_codeset and
 * returns the result, with a terminating NUL character, in a newly
 * allocated buffer.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails, when it runs out of memory or when @a src
 * is @c NULL.
 *
 * @since 0.2.23
 */
char *
vbi_strndup_iconv		(const char *		dst_codeset,
				 const char *		src_codeset,
				 const char *		src,
				 unsigned long		src_size,
				 int			repl_char)
{
	unsigned long size;
	char *buffer;
	char *shrunk;

	buffer = strndup_iconv (&size, dst_codeset, src_codeset,
				src, src_size, repl_char);
	if (NULL == buffer)
		return NULL;

	/* Resize the buffer to the converted text plus four bytes
	   for the terminator. If that fails the original buffer is
	   still valid and returned as is. */
	shrunk = realloc (buffer, size + 4);

	return (NULL != shrunk) ? shrunk : buffer;
}

/**
 * @ingroup internal
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src String of Closed Caption characters, can be @c NULL.
 * @param src_length Number of characters (= bytes) in the
 *   source string. Can be -1 if the @a src string is NUL terminated.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a string of EIA 608 Closed Caption characters to
 * @a dst_codeset by calling vbi_strndup_iconv() with the source
 * codeset "EIA-608". The result is stored with a terminating NUL in
 * a newly allocated buffer. Parity bits and the bytes 0x00 ... 0x1F
 * are ignored, except two byte special and extended characters
 * (e.g. music note 0x11 0x37).
 *
 * @see vbi_caption_unicode()
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when the
 * source buffer contains invalid two byte characters, when the
 * conversion fails, when it runs out of memory or when @a src is
 * @c NULL.
 *
 * @since 0.2.23
 */
char *
vbi_strndup_iconv_caption	(const char *		dst_codeset,
				 const char *		src,
				 long			src_length,
				 int			repl_char)
{
	long n_bytes = src_length;

	if (NULL == src)
		return NULL;

	if (n_bytes < 0) {
		/* NUL terminated source. */
		n_bytes = strlen (src);
	}

	return vbi_strndup_iconv (dst_codeset, "EIA-608",
				  src, n_bytes, repl_char);
}

#if 3 == VBI_VERSION_MINOR

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here,
 *   zero on failure.
 * @param cs Teletext character set descriptor, must not be @c NULL.
 * @param src Source string in Teletext format, can be @c NULL.
 * @param src_length Number of characters (= bytes) in the source
 *   string. Can be -1 (which arrives here as the largest unsigned
 *   value) if the string is NUL terminated.
 *
 * Converts a string from Teletext to UCS-2 format and writes the
 * result with a terminating NUL character into a newly allocated
 * buffer. The function ignores parity bits and control codes
 * (0x00 ... 0x1F). Note the buffer may be larger than necessary.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * it runs out of memory, or when @a src is @c NULL.
 *
 * @since 0.2.23
 */
static char *
strndup_ucs2_teletext		(unsigned long *	out_size,
				 const vbi_ttx_charset *cs,
				 const char *		src,
				 unsigned long		src_length)
{
	uint16_t *d16;
	char *buffer;
	unsigned long i;

	assert (NULL != cs);

	if (NULL != out_size)
		*out_size = 0;

	if (unlikely (NULL == src))
		return NULL;

	/* The parameter is unsigned, so a plain "< 0" test can never
	   be true. Interpret the value as signed to recognize the
	   "NUL terminated" request of the caller. */
	if ((long) src_length < 0)
		src_length = strlen (src);

	/* One UCS-2 character per source byte at most, plus NUL. */
	buffer = vbi_malloc (src_length * 2 + 2);
	if (NULL == buffer)
		return NULL;

	d16 = (uint16_t *) buffer;

	for (i = 0; i < src_length; ++i) {
		unsigned int c = src[i] & 0x7F;

		/* Skip control codes. */
		if (c >= 0x20) {
			*d16++ = vbi_teletext_unicode (cs->g0, cs->subset, c);
		}
	}

	if (NULL != out_size)
		*out_size = (char *) d16 - buffer;

	*d16 = 0;

	return buffer;
}

/**
 * @ingroup Conv
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param cs Teletext character set descriptor.
 * @param src String of Teletext characters, can be @c NULL.
 * @param src_length Number of characters (= bytes) in the
 *   source string. Can be -1 if the @a src string is NUL terminated.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a string of Teletext characters to @a dst_codeset and
 * stores the result with a terminating NUL in a newly allocated buffer.
 * The function ignores parity bits and control codes (0x00 ... 0x1F).
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails, when it runs out of memory or when @a src
 * is @c NULL.
 *
 * @since 0.2.23
 */
char *
vbi_strndup_iconv_teletext	(const char *		dst_codeset,
				 const vbi_ttx_charset *cs,
				 const uint8_t *	src,
				 long			src_length,
				 int			repl_char)
{
	char *buffer;
	char *result;
	unsigned long size;

	/* Apply the documented default; same_codeset() does not
	   accept a NULL pointer. */
	if (NULL == dst_codeset)
		dst_codeset = "UTF-8";

	buffer = strndup_ucs2_teletext (&size, cs,
					(const char *) src, src_length);
	if (NULL == buffer)
		return NULL;

	if (same_codeset (dst_codeset, "UCS2")) {
		/* Already in the requested format, just resize the
		   buffer to the text plus a UCS-2 NUL. */
		result = realloc (buffer, size + 2);
		if (NULL == result)
			result = buffer;
	} else {
		result = vbi_strndup_iconv (dst_codeset, "UCS-2",
					    buffer, size,
					    repl_char);
		free (buffer);
		buffer = NULL;
	}

	return result;
}

#endif /* 3 == VBI_VERSION_MINOR */

/**
 * @ingroup Conv
 * @param fp Output file, must not be @c NULL.
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src Source buffer, can be @c NULL.
 * @param src_size Number of bytes in the source string (excluding
 *   the terminating NUL, if any).
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a string from @a src_codeset to @a dst_codeset and writes
 * the result, without a terminating NUL, into the given file. When
 * both codesets are the same the source is written unmodified.
 *
 * @returns
 * @c TRUE if there was nothing to write (@a src is @c NULL or
 * @a src_size is zero) or all bytes were written. @c FALSE if the
 * conversion failed or fewer bytes than expected were written.
 *
 * @since 0.2.23
 */
vbi_bool
vbi_fputs_iconv			(FILE *			fp,
				 const char *		dst_codeset,
				 const char *		src_codeset,
				 const char *		src,
				 unsigned long		src_size,
				 int			repl_char)
{
	char *converted;
	unsigned long n_bytes;
	size_t n_written;

	assert (NULL != fp);

	if (NULL == src || 0 == src_size)
		return TRUE;

	if (NULL == dst_codeset)
		dst_codeset = "UTF-8";

	if (NULL == src_codeset)
		src_codeset = "UTF-8";

	if (same_codeset (dst_codeset, src_codeset)) {
		/* No conversion needed. */
		n_written = fwrite (src, 1, src_size, fp);

		return ((size_t) src_size == n_written);
	}

	converted = strndup_iconv (&n_bytes,
				   dst_codeset,
				   src_codeset,
				   src, src_size,
				   repl_char);
	if (NULL == converted)
		return FALSE;

	n_written = fwrite (converted, 1, n_bytes, fp);

	free (converted);
	converted = NULL;

	return (n_written == (size_t) n_bytes);
}

/**
 * @ingroup Conv
 * @param fp Output file.
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src Source string in UCS-2 format, can be @c NULL.
 * @param src_length Number of characters (not bytes) in the source
 *   string. Can be -1 if the string is NUL terminated.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a UCS-2 string to @a dst_codeset by calling
 * vbi_fputs_iconv() with the source codeset "UCS-2", which writes
 * the result into the given file.
 *
 * @returns
 * @c TRUE if @a src is @c NULL, otherwise the result of
 * vbi_fputs_iconv(): @c FALSE on failure.
 *
 * @since 0.2.23
 */
vbi_bool
vbi_fputs_iconv_ucs2		(FILE *			fp,
				 const char *		dst_codeset,
				 const uint16_t *	src,
				 long			src_length,
				 int			repl_char)
{
	long n_chars = src_length;

	if (NULL == src)
		return TRUE;

	if (n_chars < 0) {
		/* NUL terminated source. */
		n_chars = vbi_strlen_ucs2 (src);
	}

	/* Two bytes per UCS-2 character. */
	return vbi_fputs_iconv (fp, dst_codeset, "UCS-2",
				(const char *) src, n_chars * 2,
				repl_char);
}

/**
 * @ingroup Conv
 * Returns the character encoding used by the current locale, for example
 * "UTF-8". @c NULL if unknown.
 *
 * Note applications must call
 * @code
 * setlocale (LC_ALL, "");
 * @endcode
 * to use the locale specified by the environment. The default C locale
 * uses ASCII encoding.
 *
 * @since 0.2.23
 */
const char *
vbi_locale_codeset		(void)
{
	const char *dst_format;

	dst_format = bind_textdomain_codeset (vbi3_intl_domainname, NULL);

	if (NULL == dst_format)
		dst_format = nl_langinfo (CODESET);

	return dst_format; /* may be NULL */
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */