/* * libzvbi - Unicode conversion helper functions * * Copyright (C) 2003-2006 Michael H. Schimek * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* $Id: conv.c,v 1.3 2006/10/08 06:19:48 mschimek Exp $ */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #include "misc.h" #include "version.h" #include "intl-priv.h" #include "conv.h" extern const char _zvbi_intl_domainname[]; #ifdef HAVE_ICONV # include /* Conversion object: the iconv descriptor plus a one-element UCS-2 array holding the replacement character; a zero replacement means "do not replace". */ struct _vbi_iconv_t { iconv_t icd; uint16_t ucs2_repl[1]; }; #else /* Placeholder definition used when the library is built without iconv. */ struct _vbi_iconv_t { int dummy; }; #endif #ifdef HAVE_ICONV /** * @internal * @param cd Conversion object returned by vbi_iconv_open(). * @param dst Pointer to output buffer pointer, will be incremented * by the number of bytes written. * @param dst_left Space available in the output buffer, in bytes. * @param src Pointer to input buffer pointer, will be incremented * by the number of bytes read. * @param src_left Number of bytes left to read in the input buffer. * * Like iconv(), but converts unrepresentable characters to * @a cd->ucs2_repl. The source is assumed to be in UCS-2 format * (so we know how to skip unrepresentable characters). * * @returns * See iconv(). 
* * @since 0.2.23 */ static size_t iconv_ucs2 (vbi_iconv_t * cd, char ** dst, size_t * dst_left, const char ** src, size_t * src_left) { size_t r; assert (NULL != cd); assert (NULL != dst); assert (NULL != dst_left); assert (NULL != src); assert (NULL != src_left); r = 0; while (*src_left > 0) { const char *src1; size_t src_left1; /* iconv() source pointer may be defined as char **, should be const char ** or const void **. Ignore compiler warnings. */ r = iconv (cd->icd, (void *) src, src_left, dst, dst_left); if (likely ((size_t) -1 != r)) break; /* success */ if (EILSEQ != errno) break; if (0 == cd->ucs2_repl[0]) return -1; /* do not replace */ src1 = (const char *) cd->ucs2_repl; src_left1 = 2; r = iconv (cd->icd, (void *) &src1, &src_left1, dst, dst_left); if (unlikely ((size_t) -1 == r)) break; /* failed */ *src += 2; /* in UCS-2 format */ *src_left -= 2; } return r; } #endif /* HAVE_ICONV */ /** @internal */ vbi_bool _vbi_iconv_ucs2 (vbi_iconv_t * cd, char ** dst, unsigned long dst_size, const uint16_t * src, long src_length) { const char *s; size_t d_left; size_t s_left; size_t r; assert (NULL != cd); assert (NULL != dst); assert (NULL != *dst); if (NULL == src || 0 == src_length) return TRUE; #ifdef HAVE_ICONV if (src_length < 0) src_length = vbi_strlen_ucs2 (src) + 1; s = (const char *) src; s_left = src_length * 2; d_left = dst_size; r = iconv_ucs2 (cd, dst, &d_left, &s, &s_left); return ((size_t) -1 != r && 0 == s_left); #else return FALSE; #endif } /** * @internal * @param cd Conversion object returned by vbi_iconv_open(). * * Frees all resources associated with the conversion object. * * @since 0.2.23 */ void _vbi_iconv_close (vbi_iconv_t * cd) { if (NULL == cd) return; #ifdef HAVE_ICONV if ((iconv_t) -1 != cd->icd) { iconv_close (cd->icd); cd->icd = (iconv_t) -1; } free (cd); #endif } /** * @internal * @param dst_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. 
* @param src_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param dst Pointer to output buffer pointer, which will be * incremented by the number of bytes written. Can be @c NULL. * @param dst_size Space available in the output buffer, in bytes. * @param repl_char UCS-2 replacement for characters which are not * representable in @a dst_codeset. When zero the _vbi_iconv_ucs2() * function will return @c FALSE instead. * * Helper function to convert text. A start byte sequence will be * stored in @a dst if necessary. * * @returns * @c NULL when the conversion is impossible. * * @since 0.2.23 */ vbi_iconv_t * _vbi_iconv_open (const char * dst_codeset, const char * src_codeset, char ** dst, unsigned long dst_size, int repl_char) { #ifdef HAVE_ICONV vbi_iconv_t *cd; if (NULL == dst_codeset) dst_codeset = "UTF-8"; if (NULL == src_codeset) src_codeset = "UCS-2"; cd = malloc (sizeof (*cd)); if (NULL == cd) return NULL; cd->icd = iconv_open (dst_codeset, src_codeset); if ((iconv_t) -1 == cd->icd) { free (cd); return NULL; } cd->ucs2_repl[0] = repl_char; if (NULL != dst) { size_t d_left; size_t n; d_left = dst_size; /* Write out the byte sequence to get into the initial state if this is necessary. */ n = iconv (cd->icd, NULL, NULL, dst, &d_left); if ((size_t) -1 == n) { _vbi_iconv_close (cd); return NULL; } } return cd; #else /* !HAVE_ICONV */ return NULL; #endif } /** @internal */ static vbi_bool same_codeset (const char * dst_codeset, const char * src_codeset) { assert (NULL != dst_codeset); assert (NULL != src_codeset); for (;;) { char d, s; d = *dst_codeset; s = *src_codeset; if (d == s) { if (0 == d) return TRUE; ++dst_codeset; ++src_codeset; } else if ('-' == d || '_' == d) { ++dst_codeset; } else if ('-' == s || '_' == s) { ++src_codeset; } else { return FALSE; } } } /** * @ingroup Conv * @param src NUL-terminated UCS-2 string. 
* * Counts the characters in the string, up to and excluding * the terminating NUL. * * @since 0.2.23 */ unsigned long vbi_strlen_ucs2 (const uint16_t * src) { const uint16_t *s; if (NULL == src) return 0; for (s = src; 0 != *s; ++s) ; return s - src; } /** * @internal * @param out_size If not @c NULL the actual number of bytes stored * in the buffer (excluding the terminating NUL) will be stored here. * @param src Source string. * @param src_size Number of bytes in the source string (excluding * the terminating NUL, if any). * * Copies a string into a newly allocated buffer, with a terminating * NUL (4 bytes). * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when * it runs out of memory, or when @a src is @c NULL. * * @since 0.2.23 */ static char * strndup_identity (unsigned long * out_size, const char * src, unsigned long src_size) { char *buffer; buffer = vbi_malloc (src_size + 4); if (NULL == buffer) { if (NULL != out_size) *out_size = 0; return NULL; } memcpy (buffer, src, src_size); memset (buffer + src_size, 0, 4); if (NULL != out_size) *out_size = src_size; return buffer; } /** * @internal * @param out_size If not @c NULL the actual number of bytes stored * in the buffer (excluding the terminating NUL) will be stored here. * @param src Source string in UCS-2 format, can be @c NULL. * @param src_length Number of characters (not bytes) in the source * string. Can be -1 if the string is NUL terminated. * * Converts a string from UCS-2 to UTF-8 format and writes the * result with a terminating NUL character into a newly allocated * buffer. Note the buffer may be larger than necessary. * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when * it runs out of memory, or when @a src is @c NULL. 
*
 * @since 0.2.23
 */
static char *
strndup_utf8_ucs2 (unsigned long *out_size,
                   const uint16_t *src,
                   long src_length)
{
    char *buffer;
    char *out;
    long i;

    if (out_size != NULL)
        *out_size = 0;

    if (unlikely (src == NULL))
        return NULL;

    if (src_length < 0)
        src_length = vbi_strlen_ucs2 (src);

    /* A UCS-2 character needs at most three UTF-8 bytes, plus one
       byte for the terminating NUL. */
    buffer = vbi_malloc (src_length * 3 + 1);
    if (buffer == NULL)
        return NULL;

    out = buffer;

    for (i = 0; i < src_length; ++i) {
        unsigned int c = src[i];

        if (c >= 0x800) {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            out[0] = 0xE0 | (c >> 12);
            out[1] = 0x80 | ((c >> 6) & 0x3F);
            out[2] = 0x80 | (c & 0x3F);
            out += 3;
        } else if (c >= 0x80) {
            /* 110xxxxx 10xxxxxx */
            out[0] = 0xC0 | (c >> 6);
            out[1] = 0x80 | (c & 0x3F);
            out += 2;
        } else {
            /* Plain ASCII. */
            *out++ = c;
        }
    }

    *out = 0;

    if (out_size != NULL)
        *out_size = out - buffer;

    return buffer;
}

/**
 * @internal
 * @param out_size If not @c NULL the actual number of bytes stored
 *   in the buffer (excluding the terminating NUL) will be stored here.
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param src Source string in UCS-2 format, can be @c NULL.
 * @param src_length Number of characters (not bytes) in the source
 *   string. Can be -1 if the string is NUL terminated.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a UCS-2 string with iconv() and writes the result with a
 * terminating NUL character into a newly allocated buffer. Note the
 * buffer may be larger than necessary.
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails, when it runs out of memory or when @a src
 * is @c NULL.
* * @since 0.2.23 */ static char * strndup_iconv_from_ucs2 (unsigned long * out_size, const char * dst_codeset, const uint16_t * src, long src_length, int repl_char) { char *d; uint32_t *d32; char *buffer; unsigned long buffer_size; if (NULL == dst_codeset || same_codeset (dst_codeset, "UTF8")) { return strndup_utf8_ucs2 (out_size, src, src_length); } else if (same_codeset (dst_codeset, "UCS2")) { return strndup_identity (out_size, (const char *) src, src_length * 2); } if (NULL != out_size) *out_size = 0; if (unlikely (NULL == src)) return NULL; buffer = NULL; buffer_size = 0; #ifdef HAVE_ICONV if (unlikely (src_length < 0)) src_length = vbi_strlen_ucs2 (src); for (;;) { vbi_iconv_t *cd; const char *s; size_t d_left; size_t s_left; size_t r; d_left = src_length * 4; if (buffer_size > 0) d_left = buffer_size * 2; d = vbi_malloc (d_left); if (unlikely (NULL == d)) { errno = ENOMEM; return NULL; } buffer = d; buffer_size = d_left; cd = _vbi_iconv_open (dst_codeset, "UCS-2", &d, d_left, repl_char); if (NULL == cd) { free (buffer); buffer = NULL; return NULL; } d_left = buffer_size - (d - buffer) - 4 /* room for a UCS-4 NUL */; s = (const char *) src; s_left = src_length * 2; r = iconv_ucs2 (cd, &d, &d_left, &s, &s_left); _vbi_iconv_close (cd); cd = NULL; if (likely ((size_t) -1 != r)) break; free (buffer); buffer = NULL; if (E2BIG != errno) return NULL; /* Buffer was too small, try again. */ } if (NULL != out_size) *out_size = d - buffer; d32 = (uint32_t *) d; *d32 = 0; #endif /* HAVE_ICONV */ return buffer; } /** * @ingroup Conv * @param dst_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src Source string in UCS-2 format, can be @c NULL. * @param src_length Number of characters (not bytes) in the source * string. Can be -1 if the string is NUL terminated. * @param repl_char UCS-2 replacement for characters which are not * representable in @a dst_codeset. 
When zero the function will * fail if the source buffer contains unrepresentable characters. * * Converts a UCS-2 string with iconv() and writes the result with a * terminating NUL character into a newly allocated buffer. * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when * the conversion fails, when it runs out of memory or when @a src * is @c NULL. * * @since 0.2.23 */ char * vbi_strndup_iconv_ucs2 (const char * dst_codeset, const uint16_t * src, long src_length, int repl_char) { char *buffer; char *result; unsigned long size; buffer = strndup_iconv_from_ucs2 (&size, dst_codeset, src, src_length, repl_char); if (NULL == buffer) return NULL; result = realloc (buffer, size + 4); if (NULL == result) result = buffer; return result; } /** * @internal * @param out_size If not @c NULL the actual number of bytes stored * in the buffer (excluding the terminating NUL) will be stored here. * @param src Source string in EIA 608 (Closed Caption) format, can * be @c NULL. * @param src_length Number of characters (= bytes) in the source * string. Can be -1 if the string is NUL terminated. * @param to_upper Convert the string to upper case. * * Converts a string from EIA 608 to UCS-2 format and writes the * result with a terminating NUL character into a newly allocated * buffer. The function ignores parity bits and the bytes 0x00 ... * 0x1F except two byte special and extended characters (e.g. * music note 0x11 0x37). Note the buffer may be larger than necessary. * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when the * source buffer contains invalid two byte characters, when * it runs out of memory, or when @a src is @c NULL. 
* * @since 0.2.23 */ static char * strndup_ucs2_eia608 (unsigned long * out_size, const char * src, long src_length, vbi_bool to_upper) { uint16_t *d16; char *buffer; long i; if (NULL != out_size) *out_size = 0; if (unlikely (NULL == src)) return NULL; if (src_length < 0) src_length = strlen (src); buffer = vbi_malloc (src_length * 2 + 2); if (unlikely (NULL == buffer)) return NULL; d16 = (uint16_t *) buffer; for (i = 0; i < src_length; ++i) { unsigned int c = src[i] & 0x7F; switch (c) { case 0x11 ... 0x13: case 0x19 ... 0x1B: if (unlikely (i + 1 >= src_length)) goto ilseq; c = ((c * 256) + src[++i]) & 0x777F; c = vbi_caption_unicode (c, to_upper); if (unlikely (0 == c)) goto ilseq; *d16++ = c; break; case 0x20 ... 0x7F: *d16++ = vbi_caption_unicode (c, to_upper); break; default: break; } } if (NULL != out_size) *out_size = (char *) d16 - buffer; *d16 = 0; return buffer; ilseq: free (buffer); buffer = NULL; errno = EILSEQ; return NULL; } /** * @internal * @param out_size If not @c NULL the actual number of bytes stored * in the buffer (excluding the terminating NUL) will be stored here. * @param src_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src Source string, can be @c NULL. * @param src_size Number of bytes in the source string (excluding * the terminating NUL, if any). * * Converts a string from @a src_codeset to UCS-2 and writes the result * with a terminating NUL character into a newly allocated buffer. Note * the buffer may be larger than necessary. * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when * the conversion fails, when it runs out of memory or when @a src * is @c NULL. 
* * @since 0.2.23 */ static char * strndup_iconv_to_ucs2 (unsigned long * out_size, const char * src_codeset, const char * src, unsigned long src_size) { char *d; uint16_t *d16; char *buffer; unsigned long buffer_size; if (NULL == src_codeset) { src_codeset = "UTF-8"; } else if (same_codeset (src_codeset, "UCS2")) { return strndup_identity (out_size, src, src_size); } else if (same_codeset (src_codeset, "EIA608")) { return strndup_ucs2_eia608 (out_size, src, src_size, FALSE); } if (NULL != out_size) *out_size = 0; if (unlikely (NULL == src)) return NULL; buffer = NULL; buffer_size = 0; #ifdef HAVE_ICONV for (;;) { vbi_iconv_t *cd; const char *s; size_t d_left; size_t s_left; size_t r; d_left = 16384; if (buffer_size > 0) d_left = buffer_size * 2; d = vbi_malloc (d_left); if (NULL == d) { errno = ENOMEM; return NULL; } buffer = d; buffer_size = d_left; cd = _vbi_iconv_open ("UCS-2", src_codeset, &d, d_left, /* repl_char */ 0); if (NULL == cd) { free (buffer); buffer = NULL; return NULL; } d_left = buffer_size - (d - buffer) - 2 /* room for a UCS-2 NUL */; s = src; s_left = src_size; /* Ignore compiler warnings if second argument is declared as char** instead of const char**. */ r = iconv (cd->icd, &s, &s_left, &d, &d_left); _vbi_iconv_close (cd); cd = NULL; if ((size_t) -1 != r) break; free (buffer); buffer = NULL; if (E2BIG != errno) return NULL; /* Buffer was too small, try again. */ } if (NULL != out_size) *out_size = d - buffer; d16 = (uint16_t *) d; *d16 = 0; #endif /* !HAVE_ICONV */ return buffer; } /** * @internal * @param out_size If not @c NULL the actual number of bytes stored * in the buffer (excluding the terminating NUL) will be stored here. * @param dst_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src Source string, can be @c NULL. 
* @param src_size Number of bytes in the source string (excluding * the terminating NUL, if any). * @param repl_char UCS-2 replacement for characters which are not * representable in @a dst_codeset. When zero the function will * fail if the source buffer contains unrepresentable characters. * * Converts a string with iconv() and writes the result with a * terminating NUL character into a newly allocated buffer. Note * the buffer may be larger than necessary. * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when * the conversion fails, when it runs out of memory or when @a src * is @c NULL. * * @since 0.2.23 */ static char * strndup_iconv (unsigned long * out_size, const char * dst_codeset, const char * src_codeset, const char * src, unsigned long src_size, int repl_char) { if (same_codeset (dst_codeset, src_codeset)) { return strndup_identity (out_size, src, src_size); } else if (same_codeset (src_codeset, "UCS2")) { if (NULL != src && 0 != (src_size & 1)) { if (NULL != out_size) *out_size = 0; errno = EILSEQ; return NULL; } return strndup_iconv_from_ucs2 (out_size, dst_codeset, (const uint16_t *) src, src_size / 2, repl_char); } else { char *buffer; char *result; unsigned long size; buffer = strndup_iconv_to_ucs2 (&size, src_codeset, src, src_size); if (NULL == buffer) return NULL; if (same_codeset (dst_codeset, "UCS2")) return buffer; result = strndup_iconv_from_ucs2 (out_size, dst_codeset, (const uint16_t *) buffer, size / 2, repl_char); free (buffer); buffer = NULL; return result; } } /** * @ingroup Conv * @param dst_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src Source buffer, can be @c NULL. 
* @param src_size Number of bytes in the source string (excluding * the terminating NUL, if any). * @param repl_char UCS-2 replacement for characters which are not * representable in @a dst_codeset. When zero the function will * fail if the source buffer contains unrepresentable characters. * * Converts a string with iconv() and writes the result with a * terminating NUL character into a newly allocated buffer. * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when * the conversion fails, when it runs out of memory or when @a src * is @c NULL. * * @since 0.2.23 */ char * vbi_strndup_iconv (const char * dst_codeset, const char * src_codeset, const char * src, unsigned long src_size, int repl_char) { char *result; char *buffer; unsigned long size; buffer = strndup_iconv (&size, dst_codeset, src_codeset, src, src_size, repl_char); if (NULL == buffer) return NULL; result = realloc (buffer, size + 4); if (NULL == result) result = buffer; return result; } /** * @ingroup internal * @param dst_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src String of Closed Caption characters, can be @c NULL. * @param src_length Number of characters (= bytes) in the * source string. Can be -1 if the @a src string is NUL terminated. * @param repl_char UCS-2 replacement for characters which are not * representable in @a dst_codeset. When zero the function will * fail if the source buffer contains unrepresentable characters. * * Converts a string of EIA 608 Closed Caption characters to another * format and stores the result with a terminating NUL in a newly * allocated buffer. The function ignores parity bits and the bytes * 0x00 ... 0x1F except two byte special and extended characters (e.g. * music note 0x11 0x37). * * @see vbi_caption_unicode() * * @returns * A pointer to the allocated buffer. 
You must free() the buffer * when it is no longer needed. The function returns @c NULL when the * source buffer contains invalid two byte characters, when the * conversion fails, when it runs out of memory or when @a src is * @c NULL. * * @since 0.2.23 */ char * vbi_strndup_iconv_caption (const char * dst_codeset, const char * src, long src_length, int repl_char) { if (NULL == src) return NULL; if (src_length < 0) src_length = strlen (src); return vbi_strndup_iconv (dst_codeset, "EIA-608", src, src_length, repl_char); } #if 3 == VBI_VERSION_MINOR /** * @internal * @param out_size If not @c NULL the actual number of bytes stored * in the buffer (excluding the terminating NUL) will be stored here. * @param cs Teletext character set descriptor. * @param src Source string in Teletext format, can be @c NULL. * @param src_length Number of characters (= bytes) in the source * string. Can be -1 if the string is NUL terminated. * * Converts a string from Teletext to UCS-2 format and writes the * result with a terminating NUL character into a newly allocated * buffer. The function ignores parity bits and control codes * (0x00 ... 0x1F). Note the buffer may be larger than necessary. * * @returns * A pointer to the allocated buffer. You must free() the buffer * when it is no longer needed. The function returns @c NULL when * it runs out of memory, or when @a src is @c NULL. 
*
 * @since 0.2.23
 */
static char *
strndup_ucs2_teletext (unsigned long *out_size,
                       const vbi_ttx_charset *cs,
                       const char *src,
                       long src_length)
{
    uint16_t *d16;
    char *buffer;
    long i;

    assert (NULL != cs);

    if (NULL != out_size)
        *out_size = 0;

    if (unlikely (NULL == src))
        return NULL;

    /* src_length is signed so that the documented -1 is recognized
       here instead of turning into a huge unsigned length. */
    if (src_length < 0)
        src_length = strlen (src);

    /* At most one UCS-2 character per source byte, plus the NUL. */
    buffer = vbi_malloc (src_length * 2 + 2);
    if (NULL == buffer)
        return NULL;

    d16 = (uint16_t *) buffer;

    for (i = 0; i < src_length; ++i) {
        unsigned int c = src[i] & 0x7F; /* drop the parity bit */

        /* Control codes 0x00 ... 0x1F are skipped. */
        if (c >= 0x20) {
            *d16++ = vbi_teletext_unicode
                (cs->g0, cs->subset, c);
        }
    }

    if (NULL != out_size)
        *out_size = (char *) d16 - buffer;

    *d16 = 0;

    return buffer;
}

/**
 * @ingroup Conv
 * @param dst_codeset Character set name for iconv() conversion,
 *   for example "ISO-8859-1". When @c NULL the default is UTF-8.
 * @param cs Teletext character set descriptor.
 * @param src String of Teletext characters, can be @c NULL.
 * @param src_length Number of characters (= bytes) in the
 *   source string. Can be -1 if the @a src string is NUL terminated.
 * @param repl_char UCS-2 replacement for characters which are not
 *   representable in @a dst_codeset. When zero the function will
 *   fail if the source buffer contains unrepresentable characters.
 *
 * Converts a string of Teletext characters to @a dst_codeset and
 * stores the result with a terminating NUL in a newly allocated buffer.
 * The function ignores parity bits and control codes (0x00 ... 0x1F).
 *
 * @returns
 * A pointer to the allocated buffer. You must free() the buffer
 * when it is no longer needed. The function returns @c NULL when
 * the conversion fails, when it runs out of memory or when @a src
 * is @c NULL.
* * @since 0.2.23 */ char * vbi_strndup_iconv_teletext (const char * dst_codeset, const vbi_ttx_charset *cs, const uint8_t * src, long src_length, int repl_char) { char *buffer; char *result; unsigned long size; buffer = strndup_ucs2_teletext (&size, cs, src, src_length); if (NULL == buffer) return NULL; if (same_codeset (dst_codeset, "UCS2")) { result = realloc (buffer, size + 2); if (NULL == result) result = buffer; } else { result = vbi_strndup_iconv (dst_codeset, "UCS-2", buffer, size, repl_char); free (buffer); buffer = NULL; } return result; } #endif /* 3 == VBI_VERSION_MINOR */ /** * @ingroup Conv * @param fp Output file. * @param dst_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src Source buffer, can be @c NULL. * @param src_size Number of bytes in the source string (excluding * the terminating NUL, if any). * @param repl_char UCS-2 replacement for characters which are not * representable in @a dst_codeset. When zero the function will * fail if the source buffer contains unrepresentable characters. * * Converts a string with iconv() and writes the result into the * given file. * * @returns * FALSE on failure. 
* * @since 0.2.23 */ vbi_bool vbi_fputs_iconv (FILE * fp, const char * dst_codeset, const char * src_codeset, const char * src, unsigned long src_size, int repl_char) { char *buffer; unsigned long size; size_t actual; assert (NULL != fp); if (NULL == src || 0 == src_size) return TRUE; if (NULL == dst_codeset) dst_codeset = "UTF-8"; if (NULL == src_codeset) src_codeset = "UTF-8"; if (same_codeset (dst_codeset, src_codeset)) { return ((size_t) src_size == fwrite (src, 1, src_size, fp)); } buffer = strndup_iconv (&size, dst_codeset, src_codeset, src, src_size, repl_char); if (NULL == buffer) return FALSE; actual = fwrite (buffer, 1, size, fp); free (buffer); buffer = NULL; return (actual == (size_t) size); } /** * @ingroup Conv * @param fp Output file. * @param dst_codeset Character set name for iconv() conversion, * for example "ISO-8859-1". When @c NULL the default is UTF-8. * @param src Source string in UCS-2 format, can be @c NULL. * @param src_length Number of characters (not bytes) in the source * string. Can be -1 if the string is NUL terminated. * @param repl_char UCS-2 replacement for characters which are not * representable in @a dst_codeset. When zero the function will * fail if the source buffer contains unrepresentable characters. * * Converts a UCS-2 string with iconv() and writes the result into * the given file. * * @returns * FALSE on failure. * * @since 0.2.23 */ vbi_bool vbi_fputs_iconv_ucs2 (FILE * fp, const char * dst_codeset, const uint16_t * src, long src_length, int repl_char) { if (NULL == src) return TRUE; if (src_length < 0) src_length = vbi_strlen_ucs2 (src); return vbi_fputs_iconv (fp, dst_codeset, "UCS-2", (const char *) src, src_length * 2, repl_char); } /** * @ingroup Conv * Returns the character encoding used by the current locale, for example * "UTF-8". @c NULL if unknown. * * Note applications must call * @code * setlocale (LC_ALL, ""); * @endcode * to use the locale specified by the environment. 
*   The default C locale
 * uses ASCII encoding.
 *
 * @since 0.2.23
 */
const char *
vbi_locale_codeset (void)
{
    /* First ask for the codeset bound to the library's message
       domain.
       NOTE(review): vbi3_intl_domainname is not declared in this
       file, which declares _zvbi_intl_domainname instead;
       presumably a header maps one name to the other - confirm. */
    const char *codeset =
        bind_textdomain_codeset (vbi3_intl_domainname, NULL);

    if (NULL != codeset)
        return codeset;

    /* Otherwise fall back to the codeset of the current locale. */
    return nl_langinfo (CODESET); /* may be NULL */
}