/* convert.c - Character set conversion framework. Copyright (C) 1999, 2000 Tom Tromey The Gnome Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The Gnome Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the Gnome Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include #include #include "unicode.h" #include "convert.h" /* Linked list of all character sets. */ static unicode_encoding_t *encodings; void unicode_register_encoding (unicode_encoding_t *vec) { vec->next = encodings; encodings = vec; } static unicode_encoding_t * find_encoding (const char *name) { unicode_encoding_t *cs; for (cs = encodings; cs; cs = cs->next) { int i; for (i = 0; cs->names[i]; ++i) { if (! strcasecmp (cs->names[i], name)) return cs; } } return cs; } unicode_iconv_t unicode_iconv_open (const char *tocode, const char *fromcode) { unicode_iconv_t r = (unicode_iconv_t) malloc (sizeof (unicode_iconv_i)); if (r == NULL) { errno = ENOMEM; return (unicode_iconv_t) -1; } #ifdef UNICODE_USE_SYSTEM_ICONV r->u.native = iconv_open (tocode, fromcode); if (r->u.native != (iconv_t) -1) { r->type = unicode_iconv_type_native; return r; } #endif r->type = unicode_iconv_type_ours; r->u.ours.from = find_encoding (fromcode); r->u.ours.to = find_encoding (tocode); if (r->u.ours.from == NULL || r->u.ours.to == NULL) { free (r); errno = EINVAL; return (unicode_iconv_t) -1; } /* FIXME: how to pick the size? */ r->u.ours.valid = 0; r->u.ours.size = 1024; r->u.ours.buffer = (unicode_char_t *) malloc (r->u.ours.size * sizeof (unicode_char_t)); if (r->u.ours.buffer == NULL) { free (r); errno = ENOMEM; return (unicode_iconv_t) -1; } if (r->u.ours.from->init && ! r->u.ours.from->init (&r->u.ours.from_data)) { free (r->u.ours.buffer); free (r); errno = ENOMEM; return (unicode_iconv_t) -1; } if (r->u.ours.to->init && ! r->u.ours.to->init (&r->u.ours.to_data)) { if (r->u.ours.from->destroy) r->u.ours.from->destroy (&r->u.ours.from_data); free (r->u.ours.buffer); free (r); errno = ENOMEM; return (unicode_iconv_t) -1; } return r; } int unicode_iconv_close (unicode_iconv_t cd) { int r = 0; #ifdef UNICODE_USE_SYSTEM_ICONV if (cd->type == unicode_iconv_type_native) r = iconv_close (cd->u.native); else #endif { if (cd->u.ours.to->destroy) cd->u.ours.to->destroy (&cd->u.ours.to_data); if (cd->u.ours.from->destroy) cd->u.ours.from->destroy (&cd->u.ours.from_data); free (cd->u.ours.buffer); } free (cd); return r; } ssize_t unicode_iconv (unicode_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { size_t count = 0; #ifdef UNICODE_USE_SYSTEM_ICONV if (cd->type == unicode_iconv_type_native) return iconv (cd->u.native, inbuf, inbytesleft, outbuf, outbytesleft); #endif if (inbuf == NULL || *inbuf == NULL) { if (outbuf != NULL && *outbytesleft > 0) { if (cd->u.ours.to->reset) return cd->u.ours.to->reset (&cd->u.ours.to_data, outbuf, outbytesleft); else return 0; } } if (outbuf == NULL || *outbytesleft <= 0) { errno = E2BIG; return -1; } /* FIXME: when converting to or from UCS4, we should just skip the intermediate buffer. */ while (*inbytesleft > 0) { size_t left = *inbytesleft; const char *here = *inbuf; enum unicode_read_result rr; unicode_char_t *buffer = cd->u.ours.buffer + cd->u.ours.valid; size_t charsleft = cd->u.ours.size - cd->u.ours.valid; enum unicode_write_result wr; /* Convert as many characters as possible into the intermediate buffer. */ rr = cd->u.ours.from->read (cd->u.ours.from_data, inbuf, inbytesleft, &buffer, &charsleft); switch (rr) { case unicode_read_ok: count += buffer - cd->u.ours.buffer - cd->u.ours.valid; cd->u.ours.valid = buffer - cd->u.ours.buffer; break; case unicode_read_incomplete: /* It doesn't matter whether we ever processed any user data -- it only matters whether we processed *all* of it. If we failed to process all of it we return EINVAL. */ *inbytesleft = left; *inbuf = here; errno = EINVAL; return -1; case unicode_read_error: *inbytesleft = left; *inbuf = here; errno = EILSEQ; return -1; default: assert (0); } /* Now convert as many characters as possible from the intermediate buffer into the output buffer. */ buffer = cd->u.ours.buffer; charsleft = cd->u.ours.valid; wr = cd->u.ours.to->write (cd->u.ours.to_data, &buffer, &charsleft, outbuf, outbytesleft); /* FIXME: circular buffer would be more efficient here. */ #ifdef HAVE_MEMMOVE memmove (cd->u.ours.buffer, buffer, charsleft * sizeof (unicode_char_t)); #else bcopy (buffer, cd->u.ours.buffer, charsleft * sizeof (unicode_char_t)); #endif cd->u.ours.valid = charsleft; switch (wr) { case unicode_write_ok: break; case unicode_write_more_room: errno = E2BIG; return -1; default: assert (0); } } return count; }