/* decomp.c - Character decomposition. Copyright (C) 1999 Tom Tromey The Gnome Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The Gnome Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the Gnome Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include "unicode.h" #include "decomp.h" /* We cheat a bit and cast type values to (char *). We detect these using the &0xff trick. */ #define CC(Page, Char) \ (((((int) (combining_class_table[Page])) & 0xff) \ == ((int) combining_class_table[Page])) \ ? ((int) combining_class_table[Page]) \ : (combining_class_table[Page][Char])) #define COMBINING_CLASS(Char) \ (((Char) > (UNICODE_LAST_CHAR)) ? 0 : CC((Char) >> 8, (Char) & 0xff)) /* Size of array. */ #define asize(Ary) ((sizeof (Ary)) / sizeof ((Ary)[0])) /* Compute the canonical ordering of a string in-place. */ void unicode_canonical_ordering (unicode_char_t *string, size_t len) { size_t i; int swap = 1; while (swap) { int last; swap = 0; last = COMBINING_CLASS (string[0]); for (i = 0; i < len - 1; ++i) { int next = COMBINING_CLASS (string[i + 1]); if (next != 0 && last > next) { size_t j; /* Percolate item leftward through string. */ for (j = i; j > 0; --j) { unicode_char_t t; if (COMBINING_CLASS (string[j]) <= next) break; t = string[j + 1]; string[j + 1] = string[j]; string[j] = t; swap = 1; } /* We're re-entering the loop looking at the old character again. */ next = last; } last = next; } } } unicode_char_t * unicode_canonical_decomposition (unicode_char_t ch, size_t *result_len) { unicode_char_t *r = NULL; if (ch <= 0xffff) { int start = 0; int end = asize (decomp_table); while (start != end) { int half = (start + end) / 2; if (ch == decomp_table[half].ch) { /* Found it. */ int i, len; /* We store as a double-nul terminated string. */ for (len = 0; (decomp_table[half].expansion[len] || decomp_table[half].expansion[len + 1]); len += 2) ; /* We've counted twice as many bytes as there are characters. */ *result_len = len / 2; r = malloc (len / 2 * sizeof (unicode_char_t)); for (i = 0; i < len; i += 2) { r[i / 2] = (decomp_table[half].expansion[i] << 8 | decomp_table[half].expansion[i + 1]); } break; } else if (ch > decomp_table[half].ch) start = half; else end = half; } } if (r == NULL) { /* Not in our table. */ r = malloc (sizeof (unicode_char_t)); *r = ch; *result_len = 1; } /* Supposedly following the Unicode 2.1.9 table means that the decompositions come out in canonical order. I haven't tested this, but we rely on it here. */ return r; }