/* decomp.c - Character decomposition.

   Copyright (C) 1999 Tom Tromey

   The Gnome Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The Gnome Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the Gnome Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

#include <config.h>

#include <stdlib.h>

#include "unicode.h"
#include "decomp.h"

/* We cheat a bit and cast type values to (char *).  We detect these
   using the &0xff trick.  */
#define CC(Page, Char) \
  (((((int) (combining_class_table[Page])) & 0xff) \
    == ((int) combining_class_table[Page])) \
   ? ((int) combining_class_table[Page]) \
   : (combining_class_table[Page][Char]))

#define COMBINING_CLASS(Char) \
     (((Char) > (UNICODE_LAST_CHAR)) ? 0 : CC((Char) >> 8, (Char) & 0xff))

/* Size of array.  */
#define asize(Ary) ((sizeof (Ary)) / sizeof ((Ary)[0]))

/* Compute the canonical ordering of a string in-place.  */
void
unicode_canonical_ordering (unicode_char_t *string,
			    size_t len)
{
  size_t i;
  int swap = 1;

  while (swap)
    {
      int last;
      swap = 0;
      last = COMBINING_CLASS (string[0]);
      for (i = 0; i < len - 1; ++i)
	{
	  int next = COMBINING_CLASS (string[i + 1]);
	  if (next != 0 && last > next)
	    {
	      size_t j;
	      /* Percolate item leftward through string.  */
	      for (j = i; j > 0; --j)
		{
		  unicode_char_t t;
		  if (COMBINING_CLASS (string[j]) <= next)
		    break;
		  t = string[j + 1];
		  string[j + 1] = string[j];
		  string[j] = t;
		  swap = 1;
		}
	      /* We're re-entering the loop looking at the old
		 character again.  */
	      next = last;
	    }
	  last = next;
	}
    }
}

unicode_char_t *
unicode_canonical_decomposition (unicode_char_t ch,
				 size_t *result_len)
{
  unicode_char_t *r = NULL;

  if (ch <= 0xffff)
    {
      int start = 0;
      int end = asize (decomp_table);
      while (start != end)
	{
	  int half = (start + end) / 2;
	  if (ch == decomp_table[half].ch)
	    {
	      /* Found it.  */
	      int i, len;
	      /* We store as a double-nul terminated string.  */
	      for (len = 0; (decomp_table[half].expansion[len]
			     || decomp_table[half].expansion[len + 1]);
		   len += 2)
		;

	      /* We've counted twice as many bytes as there are
		 characters.  */
	      *result_len = len / 2;
	      r = malloc (len / 2 * sizeof (unicode_char_t));

	      for (i = 0; i < len; i += 2)
		{
		  r[i / 2] = (decomp_table[half].expansion[i] << 8
			      | decomp_table[half].expansion[i + 1]);
		}
	      break;
	    }
	  else if (ch > decomp_table[half].ch)
	    start = half;
	  else
	    end = half;
	}
    }

  if (r == NULL)
    {
      /* Not in our table.  */
      r = malloc (sizeof (unicode_char_t));
      *r = ch;
      *result_len = 1;
    }

  /* Supposedly following the Unicode 2.1.9 table means that the
     decompositions come out in canonical order.  I haven't tested
     this, but we rely on it here.  */
  return r;
}