static char rcsid[] = "@(#)$Id: unidata.c,v 1.31 2006/04/09 07:37:07 hurtta Exp $"; /****************************************************************************** * The Elm (ME+) Mail System - $Revision: 1.31 $ $State: Exp $ * * Author: Kari Hurtta (was hurtta+elm@ozone.FMI.FI) *****************************************************************************/ #include "headers.h" #include "mmaputil.h" #include "s_me.h" #include "unidata.h" #include #ifndef ANSI_C extern int errno; #endif DEBUG_VAR(Debug,__FILE__,"charset"); static unsigned char * us_str P_((char *str)); static unsigned char * us_str(str) char *str; { return (unsigned char *)str; } #ifdef MMAP #include #ifndef MAP_FAILED #define MAP_FAILED (void *)(-1) #endif #endif #define TEXT_MAGIC "ELMME+\n" /* TEXT_MAGIC is 8 bytes (including \0) */ struct unidata_header_1 { uint8 text_magic[sizeof TEXT_MAGIC]; uint16 magic; uint16 header_size; uint16 page_size; uint16 cell_size; uint16 decomp_cell_size; uint16 list_pagecount; uint16 udata_pagecount; uint16 decomp_header_pagecount; uint16 decomp_storage_pagecount; uint16 comp_list_pagecount; }; /* Values for flags -field on struct char_cell_1 -structure */ #define FLAGS_have_upper 0x0001 /* upper -field is valid */ #define FLAGS_have_lower 0x0002 /* lower -field is valid */ #define FLAGS_have_title 0x0004 /* title -field is valid */ #define FLAGS_have_decimal 0x0008 /* decimal -field is valid */ #define FLAGS_have_decomp 0x0010 /* decomp -filed is valid */ struct char_cell_1 { uint16 chartype; /* Field 2 -- General Category */ uint16 class; /* Field 3 -- Canonical Combining Classes */ uint16 direction; /* Field 4 -- Bidirectional Category */ /* Field 9 -- mirrored */ uint16 decomp; /* Field 5 -- Character Decomposition */ uint16 decimal; /* Field 6 -- Decimal digit value */ /* Field 7 -- Digit value -- not implemented (yet?) */ /* Field 8 -- Numeric value -- not implemented (yet?) */ uint16 flags; uint16 upper; /* Field 12 -- Upper case equivalent */ uint16 lower; /* Field 12 -- Lower case equivalent */ uint16 title; /* FIeld 12 -- Title case equivalent */ }; static struct char_cell_1 unassigned_cell_buffer = { CHARTYPE_Cn, 0, 0, 0, 0, 0, 0, 0 }; struct decomp_cell_1 { uint16 next_comp; /* next decomp_cell_1 with same first char */ uint16 value; /* backpointer to character */ uint16 storage; /* pointer/index to decomp storage */ uint8 type_byte; uint8 len_byte; /* length of decomposed data on storage */ }; static struct decomp_cell_1 unassigned_decomp_buffer = { 0, 0, 0, 0 }; #define UNIDATA_magic 0xFB00 enum unidata_format { unidata_bad = 0, unidata_format_1 = UNIDATA_magic }; struct unidata_format_1 { int byte_swapped; int mmapped; struct unidata_header_1 *header; size_t header_length; uint16 *vector; size_t vector_length; off_t vector_offset; int vector_count; struct char_cell_1 *cells; size_t cells_length; off_t cells_offset; int cells_count; struct decomp_cell_1 *decomp_cells; size_t decomp_cells_length; off_t decomp_cells_offset; int decomp_cells_count; uint16 *decomp_storage; size_t decomp_storage_length; off_t decomp_storage_offset; int decomp_storage_count; uint16 *comp_vector; size_t comp_vector_length; off_t comp_vector_offset; int comp_vector_count; }; /* This macro requires ANSI C preprocessor */ #if __STDC__ #define SYSCALL(x) { int ret_=x; int err_=errno;\ if (ret_ == -1) \ DPRINT(Debug,1,(&Debug,"unidata: syscall %s failed: errno=%d (%s)\n", \ #x,err_,error_description(err_))); } #else #define SYSCALL(x) x #endif static void free_unidata_1 P_((struct unidata_format_1 *v)); static void free_unidata_1(v) struct unidata_format_1 *v; { if (!v->mmapped) { /* malloced */ if (v->header_length) free(v->header); if (v->vector_length) free(v->vector); if (v->cells_length) free(v->cells); if (v->decomp_cells_length) free(v->decomp_cells); if (v->decomp_storage_length) free(v->decomp_storage); if (v->comp_vector_length) free(v->comp_vector); } else { #ifdef MMAP if (v->header_length) SYSCALL(munmap((void *)v->header,v->header_length)); if (v->vector_length) SYSCALL(munmap((void *)v->vector,v->vector_length)); if (v->cells_length) SYSCALL(munmap((void *)v->cells,v->cells_length)); if (v->decomp_cells_length) SYSCALL(munmap((void *)v->decomp_cells,v->decomp_cells_length)); if (v->decomp_storage_length) SYSCALL(munmap((void *)v->decomp_storage, v->decomp_storage_length)); if (v->comp_vector_length) SYSCALL(munmap((void *)v->comp_vector,v->comp_vector_length)); #else panic("UNIDATA PANIC",__FILE__,__LINE__,"free_unidata_1", "unidata: Internal error -- mmap not available",0); #endif v->mmapped = 0; } v->header = NULL; v->vector = NULL; v->cells = NULL; v->decomp_cells = NULL; v->decomp_storage = NULL; v->comp_vector = NULL; v->header_length = 0; v->vector_length = 0; v->cells_length = 0; v->decomp_cells_length = 0; v->decomp_storage_length = 0; v->comp_vector_length = 0; } #define MAX_VECTOR 0x10000 /* Allocate maximun needed space */ static void malloc_unidata_1 P_((struct unidata_format_1 *v)); static void malloc_unidata_1(v) struct unidata_format_1 *v; { #ifdef MMAP uint16 pagesize = getpagesize(); #else uint16 pagesize = 512; #endif uint16 header_pages, vector_pages, cells_pages; uint16 decomp_cells_pages, decomp_storage_pages, comp_vector_pages; v->byte_swapped = 0; /* generated -- not swapped */ v->mmapped = 0; /* malloced -- not mapped */ v->header_length = sizeof (struct unidata_header_1); v->header = safe_malloc(v->header_length); header_pages = pages(v->header_length,pagesize); v->vector_length = MAX_VECTOR * sizeof (uint16); v->vector = safe_malloc(v->vector_length); vector_pages = pages(v->vector_length,pagesize); v->vector_count = 0; v->vector_offset = pagesize * (off_t) header_pages; v->cells_length = MAX_VECTOR * sizeof (struct char_cell_1); v->cells = safe_malloc(v->cells_length); cells_pages = pages(v->cells_length,pagesize); v->cells_count = 0; v->cells_offset = v->vector_offset + pagesize * (off_t) vector_pages; /* cells_offset need to be recalculated when real need is known */ v->decomp_cells_length = MAX_VECTOR * sizeof (struct decomp_cell_1); v->decomp_cells = safe_malloc(v->decomp_cells_length); decomp_cells_pages = pages(v->decomp_cells_length,pagesize); v->decomp_cells_count = 0; v->decomp_cells_offset = v->cells_offset + pagesize * (off_t) decomp_cells_pages; /* decomp_cells_offset need to be recalculated when real need is known */ v->decomp_storage_length = MAX_VECTOR * sizeof (uint16); v->decomp_storage = safe_malloc(v->decomp_storage_length); decomp_storage_pages = pages(v->decomp_storage_length,pagesize); v->decomp_storage_count = 0; v->decomp_storage_offset = v->decomp_cells_offset + pagesize * (off_t) decomp_storage_pages; /* decomp_storage_offset need to be recalculated when real need is known */ v->comp_vector_length = MAX_VECTOR * sizeof (uint16); v->comp_vector = safe_malloc(v->comp_vector_length); comp_vector_pages = pages(v->comp_vector_length,pagesize); v->comp_vector_count = 0; v->comp_vector_offset = v->decomp_storage_offset + pagesize * (off_t) comp_vector_pages; /* comp_vector_offset need to be recalculated when real need is known */ strfcpy((char *)v->header->text_magic, TEXT_MAGIC,sizeof v->header->text_magic); v->header->magic = unidata_format_1; v->header->header_size = sizeof (struct unidata_header_1); v->header->page_size = pagesize; v->header->cell_size = sizeof (struct char_cell_1); v->header->decomp_cell_size = sizeof (struct decomp_cell_1); v->header->list_pagecount = vector_pages; v->header->udata_pagecount = cells_pages; v->header->decomp_header_pagecount = decomp_cells_pages; v->header->decomp_storage_pagecount = decomp_storage_pages; v->header->comp_list_pagecount = comp_vector_pages; } #define READ_VALUE(x,byte_swapped) (byte_swapped ? SWAP(x) : x) static void dump_header P_((struct unidata_format_1 *v, int byte_swapped, CONST char *filename)); static int write_unidata_1 P_((struct unidata_format_1 *v, int fd, CONST char *filename)); static int write_unidata_1(v, fd, filename) struct unidata_format_1 *v; int fd; CONST char *filename; { uint16 pagesize = READ_VALUE(v->header->page_size,v->byte_swapped); uint16 comp_vector_pages = READ_VALUE(v->header->comp_list_pagecount, v->byte_swapped); if (!write_range(fd,0,v->header_length,(unsigned char*)v->header)) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: write_range failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } if (!write_range(fd,v->vector_offset,v->vector_length, (unsigned char *)(v->vector))) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: write_range failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } if (!write_range(fd,v->cells_offset,v->cells_length, (unsigned char *)(v->cells))) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: write_range failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } if (!write_range(fd,v->decomp_cells_offset,v->decomp_cells_length, (unsigned char *)(v->decomp_cells))) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: write_range failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } if (!write_range(fd,v->decomp_storage_offset,v->decomp_storage_length, (unsigned char *)(v->decomp_storage))) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: write_range failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } if (!write_range(fd,v->comp_vector_offset,v->comp_vector_length, (unsigned char *)(v->comp_vector))) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: write_range failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } if (v->comp_vector_length < comp_vector_pages * (size_t) pagesize) { /* Be sure that file is big enough */ if (!write_range(fd, v->comp_vector_offset + comp_vector_pages * (off_t) pagesize -1, 1, us_str(" "))) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: write_range failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } } dump_header(v,v->byte_swapped,filename); return 1; } static void dump_header (v,byte_swapped,filename) struct unidata_format_1 *v; int byte_swapped; CONST char *filename; { if (v->header) { DPRINT(Debug,40,(&Debug,"============ Header dump: %-15s =======\n", filename)); DPRINT(Debug,40,(&Debug,"BYTE SWAPPED = %d\n",byte_swapped)); DPRINT(Debug,40,(&Debug, "magic %04X header_size %04X page_size %04X\n", READ_VALUE(v->header->magic,v->byte_swapped), READ_VALUE(v->header->header_size,v->byte_swapped), READ_VALUE(v->header->page_size,v->byte_swapped))); DPRINT(Debug,40,(&Debug, "cell_size %04X decomp_cell_size %04X\n", READ_VALUE(v->header->cell_size,v->byte_swapped), READ_VALUE(v->header->decomp_cell_size,v->byte_swapped))); DPRINT(Debug,40,(&Debug, "list_pagecount %04X udata_pagecount %04X\n", READ_VALUE(v->header->list_pagecount,v->byte_swapped), READ_VALUE(v->header->udata_pagecount,v->byte_swapped))); DPRINT(Debug,40,(&Debug, "decomp_header_pagecount %04X decomp_storage_pagecount %04X\n", READ_VALUE(v->header->decomp_header_pagecount,v->byte_swapped), READ_VALUE(v->header->decomp_storage_pagecount,v->byte_swapped))); DPRINT(Debug,40,(&Debug, "comp_list_pagecount %04X\n", READ_VALUE(v->header->comp_list_pagecount,v->byte_swapped))); DPRINT(Debug,40,(&Debug,"============ End dump: %-15s =======\n", filename)); } } static int calculate_sizes P_((struct unidata_format_1 *v, CONST char *filename)); static int calculate_sizes (v,filename) struct unidata_format_1 *v; CONST char *filename; { uint16 pagesize = READ_VALUE(v->header->page_size,v->byte_swapped); uint16 header_pages, vector_pages, cells_pages; uint16 decomp_cells_pages, decomp_storage_pages, comp_vector_pages; if (READ_VALUE(v->header->header_size,v->byte_swapped) != sizeof (struct unidata_header_1) || READ_VALUE(v->header->cell_size,v->byte_swapped) != sizeof (struct char_cell_1) || READ_VALUE(v->header->decomp_cell_size,v->byte_swapped) != sizeof (struct decomp_cell_1)) { lib_error(CATGETS(elm_msg_cat, MeSet, MeCorruptedMapfile, "Mapfile %.30s is corrupted"), filename); DPRINT(Debug,1,(&Debug,"calculate_sizes failed #1 -- sizeof check\n")); return 0; } header_pages = pages(v->header_length,pagesize); v->vector_offset = pagesize * (off_t) header_pages; vector_pages = READ_VALUE(v->header->list_pagecount,v->byte_swapped); v->cells_offset = v->vector_offset + pagesize * (off_t) vector_pages; cells_pages = READ_VALUE(v->header->udata_pagecount,v->byte_swapped); v->decomp_cells_offset = v->cells_offset + pagesize * (off_t) cells_pages; decomp_cells_pages = READ_VALUE(v->header->decomp_header_pagecount, v->byte_swapped); v->decomp_storage_offset = v->decomp_cells_offset + pagesize * (off_t) decomp_cells_pages; decomp_storage_pages = READ_VALUE(v->header->decomp_storage_pagecount, v->byte_swapped); v->comp_vector_offset = v->decomp_storage_offset + pagesize * (off_t) decomp_storage_pages; comp_vector_pages = READ_VALUE(v->header->comp_list_pagecount, v->byte_swapped); v->vector_length = pagesize * (off_t) vector_pages; /* May give too big value */ v->vector_count = v->vector_length / sizeof (uint16); v->cells_length = pagesize * (off_t) cells_pages; /* May give too big value */ v->cells_count = v->cells_length / sizeof (struct char_cell_1); v->decomp_cells_length = pagesize * (off_t) decomp_cells_pages; /* May give too big value */ v->decomp_cells_count = v->decomp_cells_length / sizeof (struct decomp_cell_1); v->decomp_storage_length = pagesize * (off_t) decomp_storage_pages; /* May give too big value */ v->decomp_storage_count = v->decomp_storage_length / sizeof (uint16); v->comp_vector_length = pagesize * (off_t) comp_vector_pages; /* May give too big value */ v->comp_vector_count = v->comp_vector_length / sizeof (uint16); return 1; } static void debug_output P_((struct unidata_format_1 *v, int status, int pagesize, char *str, CONST char *filename)); static void debug_output(v,status,pagesize,str,filename) struct unidata_format_1 *v; int status; int pagesize; char *str; CONST char *filename; { DPRINT(Debug,1,(&Debug, "%s: %s: status=%d, vector_count=%d, cells_count=%d, pagesize=%d\n", filename,str,status,v->vector_count,v->cells_count, pagesize)); DPRINT(Debug,1,(&Debug, "%s: %s: header_length=%d, vector_length=%d, cells_length=%d\n", filename,str,v->header_length,v->vector_length,v->cells_length)); DPRINT(Debug,1,(&Debug, "%s: %s: decomp_cells_count=%d, decomp_storage_count=%d, comp_vector_count=%d\n", filename,str,v->decomp_cells_count,v->decomp_storage_count, v->comp_vector_count)); DPRINT(Debug,1,(&Debug, "%s: %s: decomp_cells_length=%d, decomp_storage_length=%d, comp_vector_length=%d\n", filename,str,v->decomp_cells_length,v->decomp_storage_length, v->comp_vector_length)); if (v->header) { DPRINT(Debug,5,(&Debug, "%s: %s: swapped=%d, page_size=%d, vector_pages=%d, cells_pages=%d\n", filename,str,v->byte_swapped, READ_VALUE(v->header->page_size, v->byte_swapped), READ_VALUE(v->header->list_pagecount, v->byte_swapped), READ_VALUE(v->header->udata_pagecount, v->byte_swapped))); DPRINT(Debug,5,(&Debug, "%s: %s: decomp_cells_pages=%d, decomp_storage_pages=%d, comp_vector_pages=%d\n", filename,str, READ_VALUE(v->header->decomp_header_pagecount, v->byte_swapped), READ_VALUE(v->header->decomp_storage_pagecount, v->byte_swapped), READ_VALUE(v->header->comp_list_pagecount, v->byte_swapped))); } } static int read_unidata_1 P_((struct unidata_format_1 *v, int fd, int byte_swapped, CONST char *filename)); static int read_unidata_1(v, fd, byte_swapped, filename) struct unidata_format_1 *v; int fd; int byte_swapped; CONST char *filename; { int status = 0; uint16 pagesize = 0; v->byte_swapped = byte_swapped; v->mmapped = 0; v->header_length = sizeof (struct unidata_header_1); v->header = safe_malloc(v->header_length); if (!read_range(fd,0,v->header_length,(unsigned char*)v->header)) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); status = 0; goto fail_1; } pagesize = READ_VALUE(v->header->page_size,v->byte_swapped); dump_header(v,byte_swapped,filename); if (!calculate_sizes(v,filename)) { status = 0; goto fail_1; } v->vector = safe_malloc(v->vector_length); if (!read_range(fd,v->vector_offset,v->vector_length, (unsigned char *)(v->vector))) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); status = 0; goto fail_2; } v->cells = safe_malloc(v->cells_length); if (!read_range(fd,v->cells_offset,v->cells_length, (unsigned char *)(v->cells))) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); status = 0; goto fail_3; } v->decomp_cells = safe_malloc(v->decomp_cells_length); if (!read_range(fd,v->decomp_cells_offset,v->decomp_cells_length, (unsigned char *)(v->decomp_cells))) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); status = 0; goto fail_4; } v->decomp_storage = safe_malloc(v->decomp_storage_length); if (!read_range(fd,v->decomp_storage_offset,v->decomp_storage_length, (unsigned char *)(v->decomp_storage))) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); status = 0; goto fail_5; } v->comp_vector = safe_malloc(v->comp_vector_length); if (!read_range(fd,v->comp_vector_offset,v->comp_vector_length, (unsigned char *)(v->comp_vector))) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); status = 0; goto fail_6; } status = 1; if (!status) { fail_6: free(v->comp_vector); v->comp_vector = NULL; v->comp_vector_length = 0; fail_5: free(v->decomp_storage); v->decomp_storage = NULL; v->decomp_storage_length = 0; fail_4: free(v->decomp_cells); v->decomp_cells = NULL; v->decomp_cells_length = 0; fail_3: free(v->cells); v->cells = NULL; v->cells_length = 0; fail_2: free(v->vector); v->vector = NULL; v->vector_length = 0; fail_1: free(v->header); v->header = NULL; v->header_length = 0; } debug_output(v,status,pagesize,"read",filename); return status; } static int mmap_unidata_1 P_((struct unidata_format_1 *v, int fd, int byte_swapped, CONST char *filename)); static int mmap_unidata_1(v, fd, byte_swapped, filename) struct unidata_format_1 *v; int fd; int byte_swapped; CONST char *filename; { int status = 0; #ifdef MMAP uint16 real_pagesize = getpagesize(); uint16 pagesize = 0; void * result; v->byte_swapped = byte_swapped; v->mmapped = 1; v->header_length = sizeof (struct unidata_header_1); result = mmap(0,v->header_length,PROT_READ,MAP_SHARED,fd,0); if (result == MAP_FAILED) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFailedMMap, "Failed to mmap %.50s: %30s"), filename, error_description(err)); v->header = NULL; v->header_length = 0; status = 0; goto fail_1; } v->header = result; pagesize = READ_VALUE(v->header->page_size,v->byte_swapped); if (pagesize < real_pagesize || pagesize % real_pagesize != 0) { lib_error(CATGETS(elm_msg_cat, MeSet, MeIncompatiblePagesize, "Pagesize of %.50s (%d) incompatible with system pagesize (%d)"), filename, pagesize,real_pagesize); status = 0; goto fail_2; } dump_header(v,byte_swapped,filename); if (!calculate_sizes(v,filename)) { status = 0; goto fail_2; } result = mmap(0,v->vector_length,PROT_READ,MAP_SHARED,fd, v->vector_offset); if (result == MAP_FAILED) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFailedMMap, "Failed to mmap %.50s: %30s"), filename, error_description(err)); v->vector = NULL; v->vector_length = 0; status = 0; goto fail_2; } v->vector = result; result = mmap(0,v->cells_length,PROT_READ,MAP_SHARED,fd, v->cells_offset); if (result == MAP_FAILED) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFailedMMap, "Failed to mmap %.50s: %30s"), filename, error_description(err)); v->cells = NULL; v->cells_length = 0; status = 0; goto fail_3; } v->cells = result; result = mmap(0,v->decomp_cells_length,PROT_READ,MAP_SHARED,fd, v->decomp_cells_offset); if (result == MAP_FAILED) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFailedMMap, "Failed to mmap %.50s: %30s"), filename, error_description(err)); v->decomp_cells = NULL; v->decomp_cells_length = 0; status = 0; goto fail_4; } v->decomp_cells = result; result = mmap(0,v->decomp_storage_length,PROT_READ,MAP_SHARED,fd, v->decomp_storage_offset); if (result == MAP_FAILED) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFailedMMap, "Failed to mmap %.50s: %30s"), filename, error_description(err)); v->decomp_storage = NULL; v->decomp_storage_length = 0; status = 0; goto fail_5; } v->decomp_storage = result; result = mmap(0,v->comp_vector_length,PROT_READ,MAP_SHARED,fd, v->comp_vector_offset); if (result == MAP_FAILED) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFailedMMap, "Failed to mmap %.50s: %30s"), filename, error_description(err)); v->cells = NULL; v->cells_length = 0; status = 0; goto fail_6; } v->comp_vector = result; status = 1; if (!status) { fail_6: SYSCALL(munmap((void *)v->decomp_storage,v->decomp_storage_length)); v->decomp_storage = NULL; v->decomp_storage_length = 0; fail_5: SYSCALL(munmap((void *)v->decomp_cells,v->decomp_cells_length)); v->decomp_cells = NULL; v->decomp_cells_length = 0; fail_4: SYSCALL(munmap((void *)v->cells,v->cells_length)); v->cells = NULL; v->cells_length = 0; fail_3: SYSCALL(munmap((void *)v->vector,v->vector_length)); v->vector = NULL; v->vector_length = 0; fail_2: SYSCALL(munmap((void *)v->header,v->header_length)); v->header = NULL; v->header_length = 0; } fail_1: debug_output(v,status,pagesize,"mmap",filename); #endif return status; } static void trim_unidata_1 P_((struct unidata_format_1 *v)); static void trim_unidata_1(v) struct unidata_format_1 *v; { /* If malloced, truncate according of usage */ if (!v->mmapped && !v->byte_swapped) { uint16 pagesize = v->header->page_size; int n = v->vector_count; if (n < 1) n = 1; v->vector_length = n * sizeof (uint16); v->header->list_pagecount = pages(v->vector_length,pagesize); v->vector = safe_realloc(v->vector,v->vector_length); n = v->cells_count; if (n < 1) n = 1; v->cells_length = n * sizeof (struct unidata_header_1); v->header->udata_pagecount = pages(v->cells_length,pagesize); v->cells_offset = v->vector_offset + pagesize * (off_t) (v->header->list_pagecount); v->cells = safe_realloc(v->cells,v->cells_length); n = v->decomp_cells_count; if (n < 1) n = 1; v->decomp_cells_length = n * sizeof (struct decomp_cell_1); v->header->decomp_header_pagecount = pages(v->decomp_cells_length, pagesize); v->decomp_cells_offset = v->cells_offset + pagesize * (off_t) (v->header->udata_pagecount); v->decomp_cells = safe_realloc(v->decomp_cells, v->decomp_cells_length); n = v->decomp_storage_count; if (n < 1) n = 1; v->decomp_storage_length = n * sizeof (uint16); v->header->decomp_storage_pagecount = pages(v->decomp_storage_length, pagesize); v->decomp_storage_offset = v->decomp_cells_offset + pagesize * (off_t) (v->header->decomp_header_pagecount); v->decomp_storage = safe_realloc(v->decomp_storage, v->decomp_storage_length); n = v->comp_vector_count; if (n < 1) n = 1; v->comp_vector_length = n * sizeof (uint16); v->header->comp_list_pagecount = pages(v->comp_vector_length, pagesize); v->comp_vector_offset = v->decomp_storage_offset + pagesize * (off_t) (v->header->decomp_storage_pagecount); v->comp_vector = safe_realloc(v->comp_vector, v->comp_vector_length); } /* zero fill unused area */ if (v->vector_count * sizeof (uint16) < v->vector_length) { /* bzero is defined on hdrs/defs.h * notice pointer arithmetic */ bzero((void *)(v->vector + v->vector_count), v->vector_length - v->vector_count * sizeof (uint16)); } if (v->cells_count * sizeof (struct unidata_header_1) < v->cells_length) { /* bzero is defined on hdrs/defs.h * notice pointer arithmetic */ bzero((void *)(v->cells + v->cells_count), v->cells_length - v->cells_count * sizeof (struct unidata_header_1)); } if (v->decomp_cells_count * sizeof (struct decomp_cell_1) < v->decomp_cells_length) { /* bzero is defined on hdrs/defs.h * notice pointer arithmetic */ bzero((void *)(v->decomp_cells + v->decomp_cells_count), v->decomp_cells_length - v->decomp_cells_count * sizeof (struct decomp_cell_1)); } if (v->decomp_storage_count * sizeof (uint16) < v->decomp_storage_length) { /* bzero is defined on hdrs/defs.h * notice pointer arithmetic */ bzero((void *)(v->decomp_storage + v->decomp_storage_count), v->decomp_storage_length - v->decomp_storage_count * sizeof (uint16)); } if (v->comp_vector_count * sizeof (uint16) < v->comp_vector_length) { /* bzero is defined on hdrs/defs.h * notice pointer arithmetic */ bzero((void *)(v->comp_vector + v->comp_vector_count), v->comp_vector_length - v->comp_vector_count * sizeof (uint16)); } } struct unidata_mapped_data { enum unidata_format format; union { struct unidata_format_1 f1; } v; }; void free_unidata(v) struct unidata_mapped_data * v; { switch(v->format) { case unidata_format_1: free_unidata_1(&(v->v.f1)); break; } v->format = unidata_bad; free(v); } int write_unidata(v,filename) struct unidata_mapped_data *v; CONST char *filename; { char *tmpname; int fd, err; int status = 0; if (0 != access(filename,WRITE_ACCESS)) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: access failed: errno=%d (%s)\n", \ err,error_description(err))); if (err != ENOENT) { lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), filename, error_description(err)); return 0; } } tmpname = safe_strdup(filename); tmpname = strmcat(tmpname,".N"); err = can_open(tmpname,"w"); if (err) { DPRINT(Debug,1,(&Debug, "unidata: can_open failed: code=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), tmpname, error_description(err)); status = 0; goto fail; } fd = open(tmpname,O_WRONLY|O_CREAT|O_EXCL,0644); if (-1 == fd) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: open failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), tmpname, error_description(err)); status = 0; goto fail; } switch(v->format) { case unidata_format_1: status = write_unidata_1(&(v->v.f1),fd,tmpname); break; } if (-1 == close(fd)) { int err = errno; DPRINT(Debug,1,(&Debug, "unidata: close failed: errno=%d (%s)\n", \ err,error_description(err))); lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotWriteable, "File %.50s is not writeable: %s"), tmpname, error_description(err)); status = 0; goto fail; } if (status) { if (-1 == rename(tmpname,filename)) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileNotRenamed, "Failed to rename temporary file to %.50s: %.30s"), filename, error_description(err)); status = 0; goto fail; } } fail: if (tmpname) free(tmpname); return status; } int get_unidata(ptr,filename) struct unidata_mapped_data **ptr; CONST char *filename; { struct unidata_mapped_data * res = *ptr; int status = 0; int fd, err; uint8 text_magic[sizeof TEXT_MAGIC]; uint16 magic; int n; err = can_open(filename,"r"); if (err) { lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); return 0; } fd = open(filename,O_RDONLY); if (-1 == fd) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeFileUnreadable, "File %.50s is unreadable: %s"), filename, error_description(err)); return 0; } n = read(fd,text_magic, sizeof text_magic); if (n != 8) { /* text_magic is assumed to be 8 bytes or there is error */ lib_error(CATGETS(elm_msg_cat, MeSet, MeCorruptedMapfile, "Mapfile %.30s is corrupted"), filename); close(fd); DPRINT(Debug,1,(&Debug,"get_unidata failed #1 -- text_magic read\n")); return 0; } if (0 != memcmp(text_magic,TEXT_MAGIC,8)) { lib_error(CATGETS(elm_msg_cat, MeSet, MeCorruptedMapfile, "Mapfile %.30s is corrupted"), filename); close(fd); DPRINT(Debug,1,(&Debug, "get_unidata failed #1 -- text_magic is wrong: %.8s\n", text_magic)); return 0; } n = read(fd,(void *)&magic, sizeof magic); if (n != 2) { /* magic is assumed to be 2 bytes or * there is 'Configure' error... */ lib_error(CATGETS(elm_msg_cat, MeSet, MeCorruptedMapfile, "Mapfile %.30s is corrupted"), filename); close(fd); DPRINT(Debug,1,(&Debug,"get_unidata failed #1 -- magic read\n")); return 0; } if (res) free_unidata(res); res = NULL; res = safe_malloc(sizeof (struct unidata_mapped_data)); res->format = unidata_bad; if (unidata_format_1 == magic) { res->format = unidata_format_1; if (!mmap_unidata_1(&(res->v.f1),fd,0,filename) && !read_unidata_1(&(res->v.f1),fd,0,filename)) { status = 0; } else status = 1; } else if (unidata_format_1 == SWAP(magic)) { res->format = unidata_format_1; if (!mmap_unidata_1(&(res->v.f1),fd,1,filename) && !read_unidata_1(&(res->v.f1),fd,1,filename)) { status = 0; } else status = 1; } else { lib_error(CATGETS(elm_msg_cat, MeSet, MeCorruptedMapfile, "Mapfile %.30s is corrupted"), filename); DPRINT(Debug,1,(&Debug, "get_unidata failed #2 -- magic check\n")); status = 0; } if (!status) { if (res) free(res); res = NULL; } close(fd); *ptr = res; return status; } struct codes { int code; char * name; }; static int lookup_code P_((struct codes *codes, char buffer[])); static int lookup_code(codes,buffer) struct codes *codes; char buffer[]; { int ptr; for (ptr = 0; codes[ptr].name; ptr++) if (0 == strcmp(codes[ptr].name,buffer)) break; return ptr; } static int lookup_from_code P_((struct codes *codes, int code)); static int lookup_from_code(codes,code) struct codes *codes; int code; { int ptr; for (ptr = 0; codes[ptr].name; ptr++) if (codes[ptr].code == code) break; return ptr; } static struct codes categories[] = { /* Mark */ { CHARTYPE_Mn, "Mn" }, /* Mark, Non-Spacing */ { CHARTYPE_Mc, "Mc" }, /* Mark, Spacing Combining */ { CHARTYPE_Me, "Me" }, /* Mark, Enclosing */ /* Number */ { CHARTYPE_Nd, "Nd" }, /* Number, Decimal Digit */ { CHARTYPE_Nl, "Nl" }, /* Number, Letter */ { CHARTYPE_No, "No" }, /* Number, Other */ /* Separator */ { CHARTYPE_Zs, "Zs" }, /* Separator, Space */ { CHARTYPE_Zl, "Zl" }, /* Separator, Line */ { CHARTYPE_Zp, "Zp" }, /* Separator, Paragraph */ /* Other */ { CHARTYPE_Cc, "Cc" }, /* Other, Control */ { CHARTYPE_Cf, "Cf" }, /* Other, Format */ { CHARTYPE_Cs, "Cs" }, /* Other, Surrogate */ { CHARTYPE_Co, "Co" }, /* Other, Private Use */ { CHARTYPE_Cn, "Cn" }, /* Other, Not Assigned */ /* Letter */ { CHARTYPE_Lu, "Lu" }, /* Letter, Uppercase */ { CHARTYPE_Ll, "Ll" }, /* Letter, Lowercase */ { CHARTYPE_Lt, "Lt" }, /* Letter, Titlecase */ { CHARTYPE_Lm, "Lm" }, /* Letter, Modifier */ { CHARTYPE_Lo, "Lo" }, /* Letter, Other */ /* Punctuation */ { CHARTYPE_Pc, "Pc" }, /* Punctuation, Connector */ { CHARTYPE_Pd, "Pd" }, /* Punctuation, Dash */ { CHARTYPE_Ps, "Ps" }, /* Punctuation, Open */ { CHARTYPE_Pe, "Pe" }, /* Punctuation, Close */ { CHARTYPE_Po, "Po" }, /* Punctuation, Other */ /* Symbol */ { CHARTYPE_Sm, "Sm" }, /* Symbol, Math */ { CHARTYPE_Sc, "Sc" }, /* Symbol, Currency */ { CHARTYPE_Sk, "Sk" }, /* Symbol, Modifier */ { CHARTYPE_So, "So" }, /* Symbol, Other */ { 0, NULL } }; static struct codes bidir_categories[] = { /* Strong types */ { DIRECTION_L, "L" }, /* Left-Right */ { DIRECTION_R, "R" }, /* Right-Left */ /* Weak types */ { DIRECTION_EN, "EN" }, /* European Number */ { DIRECTION_ES, "ES" }, /* European Number Separator */ { DIRECTION_ET, "ET" }, /* European Number Terminator */ { DIRECTION_AN, "AN" }, /* Arabic Number */ { DIRECTION_CS, "CS" }, /* Common Number Separator */ /* Separators */ { DIRECTION_B, "B" }, /* Block Separator */ { DIRECTION_S, "S" }, /* Segment Separator */ /* Neutrals */ { DIRECTION_WS, "WS" }, /* Whitespace */ { DIRECTION_ON, "ON" }, /* Other Neutrals */ /* Not Assigned */ { 0, "" }, /* Not assigned */ { 0, NULL } }; static struct codes mirrored[] = { { 0, "N" }, { DIRECTION_mirrored, "Y" }, { 0, NULL } }; static struct codes decomp_compat[] = { { DECOMP_font, "" }, /* A font variant (e.g. a blackletter form). */ { DECOMP_noBreak, "" }, /* A no-break version of a space or hyphen. */ { DECOMP_initial, "" }, /* An initial presentation form (Arabic). */ { DECOMP_medial, "" }, /* A medial presentation form (Arabic). */ { DECOMP_final, "" }, /* A final presentation form (Arabic). */ { DECOMP_isolated, "" }, /* An isolated presentation form (Arabic). */ { DECOMP_circle, "" }, /* An encircled form. */ { DECOMP_super, "" }, /* A superscript form. */ { DECOMP_sub, "" }, /* A subscript form. */ { DECOMP_vertical, "" }, /* A vertical layout presentation form. */ { DECOMP_wide, "" }, /* A wide (or zenkaku) compatibility character. */ { DECOMP_narrow, "" }, /* A narrow (or hankaku) compatibility character. */ { DECOMP_small, "" }, /* A small variant form (CNS compatibility). */ { DECOMP_square, "" }, /* A CJK squared font variant. */ { DECOMP_compat, "" }, /* Otherwise unspecified compatibility character.*/ { DECOMP_fraction, "" }, /* ??? */ { 0, NULL } }; static int write_unidata_text_1 P_((struct unidata_format_1 *v, FILE *f)); static int write_unidata_text_1(v, f) struct unidata_format_1 *v; FILE *f; { int status = 0; int code; for (code = 0; code <= 0xFFFF; code++) { if (code < v -> vector_count) { uint16 ptr = v->vector[code]; if (ptr < v -> cells_count) { struct char_cell_1 *C = &(v->cells[ptr]); uint16 chartype = READ_VALUE(C->chartype,v->byte_swapped); uint16 class = READ_VALUE(C->class,v->byte_swapped); uint16 direction = READ_VALUE(C->direction,v->byte_swapped); uint16 decomp = READ_VALUE(C->decomp,v->byte_swapped); uint16 decimal = READ_VALUE(C->decimal,v->byte_swapped); uint16 flags = READ_VALUE(C->flags,v->byte_swapped); uint16 upper = READ_VALUE(C->upper,v->byte_swapped); uint16 lower = READ_VALUE(C->lower,v->byte_swapped); uint16 title = READ_VALUE(C->title,v->byte_swapped); uint16 mirror_code = 0xF000 & direction; int ptr; direction -= mirror_code; /* Field 0 Code value in 4-digit hexadecimal format */ fprintf(f,"%04X;",code); /* Field 1 Unicode 2.0 Character Name */ fprintf(f,"Character %04X;",code); /* Field 2 General Category */ ptr = lookup_from_code(categories,chartype); if (categories[ptr].name) fprintf(f,"%s;",categories[ptr].name); else { fprintf(f,"%X;",chartype); DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X), chartype=%d (%04X)\n", code,code,chartype,chartype)); } /* Field 3 Canonical Combining Classes */ fprintf(f,"%d;",class); /* Field 4 Bidirectional Category */ ptr = lookup_from_code(bidir_categories,direction); if (bidir_categories[ptr].name) fprintf(f,"%s;",bidir_categories[ptr].name); else { fprintf(f,"%X;",direction); DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X), direction=%d (%04X)\n", code,code,direction,direction)); } /* Field 6 Character Decomposition */ if (flags & FLAGS_have_decomp) { if (decomp < v->decomp_cells_count) { struct decomp_cell_1 *D = &(v->decomp_cells[decomp]); uint16 storage = READ_VALUE(D->storage, v->byte_swapped); uint16 value = READ_VALUE(D->value, v->byte_swapped); uint8 type_byte = D->type_byte; uint8 len_byte = D->len_byte; if (value != code) { DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X), value=%d (%04X)\n", code,code,value,value)); } if (type_byte) { ptr = lookup_from_code(decomp_compat,type_byte); if (decomp_compat[ptr].name) fprintf(f,"%s ",decomp_compat[ptr].name); else { fprintf(f,"<%X> ",type_byte); DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X), type_byte=%d (%02X)\n", code,code,type_byte,type_byte)); } } if (len_byte) { if (storage + len_byte <= v -> decomp_storage_count) { int i; for (i = 0; i < len_byte; i++) { fprintf(f,"%04X", v->decomp_storage[storage+i]); if (i < len_byte-1) fprintf(f," "); } } else { DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X) (decomp)storage=%d (%04X) len_byte=%d (%02X)\n", code,code,storage,storage, len_byte,len_byte)); } } else { DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X) zero length decomposition\n", code,code)); } fprintf(f,";"); } else { fprintf(f,"<%04X>;",decomp); DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X), decomp(cell)=%d (%04X)\n", code,code,decomp,decomp)); } } else fprintf(f,";"); /* Read field 6 Decimal digit value */ if (flags & FLAGS_have_decimal) { fprintf(f,"%d;",decimal); } else fprintf(f,";"); /* Field 7 Digit value */ fprintf(f,";"); /* Field 8 Numeric value */ fprintf(f,";"); /* Field 9 */ ptr = lookup_from_code(mirrored,mirror_code); if (mirrored[ptr].name) fprintf(f,"%s;",mirrored[ptr].name); else { fprintf(f,"%X;",mirror_code); DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X), mirror_code=%d (%04X)\n", code,code,mirror_code,mirror_code)); } /* Field 10 Unicode 1.0 Name */ fprintf(f,";"); /* Field 11 10646 Comment field */ fprintf(f,";"); /* Field 12 Upper case equivalent mapping */ if (flags & FLAGS_have_upper) { fprintf(f,"%04X;",upper); } else fprintf(f,";"); /* Field 13 Lower case equivalent mapping */ if (flags & FLAGS_have_lower) { fprintf(f,"%04X;",lower); } else fprintf(f,";"); /* Field 14 Title case equivalent mapping */ if (flags & FLAGS_have_title) { fprintf(f,"%04X\n",title); } else fprintf(f,"\n"); } else { DPRINT(Debug,1,(&Debug, "unidata: bad unidata, code=%d (%04X), (cell)ptr=%d (%04X)\n", code,code,ptr,ptr)); } } } return status; } int write_unidata_text(v,f) struct unidata_mapped_data *v; FILE *f; { int status = 0; switch(v->format) { case unidata_format_1: status = write_unidata_text_1(&(v->v.f1),f); break; } return status; } static int read_field P_((FILE *fp, int *c, char * buffer, int size, FILE *logfile)); static int read_field (fp,c,buffer,size,logfile) FILE *fp; int *c; char * buffer; int size; FILE *logfile; { int ptr; for (ptr = 0, *c = fgetc(fp); ptr < size -1; *c = fgetc(fp), ptr++) { if (';' == *c || EOF == *c || '\n' == *c) break; if (logfile) { putc(*c,logfile); } buffer[ptr] = *c; } buffer[ptr] = '\0'; if (logfile && EOF != *c) { putc(*c,logfile); } return ptr; } static void format_error P_((FILE *logfile,CONST char *filename, int *line_number,char *buffer, int c)); static void format_error(logfile,filename,line_number,buffer, c) FILE *logfile; CONST char *filename; int *line_number; char *buffer; int c; { lib_error(CATGETS(elm_msg_cat, MeSet, MeFormatError, "Format error on %.50s, line %d: %s"), filename,*line_number,buffer); if (logfile) { fprintf(logfile,"<-- [ERROR]\n"); fprintf(logfile,catgets(elm_msg_cat, MeSet, MeFormatError, "Format error on %.50s, line %d: %s"), filename,*line_number,buffer); putc('\n',logfile); if ('\n' != c) fprintf(logfile,"%05d: ",*line_number); } } static int read_line P_((FILE *fp, struct char_cell_1 *buffer, uint16 *code, char *name_buffer, int name_buffer_size, int *line_number, CONST char *filename, FILE *logfile, char * composition_buffer, int composition_buffer_size)); static int read_line(fp,buffer,code,name_buffer,name_buffer_size, line_number, filename, logfile, composition_buffer, composition_buffer_size) FILE *fp; struct char_cell_1 *buffer; uint16 *code; char *name_buffer; int name_buffer_size; int *line_number; CONST char *filename; FILE *logfile; char *composition_buffer; int composition_buffer_size; { char number_buffer[5], *endptr; char category_buffer[3]; char combining_buffer[4]; char bidir_category_buffer[3]; char decimal_buffer[5]; char digit_buffer[10]; char numeric_buffer[10]; char mirror_buffer[2]; char unicode10_buffer[70]; char comment_buffer[100]; char upper_buffer[5]; char lower_buffer[5]; char title_buffer[5]; int c = '\0',ptr; int status = 0; long l; /* Format: 0000;;Cc;0;ON;;;;;N;NULL;;;; */ if (logfile) { fprintf(logfile,"%05d: ",*line_number); } /* Read field 0 Code value in 4-digit hexadecimal format */ ptr = read_field(fp,&c,number_buffer,sizeof number_buffer,logfile); /* EOF on beginning is OK */ if (0 == ptr && EOF == c) goto fail; buffer->flags = 0; if (EOF == c) goto got_eof; if (4 != ptr || c != ';') { format_error(logfile,filename,line_number,number_buffer,c); goto fail; } *code = l = strtol(number_buffer,&endptr,16); if ('\0' != *endptr || *code != l) { format_error(logfile,filename,line_number,number_buffer,c); goto fail; } /* Read field 1 Unicode 2.0 Character Name */ ptr = read_field(fp,&c,name_buffer,name_buffer_size,logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,number_buffer,c); goto fail; } /* Read field 2 General Category */ ptr = read_field(fp,&c,category_buffer,sizeof category_buffer,logfile); if (EOF == c) goto got_eof; if (2 != ptr || c != ';') { format_error(logfile,filename,line_number,category_buffer,c); goto fail; } ptr = lookup_code(categories,category_buffer); if (!categories[ptr].name) { format_error(logfile,filename,line_number,category_buffer,c); goto fail; } buffer->chartype = categories[ptr].code; /* Read field 3 Canonical Combining Classes */ ptr = read_field(fp,&c,combining_buffer,sizeof combining_buffer,logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,combining_buffer,c); goto fail; } buffer->class = strtol(combining_buffer,&endptr,10); if ('\0' != *endptr) { format_error(logfile,filename,line_number,combining_buffer,c); goto fail; } /* Read field 4 Bidirectional Category */ ptr = read_field(fp,&c,bidir_category_buffer,sizeof bidir_category_buffer, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,bidir_category_buffer,c); goto fail; } ptr = lookup_code(bidir_categories,bidir_category_buffer); if (!bidir_categories[ptr].name) { format_error(logfile,filename,line_number,bidir_category_buffer,c); goto fail; } buffer->direction = bidir_categories[ptr].code; /* Read field 5 Character Decomposition */ ptr = read_field(fp,&c,composition_buffer,composition_buffer_size, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,composition_buffer,c); goto fail; } /* Read field 6 Decimal digit value */ ptr = read_field(fp,&c,decimal_buffer,sizeof decimal_buffer, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,decimal_buffer,c); goto fail; } if (0 != ptr) { buffer->decimal = strtol(decimal_buffer,&endptr,10); if ('\0' != *endptr) { format_error(logfile,filename,line_number,decimal_buffer,c); goto fail; } buffer->flags |= FLAGS_have_decimal; } /* Read field 7 Digit value */ ptr = read_field(fp,&c,digit_buffer,sizeof digit_buffer, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,digit_buffer,c); goto fail; } /* Digit value IGNORED */ /* NOTE: For superscript numbers there should be 'Digit value' * but not 'Decimal digit value', But seems that there * is bug on UNIDATA2.TXT, and therefore values is given * also on 'Decimal gigit value'. For example: * 2070;SUPERSCRIPT ZERO;No;0;EN; 0030;0;0;0;N;SUPERSCRIPT DIGIT ZERO;;;; * * Because of this, fields 'Digit value' and 'Decimal * digit value' are identical. */ /* Read field 8 Numeric value */ ptr = read_field(fp,&c,numeric_buffer,sizeof numeric_buffer,logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,numeric_buffer,c); goto fail; } /* Numeric value IGNORED */ /* Read field 9 */ ptr = read_field(fp,&c,mirror_buffer,sizeof mirror_buffer,logfile); if (EOF == c) goto got_eof; if (1 != ptr || c != ';') { format_error(logfile,filename,line_number,mirror_buffer,c); goto fail; } ptr = lookup_code(mirrored,mirror_buffer); if (!mirrored[ptr].name) { format_error(logfile,filename,line_number,mirror_buffer,c); goto fail; } buffer->direction |= mirrored[ptr].code; /* Read field 10 Unicode 1.0 Name */ ptr = read_field(fp,&c,unicode10_buffer,sizeof unicode10_buffer, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,unicode10_buffer,c); goto fail; } /* Unicode 1.0 Name IGNORED */ /* Read field 11 10646 Comment field */ ptr = read_field(fp,&c,comment_buffer,sizeof comment_buffer, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,comment_buffer,c); goto fail; } /* 10646 Comment field IGNORED */ /* Read field 12 Upper case equivalent mapping */ ptr = read_field(fp,&c,upper_buffer,sizeof upper_buffer, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,upper_buffer,c); goto fail; } if (0 != ptr) { if (4 != ptr) { format_error(logfile,filename,line_number,upper_buffer,c); goto fail; } buffer->upper = strtol(upper_buffer,&endptr,16); if ('\0' != *endptr) { format_error(logfile,filename,line_number,upper_buffer,c); goto fail; } buffer->flags |= FLAGS_have_upper; } /* Read field 13 Lower case equivalent mapping */ ptr = read_field(fp,&c,lower_buffer,sizeof lower_buffer, logfile); if (EOF == c) goto got_eof; if (c != ';') { format_error(logfile,filename,line_number,lower_buffer,c); goto fail; } if (0 != ptr) { if (4 != ptr) { format_error(logfile,filename,line_number,lower_buffer,c); goto fail; } buffer->lower = strtol(lower_buffer,&endptr,16); if ('\0' != *endptr) { format_error(logfile,filename,line_number,lower_buffer,c); goto fail; } buffer->flags |= FLAGS_have_lower; } /* Read field 14 Title case equivalent mapping */ ptr = read_field(fp,&c,title_buffer,sizeof title_buffer, logfile); if (EOF == c) goto got_eof; if (c != '\n') { format_error(logfile,filename,line_number,title_buffer,c); goto fail; } if (0 != ptr) { if (4 != ptr) { format_error(logfile,filename,line_number,title_buffer,c); goto fail; } buffer->title = strtol(title_buffer,&endptr,16); if ('\0' != *endptr) { format_error(logfile,filename,line_number,title_buffer,c); goto fail; } buffer->flags |= FLAGS_have_title; } status = 1; got_eof: if (EOF == c) { int err = errno; status = 0; if (ferror(fp)) { lib_error(CATGETS(elm_msg_cat, MeSet, MeGotError, "Unexpected error on %.50s, line %d: %.30s"), filename,*line_number,error_description(err)); if (logfile) { fprintf(logfile,"<-- [ERROR]\n"); fprintf(logfile,catgets(elm_msg_cat, MeSet, MeGotError, "Unexpected error on %.50s, line %d: %.30s"), filename,*line_number,error_description(err)); putc('\n',logfile); } } else { lib_error(CATGETS(elm_msg_cat, MeSet, MeGotEOF, "Unexpected end of %.50s, line %d"), filename,*line_number); if (logfile) { fprintf(logfile,"<-- [ERROR]\n"); fprintf(logfile,catgets(elm_msg_cat, MeSet, MeGotEOF, "Unexpected end of %.50s, line %d"), filename,*line_number); putc('\n',logfile); } } } fail: /* If we are not on end of line because of error, * go to end of line. */ while (EOF != c && '\n' != c) { c = fgetc(fp); if (logfile && EOF != c) { putc(c,logfile); } } if ('\n' == c) (*line_number)++; if (EOF == c && logfile) { fprintf(logfile,"<-- [EOF]\n"); } return status; } static int same_char_cell_1 P_((struct char_cell_1 *p1, struct char_cell_1 *p2)); static int same_char_cell_1(p1,p2) struct char_cell_1 *p1; struct char_cell_1 *p2; { if (p1->chartype != p2->chartype) return 0; if (p1->class != p2->class) return 0; if (p1->direction != p2->direction) return 0; if (p1->flags != p2->flags) return 0; if (0 != (p1->flags & FLAGS_have_decimal) && p1->decimal != p2->decimal) return 0; if (0 != (p1->flags & FLAGS_have_decomp) && p1->decomp != p2->decomp) return 0; if (0 != (p1->flags & FLAGS_have_upper) && p1->upper != p2->upper) return 0; if (0 != (p1->flags & FLAGS_have_lower) && p1->lower != p2->lower) return 0; if (0 != (p1->flags & FLAGS_have_title) && p1->title != p2->title) return 0; return 1; } static int push_decomp_cell P_((struct decomp_cell_1 buffer, struct unidata_format_1 *v)); static int push_decomp_cell(buffer,v) struct decomp_cell_1 buffer; struct unidata_format_1 *v; { if (v->decomp_cells_count < v->decomp_cells_length / sizeof (struct decomp_cell_1)) { int idx = v->decomp_cells_count; if (buffer.len_byte > 0) { uint16 code = v->decomp_storage[buffer.storage]; while (code >= v->comp_vector_count) { if (v->comp_vector_count >= v->comp_vector_length / sizeof (uint16)) { return -1; /* OVERFLOW */ } /* add_decompostion should guarantee that first cell * is unassigned */ v->comp_vector[v->comp_vector_count++] = 0x0000; } if (code < v->comp_vector_count) { buffer.next_comp = v->comp_vector[code]; v->comp_vector[code] = idx; } else buffer.next_comp = 0; } else { DPRINT(Debug,5,(&Debug, "unidata: push_decomp_cell (idx=%d (%04X)) -- zero decomposition\n", idx,idx)); buffer.next_comp = 0; } v->decomp_cells[idx] = buffer; v->decomp_cells_count++; return idx; } return -1; } static int add_decomposition P_((char *decomposition_buffer, int line_number, CONST char *filename, struct unidata_format_1 *v, uint16 *decomp, FILE *logfile, unsigned int code1)); static int add_decomposition(decomposition_buffer, line_number, filename,v,decomp, logfile,code1) char *decomposition_buffer; int line_number; CONST char *filename; struct unidata_format_1 *v; uint16 *decomp; FILE *logfile; unsigned int code1; { int status = 0; char *brk = safe_strdup(decomposition_buffer); char *ptr; struct decomp_cell_1 buffer; int code; if (v->decomp_cells_count == 0) push_decomp_cell(unassigned_decomp_buffer,v); buffer.type_byte = 0; buffer.len_byte = 0; buffer.next_comp = 0; buffer.storage = v -> decomp_storage_count; buffer.value = code1; /* Initial modifier */ if ('<' == brk[0]) { int ptr1; ptr = strtok(brk," "); ptr1 = lookup_code(decomp_compat,ptr); if (!decomp_compat[ptr1].name) { lib_error(CATGETS(elm_msg_cat, MeSet, MeFormatError, "Format error on %.50s, line %d: %s"), filename,line_number,ptr); if (logfile) { fprintf(logfile,catgets(elm_msg_cat, MeSet, MeFormatError, "Format error on %.50s, line %d: %s"), filename,line_number,ptr); putc('\n',logfile); } goto fail; } buffer.type_byte = decomp_compat[ptr1].code; ptr = NULL; } else { buffer.type_byte = 0; ptr = brk; } for (ptr = strtok(ptr," "); ptr; ptr = strtok(NULL," ")) { char *endptr; long l; if (v -> decomp_storage_count >= v->decomp_storage_length / sizeof (uint16)) goto fail; /* OVERFLOW */ v->decomp_storage[v -> decomp_storage_count] = l = strtol(ptr,&endptr,16); if ('\0' != *endptr || v->decomp_storage[v -> decomp_storage_count] != l) { lib_error(CATGETS(elm_msg_cat, MeSet, MeFormatError, "Format error on %.50s, line %d: %s"), filename,line_number,ptr); if (logfile) { fprintf(logfile,catgets(elm_msg_cat, MeSet, MeFormatError, "Format error on %.50s, line %d: %s"), filename,line_number,ptr); putc('\n',logfile); } goto fail; } buffer.len_byte = (++v -> decomp_storage_count) - buffer.storage; if (v -> decomp_storage_count - buffer.storage > 255) { /* OVERFLOW */ goto fail; } } *decomp = code = push_decomp_cell(buffer,v); if (code < 1 || *decomp != code) goto fail; status = 1; fail: free(brk); return status; } int read_unidata_text(ptr,filename,logfile, silent_gaps) struct unidata_mapped_data **ptr; CONST char *filename; FILE *logfile; int silent_gaps; { struct unidata_mapped_data * res = *ptr; int status = 0; FILE *f; int last_processed = -1; int line_number = 1; int unassigned_cell = -1; char composition_buffer[160]; /* This should not have readed by setgid Elm ME+ so this test is not necessary */ int err = can_open(filename,"r"); if (err) { lib_error(CATGETS(elm_msg_cat, MeSet, MeUnidataCantOpen, "Can't open UNIDATA2.TXT: %.50s: %.25s"), filename,error_description(err)); return 0; } f = fopen(filename,"r"); if (!f) { int err = errno; lib_error(CATGETS(elm_msg_cat, MeSet, MeUnidataCantOpen, "Can't open UNIDATA2.TXT: %.50s: %.25s"), filename,error_description(err)); goto fail; } if (res) free_unidata(res); res = NULL; res = safe_malloc(sizeof (struct unidata_mapped_data)); res->format = unidata_format_1; /* malloc maximally needed area */ malloc_unidata_1(&(res->v.f1)); status = 1; while (!feof(f)) { struct char_cell_1 temp_buffer; char temp_name_buffer[100], *point; uint16 code,cell; if (!read_line(f,&temp_buffer,&code, temp_name_buffer, sizeof temp_name_buffer, &line_number, filename, logfile, composition_buffer, sizeof composition_buffer)) { if (feof(f)) break; status = 0; /* FAILURE */ continue; } if (composition_buffer[0]) { if (!add_decomposition(composition_buffer,line_number-1, filename,&(res->v.f1), &temp_buffer.decomp,logfile,code)) { status = 0; /* FAILURE */ continue; } temp_buffer.flags |= FLAGS_have_decomp; } if (status && last_processed+1 != code) { if (!silent_gaps) { lib_error(CATGETS(elm_msg_cat, MeSet, MeSequenceError, "%.50s, line %d: Got %d (%04X), exptected %d (%04X)"), filename,line_number-1,code,code, last_processed+1,last_processed+1); } if (logfile) { fprintf(logfile,catgets(elm_msg_cat, MeSet, MeSequenceError, "%.50s, line %d: Got %d (%04X), exptected %d (%04X)"), filename,line_number-1,code,code, last_processed+1,last_processed+1); putc('\n',logfile); /* Do not log failure... */ } } last_processed = code; /* simple compression */ if (res->v.f1.cells_count < 1 || !same_char_cell_1(&(res->v.f1.cells[res->v.f1.cells_count-1]), &temp_buffer)) { if (res->v.f1.cells_count < res->v.f1.cells_length / sizeof (struct char_cell_1)) { cell = res->v.f1.cells_count; res->v.f1.cells[cell] = temp_buffer; res->v.f1.cells_count = cell+1; } else break; /* OVERFLOW */ } else cell = res->v.f1.cells_count-1; if (code < res->v.f1.vector_length / sizeof (uint16)) { while (res->v.f1.vector_count < code) { if (unassigned_cell == -1) { if (res->v.f1.cells_count < res->v.f1.cells_length / sizeof (struct char_cell_1)) { unassigned_cell = res->v.f1.cells_count; res->v.f1.cells[unassigned_cell] = unassigned_cell_buffer; res->v.f1.cells_count = unassigned_cell+1; } else break; /* OVERFLOW */ } res->v.f1.vector[res->v.f1.vector_count++] = unassigned_cell; } res->v.f1.vector[code] = cell; if (res->v.f1.vector_count <= code) res->v.f1.vector_count++; } else break; /* OVERFLOW */ if ('<' == temp_name_buffer[0] && (point = strchr(temp_name_buffer,',')) && 0 == strcmp(point,", First>")) { int n = point - temp_name_buffer; struct char_cell_1 temp_buffer2; char temp_name_buffer2[100], *point2; uint16 code2; char composition_buffer2[160]; /* Whole range actually */ if (!read_line(f,&temp_buffer2,&code2, temp_name_buffer2, sizeof temp_name_buffer2, &line_number, filename, logfile, composition_buffer2, sizeof composition_buffer2)) { status = 0; /* FAILURE */ continue; } if (temp_buffer.flags & FLAGS_have_decomp) { temp_buffer2.flags |= FLAGS_have_decomp; temp_buffer2.decomp = temp_buffer.decomp; } if ('<' == temp_name_buffer2[0] && (point2 = strchr(temp_name_buffer2,',')) && 0 == strcmp(point2,", Last>") && n == point2 - temp_name_buffer2 && 0 == strncmp(temp_name_buffer,temp_name_buffer2,n) && code2 > code && 0 == strcmp(composition_buffer,composition_buffer2) && same_char_cell_1(&temp_buffer,&temp_buffer2)) { if (code2 < res->v.f1.vector_length / sizeof (uint16)) { int i; for (i = code; i <= code2; i++) res->v.f1.vector[i] = cell; if (res->v.f1.vector_count <= code2) res->v.f1.vector_count = code2+1; last_processed = code2; } else break; /* OVERFLOW */ } else { lib_error(CATGETS(elm_msg_cat, MeSet, MeNotEndRange, "%.50s: %d: %.25s is not end for %.25s"), filename,line_number-1, temp_name_buffer2,temp_name_buffer2); if (logfile) { fprintf(logfile, catgets(elm_msg_cat, MeSet, MeNotEndRange, "%.50s: %d: %.25s is not end for %.25s"), filename,line_number-1, temp_name_buffer2,temp_name_buffer2); putc('\n',logfile); } status = 0; } } } if (status && last_processed != 0xFFFF) { if (!silent_gaps) lib_error(CATGETS(elm_msg_cat, MeSet, MeIncomplete, "%.50s: Last processed was %d, should be %d"), filename,last_processed,0xFFFF); if (logfile) { fprintf(logfile, catgets(elm_msg_cat, MeSet, MeIncomplete, "%.50s: Last processed was %d, should be %d"), filename,last_processed,0xFFFF); putc('\n',logfile); } } /* recalculate space according of usage and * zero fill unused area */ trim_unidata_1(&(res->v.f1)); debug_output(&(res->v.f1),status, res->v.f1.header ? READ_VALUE(res->v.f1.header->page_size, res->v.f1.byte_swapped) : 0,"text",filename); fail: if (f) fclose(f); *ptr = res; return status; } static int unicode_compress_input_1 P_((struct unidata_format_1 *v, uint16 *unicode_res, uint16 unicode_inp[], int len, int *comp_type)); int unicode_compress_input_1(v,unicode_res,unicode_inp,len,comp_type) struct unidata_format_1 *v; uint16 *unicode_res; uint16 unicode_inp[]; int len; int *comp_type; { int status = 0; if (len < 1) status = -1; else { uint16 idx = unicode_inp[0]; if (idx < v ->comp_vector_count) { uint16 ptr = READ_VALUE(v->comp_vector[idx],v->byte_swapped); int found = 0; status = 0; while(ptr != 0 && ptr < v->decomp_cells_count && !found) { struct decomp_cell_1 *C = &v->decomp_cells[ptr]; uint16 storage = READ_VALUE(C->storage,v->byte_swapped); uint8 len_1 = C->len_byte; if (storage + len_1 <= v->decomp_storage_count) { int i; found = 1; for (i = 0; i < len_1 && i < len; i++) if (unicode_inp[i] != READ_VALUE(v->decomp_storage[storage+i], v->byte_swapped)) found = 0; if (found) { *unicode_res = READ_VALUE(C->value, v->byte_swapped); *comp_type = C->type_byte; if (i < len_1) status = -1; else status = i; } } ptr = READ_VALUE(C->next_comp,v->byte_swapped); } } else status = 0; } return status; } static int unicode_lookup_character_1 P_((struct unidata_format_1 *v, unsigned int unicode, struct character_information *info, uint16 unicode_out[], int buffer_len)); static int unicode_lookup_character_1(v,unicode,info,unicode_out,buffer_len) struct unidata_format_1 *v; unsigned int unicode; struct character_information *info; uint16 unicode_out[]; int buffer_len; { int status = -1; info -> character_type = CHARTYPE_Cn; info -> direction_data = 0; info -> mirror_code = 0; info -> upper = 0; info -> lower = 0; info -> title = 0; info -> decomp_len = 0; if (buffer_len > 0) { /* Pre-init buffer */ int i; unicode_out[0] = unicode; for (i = 1; i < buffer_len; i++) unicode_out[i] = 0; } if (unicode < v -> vector_count) { uint16 ptr = v->vector[unicode]; if (ptr < v -> cells_count) { struct char_cell_1 *C = &(v->cells[ptr]); uint16 chartype = READ_VALUE(C->chartype,v->byte_swapped); uint16 class = READ_VALUE(C->class,v->byte_swapped); uint16 direction = READ_VALUE(C->direction,v->byte_swapped); uint16 decomp = READ_VALUE(C->decomp,v->byte_swapped); uint16 decimal = READ_VALUE(C->decimal,v->byte_swapped); uint16 flags = READ_VALUE(C->flags,v->byte_swapped); uint16 upper = READ_VALUE(C->upper,v->byte_swapped); uint16 lower = READ_VALUE(C->lower,v->byte_swapped); uint16 title = READ_VALUE(C->title,v->byte_swapped); uint16 mirror_code = 0xF000 & direction; direction -= mirror_code; info -> character_type = chartype; info -> direction_data = direction; info -> mirror_code = mirror_code; if (flags & FLAGS_have_upper) { info -> upper = upper; } if (flags & FLAGS_have_lower) { info -> lower = lower; } if (flags & FLAGS_have_title) { info -> title = title; } if (flags & FLAGS_have_decomp) { if (decomp < v->decomp_cells_count) { struct decomp_cell_1 *D = &(v->decomp_cells[decomp]); uint16 storage = READ_VALUE(D->storage, v->byte_swapped); #if 0 uint8 type_byte = D->type_byte; #endif uint8 len_byte = D->len_byte; if (storage + len_byte <= v->decomp_storage_count) { if (buffer_len > 0) { int i; for (i = 0; i < len_byte && i < buffer_len; i++) unicode_out[i] = READ_VALUE(v->decomp_storage[storage+i], v->byte_swapped); } info -> decomp_len = len_byte; } } } } else return -1; } else return -1; switch(CHARTYPE_major(info -> character_type)) { case CHARTYPE_Mark: case CHARTYPE_Number: case CHARTYPE_Separator: case CHARTYPE_Letter: case CHARTYPE_Punctuation: case CHARTYPE_Symbol: status = 1; break; default: status = 0; break; } #ifdef DEBUG if (status < 1) { DPRINT(Debug,63,(&Debug, "unicode_lookup_character_1: %04X: type=%d (%04X),", unicode,info -> character_type,info -> character_type)); if (Debug.active > 62) { int ptr = lookup_from_code(categories,info -> character_type); if (categories[ptr].name) { DPRINT(Debug,63,(&Debug," type=%s,",categories[ptr].name)); } DPRINT(Debug,63,(&Debug,", return %d\n",status)); } } #endif return status; } /* Note: buffer_len == 0 if not unicode decompression wanted buffer_len : on bytes (sizeof) Returns 0 = failure (not valid character) 1 = succeed -1 = database bad */ int unicode_lookup_character(v,unicode,info, unicode_out, buffer_len) struct unidata_mapped_data *v; unsigned int unicode; struct character_information *info; uint16 unicode_out[]; int buffer_len; { int status = -1; switch(v->format) { case unidata_format_1: status = unicode_lookup_character_1(&(v->v.f1),unicode,info, unicode_out, buffer_len); break; } return status; } /* Returns = 0 failure (not compressable) = 1 (one-to-one mapping) > 0 compressed input returned = -1 incomplete input */ int unicode_compress_input(v,unicode_res,unicode_inp,len,comp_type) struct unidata_mapped_data *v; uint16 *unicode_res; uint16 unicode_inp[]; int len; int *comp_type; { int status = 0; switch(v->format) { case unidata_format_1: status = unicode_compress_input_1(&(v->v.f1), unicode_res,unicode_inp,len, comp_type); break; } return status; } /* * Local Variables: * mode:c * c-basic-offset:4 * buffer-file-coding-system: iso-8859-1 * End: */