/* text - uniform multi-byte/wide text handling
* Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
*
* The MIT License
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <ctype.h>
#include <wchar.h>
#include <wctype.h>
#include "mba/msgno.h"
#include "mba/text.h"
/* Count the bytes of the NUL-terminated string at src without ever
 * examining memory at or beyond slim.
 *
 * Returns the number of bytes before the terminator, or 0 if src is NULL,
 * src is not below slim, or slim is reached before a terminator is found.
 */
int
str_length(const unsigned char *src, const unsigned char *slim)
{
    const unsigned char *p;

    if (src == NULL || src >= slim) {
        return 0;
    }
    for (p = src; *p != '\0'; ) {
        /* running into the limit means the string is unterminated */
        if (++p == slim) {
            return 0;
        }
    }
    return p - src;
}
/* Wide character counterpart of str_length.
 *
 * Returns the number of wide characters before the terminator, or 0 if src
 * is NULL, src is not below slim, or slim is reached before a terminator
 * is found.
 */
int
wcs_length(const wchar_t *src, const wchar_t *slim)
{
    const wchar_t *p = src;

    if (src == NULL || src >= slim) {
        return 0;
    }
    for ( ;; ) {
        if (*p == L'\0') {
            return p - src;
        }
        p++;
        if (p == slim) {
            return 0; /* no terminator inside the limit */
        }
    }
}
/* Compute the storage, in bytes, occupied by the NUL-terminated string at
 * src including its terminator, without examining memory at or beyond slim.
 *
 * Returns 0 if src is NULL, src is not below slim, or slim is reached
 * before a terminator is found.
 */
size_t
str_size(const unsigned char *src, const unsigned char *slim)
{
    size_t count;

    if (src == NULL || src >= slim) {
        return 0;
    }
    /* count starts at 1 so the terminator is included */
    for (count = 1; *src != '\0'; count++) {
        src++;
        if (src == slim) {
            return 0;
        }
    }
    return count * sizeof *src;
}
/* Wide character counterpart of str_size: the storage in bytes occupied by
 * the wide string at src including its terminator.
 *
 * Returns 0 if src is NULL, src is not below slim, or slim is reached
 * before a terminator is found.
 */
size_t
wcs_size(const wchar_t *src, const wchar_t *slim)
{
    const wchar_t *end = src;

    if (src == NULL || src >= slim) {
        return 0;
    }
    while (*end != L'\0') {
        if (++end == slim) {
            return 0;
        }
    }
    /* +1 accounts for the terminator */
    return (size_t)(end - src + 1) * sizeof *end;
}
/* Copy at most n bytes of the string at src into dst and NUL-terminate the
 * result. A negative n places no limit on the number of bytes copied.
 *
 * Nothing at or beyond slim is read and nothing at or beyond dlim is
 * written. If either limit is reached while copying, the destination is
 * reset to the empty string and 0 is returned.
 *
 * Returns the number of bytes copied, not counting the terminator. If dst
 * is NULL or not below dlim nothing is written and 0 is returned.
 */
int
str_copy(const unsigned char *src, const unsigned char *slim,
        unsigned char *dst, unsigned char *dlim, int n)
{
    unsigned char *out = dst;

    if (dst == NULL || dst >= dlim) {
        return 0;
    }
    if (src != NULL && src < slim) {
        for ( ; n != 0 && *src != '\0'; n--) {
            *out++ = *src++;
            if (src == slim || out == dlim) {
                out = dst; /* ran into a limit: yield an empty result */
                break;
            }
        }
    }
    *out = '\0';
    return out - dst;
}
/* Wide character counterpart of str_copy: copy at most n wide characters
 * of the string at src into dst and terminate the result. A negative n
 * places no limit on the number of characters copied.
 *
 * Nothing at or beyond slim is read and nothing at or beyond dlim is
 * written. If either limit is reached while copying, the destination is
 * reset to the empty string and 0 is returned.
 *
 * Returns the number of wide characters copied, not counting the
 * terminator. If dst is NULL or not below dlim nothing is written and 0 is
 * returned.
 */
int
wcs_copy(const wchar_t *src, const wchar_t *slim,
        wchar_t *dst, wchar_t *dlim, int n)
{
    wchar_t *out = dst;

    if (dst == NULL || dst >= dlim) {
        return 0;
    }
    if (src == NULL || src >= slim) {
        *dst = L'\0';
        return 0;
    }
    for ( ;; ) {
        if (n-- == 0 || *src == L'\0') {
            break;
        }
        *out++ = *src++;
        if (src == slim || out == dlim) {
            out = dst; /* ran into a limit: yield an empty result */
            break;
        }
    }
    *out = L'\0';
    return out - dst;
}
/* Duplicate at most n bytes of the string at src into memory obtained from
 * allocator_alloc(al, ...) and store the NUL-terminated copy in *dst. A
 * negative n places no limit on the number of bytes copied. Nothing at or
 * beyond slim is read.
 *
 * Returns the number of bytes copied, not counting the terminator.
 * Returns 0 without touching anything if dst is NULL. Returns 0 with *dst
 * set to NULL if src is NULL, src is not below slim, or slim is reached
 * while scanning. Returns -1 with *dst set to NULL if the allocation fails.
 */
int
str_copy_new(const unsigned char *src,
        const unsigned char *slim,
        unsigned char **dst,
        int n,
        struct allocator *al)
{
    const unsigned char *end;
    size_t len;

    if (dst == NULL) {
        return 0;
    }
    *dst = NULL;
    if (src == NULL || src >= slim) {
        return 0;
    }
    for (end = src; n != 0 && *end != '\0'; n--) {
        if (++end == slim) {
            return 0;
        }
    }
    len = end - src;

    *dst = allocator_alloc(al, len + 1, 0);
    if (*dst == NULL) {
        return -1;
    }
    memcpy(*dst, src, len + 1);
    (*dst)[len] = '\0'; /* src[len] is not NUL when n cut the copy short */

    return (int)len;
}
/* Wide character counterpart of str_copy_new: duplicate at most n wide
 * characters of the string at src into memory obtained from
 * allocator_alloc(al, ...) and store the terminated copy in *dst. A
 * negative n places no limit on the number of characters copied. Nothing
 * at or beyond slim is read.
 *
 * Returns the number of wide characters copied, not counting the
 * terminator. Returns 0 without touching anything if dst is NULL. Returns
 * 0 with *dst set to NULL if src is NULL, src is not below slim, or slim
 * is reached while scanning. Returns -1 with *dst set to NULL if the
 * allocation fails.
 */
int
wcs_copy_new(const wchar_t *src,
        const wchar_t *slim,
        wchar_t **dst,
        int n,
        struct allocator *al)
{
    const wchar_t *end = src;
    size_t nbytes;

    if (dst == NULL) {
        return 0;
    }
    if (src == NULL || src >= slim) {
        *dst = NULL;
        return 0;
    }
    for ( ;; ) {
        if (n-- == 0 || *end == L'\0') {
            break;
        }
        end++;
        if (end == slim) {
            *dst = NULL;
            return 0;
        }
    }

    /* room for the copied characters plus the terminator */
    nbytes = (end - src + 1) * sizeof *end;
    *dst = allocator_alloc(al, nbytes, 0);
    if (*dst == NULL) {
        return -1;
    }
    memcpy(*dst, src, nbytes);
    (*dst)[end - src] = L'\0'; /* src may not be NUL here when n cut the copy short */

    return end - src;
}
/* Standard UTF-8 decoder
 *
 * Decode the single UTF-8 sequence starting at src into *wc without
 * reading at or beyond slim.
 *
 * Returns the number of bytes consumed (1 to 4) on success, 0 if fewer
 * bytes than the sequence requires are available before slim, or -1 with
 * errno set to EILSEQ if the input is malformed: an invalid lead byte, a
 * bad continuation byte, an overlong encoding, or a 4-byte sequence on a
 * platform whose wchar_t is too narrow to hold the result. *wc may have
 * been partially written when -1 is returned.
 */
int
utf8towc(const unsigned char *src, const unsigned char *slim, wchar_t *wc)
{
    const unsigned char *start = src;
    ptrdiff_t n = slim - src;

    if (n < 1) return 0;

    if (*src < 0x80) {
        *wc = *src;
    } else if ((*src & 0xE0) == 0xC0) {
        if (n < 2) return 0;
        *wc = (*src++ & 0x1F) << 6;
        if ((*src & 0xC0) != 0x80) {
            errno = EILSEQ;
            return -1;
        }
        *wc |= *src & 0x3F;
        if (*wc < 0x80) { /* overlong */
            errno = EILSEQ;
            return -1;
        }
    } else if ((*src & 0xF0) == 0xE0) {
        if (n < 3) return 0;
        *wc = (*src++ & 0x0F) << 12;
        if ((*src & 0xC0) != 0x80) {
            errno = EILSEQ;
            return -1;
        }
        *wc |= (*src++ & 0x3F) << 6;
        if ((*src & 0xC0) != 0x80) {
            errno = EILSEQ;
            return -1;
        }
        *wc |= *src & 0x3F;
        if (*wc < 0x800) { /* overlong */
            errno = EILSEQ;
            return -1;
        }
    } else if ((*src & 0xF8) == 0xF0) {
        if (n < 4) return 0;
        /* Only 4-byte sequences encode values above 0xFFFF, so this is
         * the only form that cannot be stored in a 16 bit wchar_t.
         */
        if (sizeof *wc < 3) {
            errno = EILSEQ; /* surrogates not supported */
            return -1;
        }
        *wc = (*src++ & 0x07) << 18;
        if ((*src & 0xC0) != 0x80) {
            errno = EILSEQ;
            return -1;
        }
        *wc |= (*src++ & 0x3F) << 12;
        if ((*src & 0xC0) != 0x80) {
            errno = EILSEQ;
            return -1;
        }
        *wc |= (*src++ & 0x3F) << 6;
        if ((*src & 0xC0) != 0x80) {
            errno = EILSEQ;
            return -1;
        }
        *wc |= *src & 0x3F;
        if (*wc < 0x10000) { /* overlong */
            errno = EILSEQ;
            return -1;
        }
    } else {
        /* A stray continuation byte (0x80-0xBF) or a byte that can never
         * start a sequence (0xF8-0xFF). Reject it rather than reporting
         * one byte consumed with *wc left unset.
         */
        errno = EILSEQ;
        return -1;
    }

    src++;
    return src - start;
}
/* Compare two multibyte strings without regard to case, never reading at
 * or beyond str1lim / str2lim.
 *
 * When both current bytes have the high bit set, one character is decoded
 * from each string with mbrtowc (or mbtowc where the restartable form is
 * not available) and the two are compared after towupper. Otherwise the
 * current bytes are compared after toupper.
 *
 * Returns -1 or 1 at the first character that differs, 0 if a NUL is
 * reached in both strings or either limit is reached first. Also returns
 * -1 if a multibyte character cannot be decoded.
 */
int
utf8casecmp(const unsigned char *str1, const unsigned char *str1lim,
        const unsigned char *str2, const unsigned char *str2lim)
{
    int len1, len2;
    int undecodable;
    wchar_t w1, w2;
    int b1, b2;
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
    mbstate_t st1, st2;

    memset(&st1, 0, sizeof(st1));
    memset(&st2, 0, sizeof(st2));
#endif

    while (str1 < str1lim && str2 < str2lim) {
        if (!(*str1 & 0x80) || !(*str2 & 0x80)) { /* neither or one multibyte */
            b1 = *str1;
            b2 = *str2;
            if (b1 != b2) {
                b1 = toupper(b1);
                b2 = toupper(b2);
                if (b1 != b2) {
                    return b1 < b2 ? -1 : 1;
                }
            }
            if (b1 == '\0') {
                return 0;
            }
            str1++;
            str2++;
            continue;
        }

        /* both multibyte */
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
        undecodable =
            (len1 = mbrtowc(&w1, (const char *)str1, str1lim - str1, &st1)) < 0 ||
            (len2 = mbrtowc(&w2, (const char *)str2, str2lim - str2, &st2)) < 0;
#else
        undecodable =
            (len1 = mbtowc(&w1, (const char *)str1, str1lim - str1)) < 0 ||
            (len2 = mbtowc(&w2, (const char *)str2, str2lim - str2)) < 0;
#endif
        if (undecodable) {
            PMNO(errno);
            return -1;
        }
        if (w1 != w2) {
            w1 = towupper(w1);
            w2 = towupper(w2);
            if (w1 != w2) {
                return w1 < w2 ? -1 : 1;
            }
        }
        str1 += len1;
        str2 += len2;
    }

    return 0;
}
/* Convert the multibyte string at str to lower case in place, stopping at
 * a NUL byte or at slim, whichever comes first.
 *
 * Bytes without the high bit are converted with tolower. Other characters
 * are decoded with mbrtowc (or mbtowc where the restartable form is not
 * available), converted with towlower and re-encoded.
 *
 * Returns the number of bytes processed, or -1 if a character cannot be
 * decoded, is cut short by slim, or cannot be re-encoded in the same
 * number of bytes. Characters before the failing one have already been
 * converted when -1 is returned.
 */
int
utf8tolower(unsigned char *str, unsigned char *slim)
{
    unsigned char *start = str;
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
    mbstate_t psw, psm;

    memset(&psw, 0, sizeof(psw));
    memset(&psm, 0, sizeof(psm));
#endif

    while (str < slim && *str) {
        if ((*str & 0x80) == 0) {
            *str = tolower(*str);
            ++str;
        } else {
            char mb[MB_LEN_MAX];
            wchar_t wc, wcl;
            size_t n, m;

#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
            n = mbrtowc(&wc, (const char *)str, slim - str, &psw);
#else
            n = (size_t)mbtowc(&wc, (const char *)str, slim - str);
#endif
            if (n == (size_t)-2) {
                /* The sequence is cut short by slim. mbrtowc does not set
                 * errno for this case, and advancing by (size_t)-2 would
                 * move str backwards.
                 */
                errno = EILSEQ;
            }
            if (n == (size_t)-1 || n == (size_t)-2) {
                PMNO(errno);
                return -1;
            }
            if ((wcl = towlower(wc)) != wc) {
                /* These functions are flawed because there are a few
                 * characters that encode as a different number of bytes
                 * depending on whether it's the upper or lower case
                 * version of the UCS code. Encode into a scratch buffer
                 * first so that a longer result cannot overwrite the
                 * bytes that follow (or run past slim), and return -1 if
                 * the size differs.
                 */
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
                m = wcrtomb(mb, wcl, &psm);
#else
                m = (size_t)wctomb(mb, wcl);
#endif
                if (m != n) {
                    PMNO(errno);
                    return -1;
                }
                memcpy(str, mb, n);
            }
            str += n;
        }
    }

    return str - start;
}
/* Convert the multibyte string at str to upper case in place, stopping at
 * a NUL byte or at slim, whichever comes first.
 *
 * Bytes without the high bit are converted with toupper. Other characters
 * are decoded with mbrtowc (or mbtowc where the restartable form is not
 * available), converted with towupper and re-encoded.
 *
 * Returns the number of bytes processed, or -1 if a character cannot be
 * decoded, is cut short by slim, or cannot be re-encoded in the same
 * number of bytes. Characters before the failing one have already been
 * converted when -1 is returned.
 */
int
utf8toupper(unsigned char *str, unsigned char *slim)
{
    unsigned char *start = str;
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
    mbstate_t psw, psm;

    memset(&psw, 0, sizeof(psw));
    memset(&psm, 0, sizeof(psm));
#endif

    while (str < slim && *str) {
        /* Single byte fast path, matching utf8tolower. */
        if ((*str & 0x80) == 0) {
            *str = toupper(*str);
            ++str;
        } else {
            char mb[MB_LEN_MAX];
            wchar_t wc, wcu;
            size_t n, m;

#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
            n = mbrtowc(&wc, (const char *)str, slim - str, &psw);
#else
            n = (size_t)mbtowc(&wc, (const char *)str, slim - str);
#endif
            if (n == (size_t)-2) {
                /* The sequence is cut short by slim. mbrtowc does not set
                 * errno for this case, and advancing by (size_t)-2 would
                 * move str backwards.
                 */
                errno = EILSEQ;
            }
            if (n == (size_t)-1 || n == (size_t)-2) {
                PMNO(errno);
                return -1;
            }
            if ((wcu = towupper(wc)) != wc) {
                /* A few characters encode as a different number of bytes
                 * in upper case than in lower case. Encode into a scratch
                 * buffer first so that a longer result cannot overwrite
                 * the bytes that follow (or run past slim), and return -1
                 * if the size differs.
                 */
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
                m = wcrtomb(mb, wcu, &psm);
#else
                m = (size_t)wctomb(mb, wcu);
#endif
                if (m != n) {
                    PMNO(errno);
                    return -1;
                }
                memcpy(str, mb, n);
            }
            str += n;
        }
    }

    return str - start;
}
/* Even though fputws is defined in C99 and UNIX98 we cannot safely mix
 * wide character I/O with regular so we might as well just unconditionally
 * use our own.
 *
 * Each wide character of buf is converted to its multibyte form with
 * wcrtomb (or wctomb where the restartable form is not available) and
 * written to stream with fwrite.
 *
 * Returns 0 on success or -1 if a character cannot be converted or a
 * write fails.
 */
int
_fputws(const wchar_t *buf, FILE *stream)
{
    char bytes[16];
    const wchar_t *wp;
    int len = 0;
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
    mbstate_t state;

    memset(&state, 0, sizeof(state));
#endif

    for (wp = buf; *wp != L'\0'; wp++) {
#if (__STDC_VERSION__ >= 199901L) || (_XOPEN_VERSION >= 500)
        len = wcrtomb(bytes, *wp, &state);
#else
        len = wctomb(bytes, *wp);
#endif
        if (len == -1) {
            PMNO(errno);
            return -1;
        }
        if (fwrite(bytes, len, 1, stream) != 1) {
            PMNO(errno);
            return -1;
        }
    }

    return 0;
}
#if !defined(_GNU_SOURCE)
#if !defined(_BSD_SOURCE) && \
!defined(_XOPEN_SOURCE_EXTENDED) && \
!defined(_WIN32) && \
!(defined(__APPLE__) && defined(__MACH__))
/* Fallback strdup for platforms that do not provide one.
 *
 * Returns a malloc'd copy of s that the caller must free, or NULL if s is
 * NULL or memory cannot be allocated.
 */
char *
strdup(const char *s)
{
    char *copy;

    if (s == NULL) {
        return NULL;
    }
    /* check the allocation; strcpy into a NULL destination is undefined */
    copy = malloc(strlen(s) + 1);
    if (copy == NULL) {
        return NULL;
    }
    return strcpy(copy, s);
}
#endif
/* Fallback wcsdup for platforms that do not provide one.
 *
 * Returns a malloc'd copy of s that the caller must free, or NULL if s is
 * NULL or memory cannot be allocated.
 */
wchar_t *
wcsdup(const wchar_t *s)
{
    wchar_t *copy;

    if (s == NULL) {
        return NULL;
    }
    /* check the allocation; wcscpy into a NULL destination is undefined */
    copy = malloc((wcslen(s) + 1) * sizeof *s);
    if (copy == NULL) {
        return NULL;
    }
    return wcscpy(copy, s);
}
/* Fallback strnlen for platforms that do not provide one.
 *
 * Returns the number of bytes before the terminator of s, or maxlen if no
 * terminator occurs within the first maxlen bytes. No byte at or beyond
 * s + maxlen is examined.
 */
size_t
strnlen(const char *s, size_t maxlen)
{
    size_t len = 0;

    /* test the bound first so s[maxlen] is never read */
    while (len < maxlen && s[len] != '\0') {
        len++;
    }
    return len;
}
#if (__STDC_VERSION__ < 199901L) && \
!defined(_BSD_SOURCE) && \
(_XOPEN_VERSION < 500) && \
!(defined(__APPLE__) && defined(__MACH__))
/* Stand-in for vsnprintf that simply forwards to vsprintf.
 *
 * WARNING: size is ignored, so the output is NOT bounded; str must be
 * large enough for the complete formatted result. Returns whatever
 * vsprintf returns.
 */
int
vsnprintf(char *str, size_t size, const char *format, va_list ap)
{
/* vsprintf takes no length limit; the cast only marks size as unused */
(void)size;
return vsprintf(str, format, ap);
}
#endif
/* Fallback wcsnlen for platforms that do not provide one.
 *
 * Returns the number of wide characters before the terminator of s, or
 * maxlen if no terminator occurs within the first maxlen characters. No
 * element at or beyond s + maxlen is examined.
 */
size_t
wcsnlen(const wchar_t *s, size_t maxlen)
{
    size_t len = 0;

    /* test the bound first so s[maxlen] is never read */
    while (len < maxlen && s[len] != L'\0') {
        len++;
    }
    return len;
}
/* Fallback wcscasecmp: compare two wide strings without regard to case.
 *
 * Characters that differ are compared again after towupper. Returns zero
 * if the strings match, otherwise the difference between the first pair
 * of characters that do not match (or between a character and the
 * terminator when one string is a prefix of the other).
 */
int
wcscasecmp(const wchar_t *s1, const wchar_t *s2)
{
    wchar_t a, b;

    for ( ;; ) {
        a = *s1++;
        b = *s2++;
        if (a == L'\0' || b == L'\0') {
            break;
        }
        if (a == b) {
            continue;
        }
        a = towupper(a);
        b = towupper(b);
        if (a != b) {
            break;
        }
    }
    return a - b;
}
#endif
/* "dumb" snprintf that just returns -1 if the buffer wasn't large enough
 *
 * Formats into str through vsnprintf. Returns the number of characters
 * written, not counting the terminator. Returns -1 if vsnprintf reports an
 * error, or -1 with errno set to ENOBUFS if the result did not fit in
 * size bytes.
 */
int
dsnprintf(char *str, size_t size, const char *format, ...)
{
    va_list args;
    int written;

    va_start(args, format);
    written = vsnprintf(str, size, format, args);
    va_end(args);

    if (written >= 0 && (size_t)written < size) {
        return written;
    }
    if (written < 0) {
        PMNO(errno);
    } else {
        PMNO(errno = ENOBUFS);
    }
    return -1;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */