/* Urwid unicode character processing tables Copyright (C) 2006 Rebecca Breu. This file contains rewritten code of utable.py by Ian Ward. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Urwid web site: http://excess.org/urwid/ */ #include #define ENC_UTF8 1 #define ENC_WIDE 2 #define ENC_NARROW 3 static int widths_len = 2*38; static const long int widths[] = { 126, 1, 159, 0, 687, 1, 710, 0, 711, 1, 727, 0, 733, 1, 879, 0, 1154, 1, 1161, 0, 4347, 1, 4447, 2, 7467, 1, 7521, 0, 8369, 1, 8426, 0, 9000, 1, 9002, 2, 11021, 1, 12350, 2, 12351, 1, 12438, 2, 12442, 0, 19893, 2, 19967, 1, 55203, 2, 63743, 1, 64106, 2, 65039, 1, 65059, 0, 65131, 2, 65279, 1, 65376, 2, 65500, 1, 65510, 2, 120831, 1, 262141, 2, 1114109, 1 }; static short byte_encoding = ENC_UTF8; static PyObject * to_bool(int val) { if (val) Py_RETURN_TRUE; else Py_RETURN_FALSE; } //====================================================================== static char get_byte_encoding_doc[] = "get_byte_encoding() -> string encoding\n\n\ Get byte encoding ('utf8', 'wide', or 'narrow')."; static PyObject * get_byte_encoding(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; if (byte_encoding == ENC_UTF8) return Py_BuildValue("s", "utf8"); if (byte_encoding == ENC_WIDE) return Py_BuildValue("s", "wide"); if (byte_encoding == ENC_NARROW) return Py_BuildValue("s", "narrow"); return Py_None; // should never happen } //====================================================================== static char set_byte_encoding_doc[] = "set_byte_encoding(string encoding) -> None\n\n\ Set byte encoding. \n\n\ encoding -- one of 'utf8', 'wide', 'narrow'"; static PyObject * set_byte_encoding(PyObject *self, PyObject *args) { char * enc; if (!PyArg_ParseTuple(args, "s", &enc)) return NULL; if (strcmp(enc, "utf8") == 0) byte_encoding = ENC_UTF8; else if (strcmp(enc, "wide") == 0) byte_encoding = ENC_WIDE; else if (strcmp(enc, "narrow") == 0) byte_encoding = ENC_NARROW; else { // got wrong encoding PyErr_SetString(PyExc_ValueError, "Unknown encoding."); return NULL; } return Py_None; } //====================================================================== static char get_width_doc[] = "get_width(int ord) -> int width\n\n\ Return the screen column width for unicode ordinal ord.\n\n\ ord -- ordinal"; static int Py_GetWidth(long int ord) { int i; if ((ord == 0xe) || (ord == 0xf)) return 0; for (i=0; i= 0) { if ((text[pos]&0xc0) != 0x80) { Py_DecodeOne(text, text_len, pos, subret); ret[0] = subret[0]; ret[1] = pos-1; return; } pos-=1; if (pos == pos-4) //error { ret[0] = '?'; ret[1] = pos - 1; return; } } } static PyObject * decode_one_right(PyObject *self, PyObject *args) { PyObject *py_text; int pos, text_len; char *text; int ret[2] = {'?',0}; if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos)) return NULL; PyString_AsStringAndSize(py_text, &text, &text_len); Py_DecodeOneRight((const unsigned char *)text, text_len, pos, ret); return Py_BuildValue("(i, i)", ret[0], ret[1]); } //====================================================================== static char within_double_byte_doc[] = "within_double_byte(strint text, int line_start, int pos) -> int withindb\n\n\ Return whether pos is within a double-byte encoded character.\n\n\ str -- string in question\n\ line_start -- offset of beginning of line (< pos)\n\ pos -- offset in question\n\n\ Return values:\n\ 0 -- not within dbe char, or double_byte_encoding == False\n\ 1 -- pos is on the 1st half of a dbe char\n\ 2 -- pos is on the 2nd half of a dbe char"; static int Py_WithinDoubleByte(const unsigned char *str, int line_start, int pos) { int i; if ((str[pos] >= 0x40) && (str[pos] < 0x7f)) { //might be second half of big5, uhc or gbk encoding if (pos == line_start) return 0; if (str[pos-1] >= 0x81) { if ((Py_WithinDoubleByte(str, line_start, pos-1)) == 1) return 2; else return 0; } } if (str[pos] < 0x80) return 0; for (i=pos-1; i>=line_start; i--) if (str[i] < 0x80) break; if ((pos-i) & 1) return 1; else return 2; } static PyObject * within_double_byte(PyObject *self, PyObject *args) { const unsigned char *str; int line_start, pos; int ret; if (!PyArg_ParseTuple(args, "sii", &str, &line_start, &pos)) return NULL; ret = Py_WithinDoubleByte(str, line_start, pos); return Py_BuildValue("i", ret); } //====================================================================== char is_wide_char_doc[] = "is_wide_char(string/unicode text, int offs) -> bool iswide\n\n\ Test if the character at offs within text is wide.\n\n\ text -- string or unicode text\n\ offs -- offset"; static int Py_IsWideChar(PyObject *text, int offs) { const unsigned char *str; Py_UNICODE *ustr; int ret[2], str_len; if (PyUnicode_Check(text)) //text_py is unicode string { ustr = PyUnicode_AS_UNICODE(text); return (Py_GetWidth((long int)ustr[offs]) == 2); } str = (const unsigned char *)PyString_AsString(text); str_len = (int) PyString_Size(text); if (byte_encoding == ENC_UTF8) { Py_DecodeOne(str, str_len, offs, ret); return (Py_GetWidth(ret[0]) == 2); } if (byte_encoding == ENC_WIDE) return (Py_WithinDoubleByte(str, offs, offs) == 1); return 0; } static PyObject * is_wide_char(PyObject *self, PyObject *args) { PyObject *text; int offs; int ret; if (!PyArg_ParseTuple(args, "Oi", &text, &offs)) return NULL; ret = Py_IsWideChar(text, offs); return Py_BuildValue("O", to_bool(ret)); } //====================================================================== char move_prev_char_doc[] = "move_prev_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\ Return the position of the character before end_offs.\n\n\ text -- string or unicode text\n\ start_offs -- start offset\n\ end_offs -- end offset"; static int Py_MovePrevChar(PyObject *text, int start_offs, int end_offs) { int position; unsigned char *str; if (PyUnicode_Check(text)) //text_py is unicode string return end_offs-1; else str = (unsigned char *)PyString_AsString(text); if (byte_encoding == ENC_UTF8) //encoding is utf8 { position = end_offs - 1; while ((str[position]&0xc0) == 0x80) position -=1; return position; } else if ((byte_encoding == ENC_WIDE) && (Py_WithinDoubleByte(str, start_offs, end_offs-1) == 2)) return end_offs-2; else return end_offs-1; } static PyObject * move_prev_char(PyObject *self, PyObject *args) { PyObject *text; int start_offs, end_offs; int ret; if (!PyArg_ParseTuple(args, "Oii", &text, &start_offs, &end_offs)) return NULL; ret = Py_MovePrevChar(text, start_offs, end_offs); return Py_BuildValue("i", ret); } //====================================================================== char move_next_char_doc[] = "move_next_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\ Return the position of the character after start_offs.\n\n\ text -- string or unicode text\n\ start_offs -- start offset\n\ end_offs -- end offset"; static int Py_MoveNextChar(PyObject *text, int start_offs, int end_offs) { int position; unsigned char * str; if (PyUnicode_Check(text)) //text_py is unicode string return start_offs+1; else str = (unsigned char *)PyString_AsString(text); if (byte_encoding == ENC_UTF8) //encoding is utf8 { position = start_offs + 1; while ((position < end_offs) && ((str[position]&0xc0) == 0x80)) position +=1; return position; } else if ((byte_encoding == ENC_WIDE) && (Py_WithinDoubleByte(str, start_offs, start_offs) == 1)) return start_offs+2; else return start_offs+1; } static PyObject * move_next_char(PyObject *self, PyObject *args) { PyObject *text; int start_offs, end_offs; int ret; if (!PyArg_ParseTuple(args, "Oii", &text, &start_offs, &end_offs)) return NULL; ret = Py_MoveNextChar(text, start_offs, end_offs); return Py_BuildValue("i", ret); } //====================================================================== char calc_width_doc[] = "calc_width(string/unicode text, int start_off, int end_offs) -> int width\n\n\ Return the screen column width of text between start_offs and end_offs.\n\n\ text -- string or unicode text\n\ start_offs -- start offset\n\ end_offs -- end offset"; static int Py_CalcWidth(PyObject *text, int start_offs, int end_offs) { unsigned char * str; int i, screencols, ret[2], str_len; Py_UNICODE *ustr; if (PyUnicode_Check(text)) //text_py is unicode string { ustr = PyUnicode_AS_UNICODE(text); screencols = 0; for(i=start_offs; i pref_col) { ret[0] = i; ret[1] = screencols; return 0; } screencols += width; } ret[0] = i; ret[1] = screencols; return 0; } if (!PyString_Check(text)) { PyErr_SetString(PyExc_TypeError, "Neither unicode nor string."); return -1; } str = (unsigned char *)PyString_AsString(text); str_len = (int) PyString_Size(text); if (byte_encoding == ENC_UTF8) { i = start_offs; screencols = 0; while (i pref_col) { ret[0] = i; ret[1] = screencols; return 0; } i = dummy[1]; screencols += width; } ret[0] = i; ret[1] = screencols; return 0; } // "wide" and "narrow" i = start_offs + pref_col; if (i>= end_offs) { ret[0] = end_offs; ret[1] = end_offs - start_offs; return 0; } if (byte_encoding == ENC_WIDE) if (Py_WithinDoubleByte(str, start_offs, i)==2) i -= 1; ret[0] = i; ret[1] = i - start_offs; return 0; } static PyObject * calc_text_pos(PyObject *self, PyObject *args) { PyObject *text; int start_offs, end_offs, pref_col; int ret[2], err; if (!PyArg_ParseTuple(args, "Oiii", &text, &start_offs, &end_offs, &pref_col)) return NULL; err = Py_CalcTextPos(text, start_offs, end_offs, pref_col, ret); if (err==-1) //an error occured return NULL; return Py_BuildValue("(ii)", ret[0], ret[1]); } //====================================================================== static PyMethodDef Str_UtilMethods[] = { {"get_byte_encoding", get_byte_encoding, METH_VARARGS, get_byte_encoding_doc}, {"set_byte_encoding", set_byte_encoding, METH_VARARGS, set_byte_encoding_doc}, {"get_width", get_width, METH_VARARGS, get_width_doc}, {"decode_one", decode_one, METH_VARARGS, decode_one_doc}, {"decode_one_right", decode_one_right, METH_VARARGS, decode_one_right_doc}, {"within_double_byte", within_double_byte, METH_VARARGS, within_double_byte_doc}, {"is_wide_char", is_wide_char, METH_VARARGS, is_wide_char_doc}, {"move_prev_char", move_prev_char, METH_VARARGS, move_prev_char_doc}, {"move_next_char", move_next_char, METH_VARARGS, move_next_char_doc}, {"calc_width", calc_width, METH_VARARGS, calc_width_doc}, {"calc_text_pos", calc_text_pos, METH_VARARGS, calc_text_pos_doc}, {NULL, NULL, 0, NULL} // Sentinel }; PyMODINIT_FUNC initstr_util(void) { Py_InitModule("str_util", Str_UtilMethods); } int main(int argc, char *argv[]) { //Pass argv[0] to the Python interpreter: Py_SetProgramName(argv[0]); //Initialize the Python interpreter. Py_Initialize(); //Add a static module: initstr_util(); return 0; }