.\" Copyright (C) 2001 Information-technology Promotion Agency (IPA) .\" Copyright (C) 2001-2003 .\" National Institute of Advanced Industrial Science and Technology (AIST) .\" This file is part of the m17n library documentation. .\" Permission is granted to copy, distribute and/or modify this document .\" under the terms of the GNU Free Documentation License, Version 1.2 or .\" any later version published by the Free Software Foundation; with no .\" Invariant Section, Front-Cover Texts "The m17n library documentation", .\" and no Back-Cover Texts. A copy of the license is included in the .\" appendix entitled "GNU Free Documentation License". .TH "M-text" 3m17n "14 Jul 2007" "" "Version 1.4.0" "" "The m17n Library" \" -*- nroff -*- .ad l .nh .SH NAME M-text \- M-text objects and API for them. .PP .SS "Variables: Default Endian of UTF-16 and UTF-32" .in +1c .ti -1c .RI "enum \fBMTextFormat\fP \fBMTEXT_FORMAT_UTF_16\fP" .br .RI "\fIVariable of value MTEXT_FORMAT_UTF_16LE or MTEXT_FORMAT_UTF_16BE. \fP" .ti -1c .RI "const int \fBMTEXT_FORMAT_UTF_32\fP" .br .RI "\fIVariable of value MTEXT_FORMAT_UTF_32LE or MTEXT_FORMAT_UTF_32BE. \fP" .in -1c .SS "Typedefs" .in +1c .ti -1c .RI "typedef \fBMText\fP \fBMText\fP" .br .RI "\fIType of \fIM-texts\fP. \fP" .in -1c .SS "Enumerations" .in +1c .ti -1c .RI "enum \fBMTextFormat\fP { \fBMTEXT_FORMAT_US_ASCII\fP, \fBMTEXT_FORMAT_UTF_8\fP, \fBMTEXT_FORMAT_UTF_16LE\fP, \fBMTEXT_FORMAT_UTF_16BE\fP, \fBMTEXT_FORMAT_UTF_32LE\fP, \fBMTEXT_FORMAT_UTF_32BE\fP, \fBMTEXT_FORMAT_MAX\fP }" .br .RI "\fIEnumeration for specifying the format of an M-text. \fP" .in -1c .SS "Functions" .in +1c .ti -1c .RI "int \fBmtext_line_break\fP (\fBMText\fP *mt, int pos, int option, int *after)" .br .RI "\fIFind a linebreak position of an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext\fP ()" .br .RI "\fIAllocate a new M-text. 
\fP" .ti -1c .RI "\fBMText\fP * \fBmtext_from_data\fP (const void *data, int nitems, enum \fBMTextFormat\fP format)" .br .RI "\fIAllocate a new M-text with specified data. \fP" .ti -1c .RI "void * \fBmtext_data\fP (\fBMText\fP *mt, enum \fBMTextFormat\fP *fmt, int *nunits, int *pos_idx, int *unit_idx)" .br .RI "\fIGet information about the text data in M-text. \fP" .ti -1c .RI "int \fBmtext_len\fP (\fBMText\fP *mt)" .br .RI "\fINumber of characters in M-text. \fP" .ti -1c .RI "int \fBmtext_ref_char\fP (\fBMText\fP *mt, int pos)" .br .RI "\fIReturn the character at the specified position in an M-text. \fP" .ti -1c .RI "int \fBmtext_set_char\fP (\fBMText\fP *mt, int pos, int c)" .br .RI "\fIStore a character into an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_cat_char\fP (\fBMText\fP *mt, int c)" .br .RI "\fIAppend a character to an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_dup\fP (\fBMText\fP *mt)" .br .RI "\fICreate a copy of an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_cat\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fIAppend an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_ncat\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fIAppend a part of an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_cpy\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fICopy an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_ncpy\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fICopy the first few characters in an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_duplicate\fP (\fBMText\fP *mt, int from, int to)" .br .RI "\fICreate a new M-text from a part of an existing M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_copy\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2, int from, int to)" .br .RI "\fICopy characters in the specified range into an M-text. 
\fP" .ti -1c .RI "int \fBmtext_del\fP (\fBMText\fP *mt, int from, int to)" .br .RI "\fIDelete characters in the specified range destructively. \fP" .ti -1c .RI "int \fBmtext_ins\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2)" .br .RI "\fIInsert an M-text into another M-text. \fP" .ti -1c .RI "int \fBmtext_insert\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2, int from, int to)" .br .RI "\fIInsert sub-text of an M-text into another M-text. \fP" .ti -1c .RI "int \fBmtext_ins_char\fP (\fBMText\fP *mt, int pos, int c, int n)" .br .RI "\fIInsert a character into an M-text. \fP" .ti -1c .RI "int \fBmtext_replace\fP (\fBMText\fP *mt1, int from1, int to1, \fBMText\fP *mt2, int from2, int to2)" .br .RI "\fIReplace sub-text of an M-text with another. \fP" .ti -1c .RI "int \fBmtext_character\fP (\fBMText\fP *mt, int from, int to, int c)" .br .RI "\fISearch for a character in an M-text. \fP" .ti -1c .RI "int \fBmtext_chr\fP (\fBMText\fP *mt, int c)" .br .RI "\fIReturn the position of the first occurrence of a character in an M-text. \fP" .ti -1c .RI "int \fBmtext_rchr\fP (\fBMText\fP *mt, int c)" .br .RI "\fIReturn the position of the last occurrence of a character in an M-text. \fP" .ti -1c .RI "int \fBmtext_cmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fICompare two M-texts character-by-character. \fP" .ti -1c .RI "int \fBmtext_ncmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fICompare initial parts of two M-texts character-by-character. \fP" .ti -1c .RI "int \fBmtext_compare\fP (\fBMText\fP *mt1, int from1, int to1, \fBMText\fP *mt2, int from2, int to2)" .br .RI "\fICompare specified regions of two M-texts. \fP" .ti -1c .RI "int \fBmtext_spn\fP (\fBMText\fP *mt, \fBMText\fP *accept)" .br .RI "\fISearch an M-text for a set of characters. \fP" .ti -1c .RI "int \fBmtext_cspn\fP (\fBMText\fP *mt, \fBMText\fP *reject)" .br .RI "\fISearch an M-text for the complement of a set of characters. 
\fP" .ti -1c .RI "int \fBmtext_pbrk\fP (\fBMText\fP *mt, \fBMText\fP *accept)" .br .RI "\fISearch an M-text for any of a set of characters. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_tok\fP (\fBMText\fP *mt, \fBMText\fP *delim, int *pos)" .br .RI "\fILook for a token in an M-text. \fP" .ti -1c .RI "int \fBmtext_text\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2)" .br .RI "\fILocate an M-text in another. \fP" .ti -1c .RI "int \fBmtext_search\fP (\fBMText\fP *mt1, int from, int to, \fBMText\fP *mt2)" .br .RI "\fILocate an M-text in a specific range of another. \fP" .ti -1c .RI "int \fBmtext_casecmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fICompare two M-texts ignoring cases. \fP" .ti -1c .RI "int \fBmtext_ncasecmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fICompare initial parts of two M-texts ignoring cases. \fP" .ti -1c .RI "int \fBmtext_case_compare\fP (\fBMText\fP *mt1, int from1, int to1, \fBMText\fP *mt2, int from2, int to2)" .br .RI "\fICompare specified regions of two M-texts ignoring cases. \fP" .ti -1c .RI "int \fBmtext_lowercase\fP (\fBMText\fP *mt)" .br .RI "\fILowercase an M-text. \fP" .ti -1c .RI "int \fBmtext_titlecase\fP (\fBMText\fP *mt)" .br .RI "\fITitlecase an M-text. \fP" .ti -1c .RI "int \fBmtext_uppercase\fP (\fBMText\fP *mt)" .br .RI "\fIUppercase an M-text. \fP" .in -1c .SS "Variables" .in +1c .ti -1c .RI "\fBMSymbol\fP \fBMlanguage\fP" .br .in -1c .SH "Detailed Description" .PP In the m17n library, text is represented as an object called \fIM-text\fP rather than as a C-string (\fCchar *\fP or \fCunsigned char *\fP). An M-text is a sequence of characters whose length is equal to or greater than 0, and can be created from various character sources, e.g. C-strings, files, character codes, etc. .PP M-texts are more useful than C-strings in the following respects. .PP .PD 0 .TP M-texts can handle a mixture of characters of various scripts, including all Unicode characters and more. 
This is an indispensable facility when handling multilingual text. .PD 0 .TP Each character in an M-text can have properties called \fItext\fP \fIproperties\fP. Text properties store various kinds of information attached to parts of an M-text to provide application programs with a unified view of that information. As rich information can be stored in M-texts in the form of text properties, functions in application programs can be simple. In addition, the library provides many functions to manipulate an M-text in just the same way as a C-string. .SH "Typedef Documentation" .PP .SS "typedef struct \fBMText\fP \fBMText\fP" .PP The type \fBMText\fP is for an \fIM-text\fP object. Its internal structure is concealed from application programs. .SH "Enumeration Type Documentation" .PP .SS "enum \fBMTextFormat\fP" .PP The enum \fBMTextFormat\fP is used as an argument of the \fBmtext_from_data()\fP function to specify the format of data from which an M-text is created. .PP \fBEnumerator: \fP .in +1c .TP \fB\fIMTEXT_FORMAT_US_ASCII \fP\fP \fB\fIMTEXT_FORMAT_UTF_8 \fP\fP .TP \fB\fIMTEXT_FORMAT_UTF_16LE \fP\fP \fB\fIMTEXT_FORMAT_UTF_16BE \fP\fP .TP \fB\fIMTEXT_FORMAT_UTF_32LE \fP\fP \fB\fIMTEXT_FORMAT_UTF_32BE \fP\fP .TP \fB\fIMTEXT_FORMAT_MAX \fP\fP .SH "Variable Documentation" .PP .SS "enum \fBMTextFormat\fP \fBMTEXT_FORMAT_UTF_16\fP" .PP The global variable \fBMTEXT_FORMAT_UTF_16\fP is initialized to \fBMTEXT_FORMAT_UTF_16LE\fP on a 'Little Endian' system (storing words with the least significant byte first), and to \fBMTEXT_FORMAT_UTF_16BE\fP on a 'Big Endian' system (storing words with the most significant byte first). 
.PP \fBSee Also:\fP.RS 4 \fBmtext_from_data()\fP .RE .PP .SS "const int \fBMTEXT_FORMAT_UTF_32\fP" .PP The global variable \fBMTEXT_FORMAT_UTF_32\fP is initialized to \fBMTEXT_FORMAT_UTF_32LE\fP on a 'Little Endian' system (storing words with the least significant byte first), and to \fBMTEXT_FORMAT_UTF_32BE\fP on a 'Big Endian' system (storing words with the most significant byte first). .PP \fBSee Also:\fP.RS 4 \fBmtext_from_data()\fP .RE .PP .SS "\fBMSymbol\fP \fBMlanguage\fP" .PP The symbol whose name is 'language'.