Glib::ustring Class Reference
Glib::ustring has much the same interface as std::string, but contains Unicode characters encoded as UTF-8. More...
Public Types | |
typedef std::string::size_type | size_type |
typedef std::string::difference_type | difference_type |
typedef gunichar | value_type |
typedef gunichar& | reference |
typedef const gunichar& | const_reference |
typedef ustring_Iterator< std::string::iterator > | iterator |
typedef ustring_Iterator< std::string::const_iterator > | const_iterator |
typedef std::reverse_iterator< iterator > | reverse_iterator |
typedef std::reverse_iterator< const_iterator > | const_reverse_iterator |
Public Member Functions | |
ustring () | |
~ustring () | |
ustring (const ustring& other) | |
ustring& | operator= (const ustring& other) |
void | swap (ustring& other) |
ustring (const std::string& src) | |
ustring (const ustring& src, size_type i, size_type n=npos) | |
ustring (const char* src, size_type n) | |
ustring (const char* src) | |
ustring (size_type n, gunichar uc) | |
ustring (size_type n, char c) | |
template<class In> | |
ustring (In pbegin, In pend) | |
Assign new contents. | |
ustring& | operator= (const std::string& src) |
ustring& | operator= (const char* src) |
ustring& | operator= (gunichar uc) |
ustring& | operator= (char c) |
ustring& | assign (const ustring& src) |
ustring& | assign (const ustring& src, size_type i, size_type n) |
ustring& | assign (const char* src, size_type n) |
ustring& | assign (const char* src) |
ustring& | assign (size_type n, gunichar uc) |
ustring& | assign (size_type n, char c) |
template<class In> | |
ustring& | assign (In pbegin, In pend) |
Append to the string. | |
ustring& | operator+= (const ustring& src) |
ustring& | operator+= (const char* src) |
ustring& | operator+= (gunichar uc) |
ustring& | operator+= (char c) |
void | push_back (gunichar uc) |
void | push_back (char c) |
ustring& | append (const ustring& src) |
ustring& | append (const ustring& src, size_type i, size_type n) |
ustring& | append (const char* src, size_type n) |
ustring& | append (const char* src) |
ustring& | append (size_type n, gunichar uc) |
ustring& | append (size_type n, char c) |
template<class In> | |
ustring& | append (In pbegin, In pend) |
Insert into the string. | |
ustring& | insert (size_type i, const ustring& src) |
ustring& | insert (size_type i, const ustring& src, size_type i2, size_type n) |
ustring& | insert (size_type i, const char* src, size_type n) |
ustring& | insert (size_type i, const char* src) |
ustring& | insert (size_type i, size_type n, gunichar uc) |
ustring& | insert (size_type i, size_type n, char c) |
iterator | insert (iterator p, gunichar uc) |
iterator | insert (iterator p, char c) |
void | insert (iterator p, size_type n, gunichar uc) |
void | insert (iterator p, size_type n, char c) |
template<class In> | |
void | insert (iterator p, In pbegin, In pend) |
Replace sub-strings. | |
ustring& | replace (size_type i, size_type n, const ustring& src) |
ustring& | replace (size_type i, size_type n, const ustring& src, size_type i2, size_type n2) |
ustring& | replace (size_type i, size_type n, const char* src, size_type n2) |
ustring& | replace (size_type i, size_type n, const char* src) |
ustring& | replace (size_type i, size_type n, size_type n2, gunichar uc) |
ustring& | replace (size_type i, size_type n, size_type n2, char c) |
ustring& | replace (iterator pbegin, iterator pend, const ustring& src) |
ustring& | replace (iterator pbegin, iterator pend, const char* src, size_type n) |
ustring& | replace (iterator pbegin, iterator pend, const char* src) |
ustring& | replace (iterator pbegin, iterator pend, size_type n, gunichar uc) |
ustring& | replace (iterator pbegin, iterator pend, size_type n, char c) |
template<class In> | |
ustring& | replace (iterator pbegin, iterator pend, In pbegin2, In pend2) |
Erase sub-strings. | |
void | clear () |
ustring& | erase (size_type i, size_type n=npos) |
ustring& | erase () |
iterator | erase (iterator p) |
iterator | erase (iterator pbegin, iterator pend) |
Compare and collate. | |
int | compare (const ustring& rhs) const |
int | compare (const char* rhs) const |
int | compare (size_type i, size_type n, const ustring& rhs) const |
int | compare (size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const |
int | compare (size_type i, size_type n, const char* rhs, size_type n2) const |
int | compare (size_type i, size_type n, const char* rhs) const |
std::string | collate_key () const |
std::string | casefold_collate_key () const |
Extract characters and sub-strings. | |
value_type | operator[] (size_type i) const |
value_type | at (size_type i) const |
ustring | substr (size_type i=0, size_type n=npos) const |
Access a sequence of characters. | |
iterator | begin () |
iterator | end () |
const_iterator | begin () const |
const_iterator | end () const |
reverse_iterator | rbegin () |
reverse_iterator | rend () |
const_reverse_iterator | rbegin () const |
const_reverse_iterator | rend () const |
Find sub-strings. | |
size_type | find (const ustring& str, size_type i=0) const |
size_type | find (const char* str, size_type i, size_type n) const |
size_type | find (const char* str, size_type i=0) const |
size_type | find (gunichar uc, size_type i=0) const |
size_type | find (char c, size_type i=0) const |
size_type | rfind (const ustring& str, size_type i=npos) const |
size_type | rfind (const char* str, size_type i, size_type n) const |
size_type | rfind (const char* str, size_type i=npos) const |
size_type | rfind (gunichar uc, size_type i=npos) const |
size_type | rfind (char c, size_type i=npos) const |
Match against a set of characters. | |
size_type | find_first_of (const ustring& match, size_type i=0) const |
size_type | find_first_of (const char* match, size_type i, size_type n) const |
size_type | find_first_of (const char* match, size_type i=0) const |
size_type | find_first_of (gunichar uc, size_type i=0) const |
size_type | find_first_of (char c, size_type i=0) const |
size_type | find_last_of (const ustring& match, size_type i=npos) const |
size_type | find_last_of (const char* match, size_type i, size_type n) const |
size_type | find_last_of (const char* match, size_type i=npos) const |
size_type | find_last_of (gunichar uc, size_type i=npos) const |
size_type | find_last_of (char c, size_type i=npos) const |
size_type | find_first_not_of (const ustring& match, size_type i=0) const |
size_type | find_first_not_of (const char* match, size_type i, size_type n) const |
size_type | find_first_not_of (const char* match, size_type i=0) const |
size_type | find_first_not_of (gunichar uc, size_type i=0) const |
size_type | find_first_not_of (char c, size_type i=0) const |
size_type | find_last_not_of (const ustring& match, size_type i=npos) const |
size_type | find_last_not_of (const char* match, size_type i, size_type n) const |
size_type | find_last_not_of (const char* match, size_type i=npos) const |
size_type | find_last_not_of (gunichar uc, size_type i=npos) const |
size_type | find_last_not_of (char c, size_type i=npos) const |
Retrieve the string's size. | |
bool | empty () const |
Returns true if the string is empty. | |
size_type | size () const |
Returns the number of characters in the string, not including any null-termination. | |
size_type | length () const |
This is the same as size(). | |
size_type | bytes () const |
Returns the number of bytes in the string, not including any null-termination. | |
Change the string's size. | |
void | resize (size_type n, gunichar uc) |
void | resize (size_type n, char c='\0') |
Control the allocated memory. | |
size_type | capacity () const |
size_type | max_size () const |
void | reserve (size_type n=0) |
Get a per-byte representation of the string. | |
operator std::string () const | |
const std::string& | raw () const |
const char* | data () const |
const char* | c_str () const |
size_type | copy (char* dest, size_type n, size_type i=0) const |
UTF-8 utilities. | |
bool | validate () const |
bool | validate (iterator& first_invalid) |
bool | validate (const_iterator& first_invalid) const |
bool | is_ascii () const |
ustring | normalize (NormalizeMode mode=NORMALIZE_DEFAULT_COMPOSE) const |
Character case conversion. | |
ustring | uppercase () const |
ustring | lowercase () const |
ustring | casefold () const |
Static Public Attributes | |
static GLIBMM_API const size_type | npos |
Related Functions | |
(Note that these are not member functions.) | |
std::istream& | operator>> (std::istream& is, Glib::ustring& utf8_string) |
Stream input operator. | |
std::ostream& | operator<< (std::ostream& os, const Glib::ustring& utf8_string) |
Stream output operator. | |
void | swap (ustring& lhs, ustring& rhs) |
bool | operator== (const ustring& lhs, const ustring& rhs) |
bool | operator== (const ustring& lhs, const char* rhs) |
bool | operator== (const char* lhs, const ustring& rhs) |
bool | operator!= (const ustring& lhs, const ustring& rhs) |
bool | operator!= (const ustring& lhs, const char* rhs) |
bool | operator!= (const char* lhs, const ustring& rhs) |
bool | operator< (const ustring& lhs, const ustring& rhs) |
bool | operator< (const ustring& lhs, const char* rhs) |
bool | operator< (const char* lhs, const ustring& rhs) |
bool | operator> (const ustring& lhs, const ustring& rhs) |
bool | operator> (const ustring& lhs, const char* rhs) |
bool | operator> (const char* lhs, const ustring& rhs) |
bool | operator<= (const ustring& lhs, const ustring& rhs) |
bool | operator<= (const ustring& lhs, const char* rhs) |
bool | operator<= (const char* lhs, const ustring& rhs) |
bool | operator>= (const ustring& lhs, const ustring& rhs) |
bool | operator>= (const ustring& lhs, const char* rhs) |
bool | operator>= (const char* lhs, const ustring& rhs) |
ustring | operator+ (const ustring& lhs, const ustring& rhs) |
ustring | operator+ (const ustring& lhs, const char* rhs) |
ustring | operator+ (const char* lhs, const ustring& rhs) |
ustring | operator+ (const ustring& lhs, gunichar rhs) |
ustring | operator+ (gunichar lhs, const ustring& rhs) |
ustring | operator+ (const ustring& lhs, char rhs) |
ustring | operator+ (char lhs, const ustring& rhs) |
Detailed Description
Glib::ustring has much the same interface as std::string, but contains Unicode characters encoded as UTF-8.
- About UTF-8 and ASCII
- The standard character set ANSI_X3.4-1968 -- more commonly known as ASCII -- is a subset of UTF-8. So, if you want to, you can use Glib::ustring without even thinking about UTF-8.
- Whenever ASCII is mentioned in this manual, we mean the real ASCII (i.e. as defined in ANSI_X3.4-1968), which contains only 7-bit characters. Glib::ustring cannot be used with ASCII-compatible extended 8-bit charsets like ISO-8859-1. It's a good idea to avoid string literals containing non-ASCII characters (e.g. German umlauts) in source code, or at least you should use UTF-8 literals.
- You can find a detailed UTF-8 and Unicode FAQ here: http://www.cl.cam.ac.uk/~mgk25/unicode.html
- Glib::ustring vs. std::string
- Glib::ustring has implicit type conversions to and from std::string. These conversions do not convert to/from the current locale (see Glib::locale_from_utf8() and Glib::locale_to_utf8() if you need that). You can always use std::string instead of Glib::ustring -- however, using std::string with multi-byte characters is quite hard. For instance,
std::string::operator[]
might return a byte in the middle of a character, and std::string::length()
returns the number of bytes rather than characters. So don't do that without a good reason.
- In a perfect world the C++ Standard Library would contain a UTF-8 string class. Unfortunately, the C++ standard doesn't mention UTF-8 at all. Note that std::wstring is not a UTF-8 string class because it contains only fixed-width characters (where width could be 32, 16, or even 8 bits).
- Glib::ustring and stream input/output
- The stream I/O operators, that is operator<<() and operator>>(), perform implicit charset conversion to/from the current locale. If that's not what you intended (e.g. when writing to a configuration file that should always be UTF-8 encoded) use ustring::raw() to override this behaviour.
- If you're using std::ostringstream to build strings for display in the user interface, you must convert the result back to UTF-8 as shown below:
std::ostringstream output;
output.imbue(std::locale("")); // use the user's locale for this stream
output << percentage << " % done";
label->set_text(Glib::locale_to_utf8(output.str()));
- Implementation notes
- Glib::ustring does not inherit from std::string, because std::string was intended to be a final class. For instance, it does not have a virtual destructor. Also, a HAS-A relationship is more appropriate because ustring can't just enhance the std::string interface. Rather, it has to reimplement the interface so that all operations are based on characters instead of bytes.
Member Typedef Documentation
typedef gunichar Glib::ustring::value_type |
typedef gunichar& Glib::ustring::reference |
typedef const gunichar& Glib::ustring::const_reference |
Constructor & Destructor Documentation
Glib::ustring::ustring | ( | ) |
Default constructor, which creates an empty string.
Glib::ustring::~ustring | ( | ) |
Glib::ustring::ustring | ( | const ustring& | other | ) |
Construct a ustring as a copy of another ustring.
- Parameters:
-
other A source string.
Glib::ustring::ustring | ( | const std::string & | src | ) |
Construct a ustring as a copy of another std::string.
- Parameters:
-
src A source std::string
containing text encoded as UTF-8.
Construct a ustring as a copy of a substring.
- Parameters:
-
src Source ustring. i Index of first character to copy from. n Number of UTF-8 characters to copy (defaults to copying the remainder).
Glib::ustring::ustring | ( | const char * | src, | |
size_type | n | |||
) |
Construct a ustring as a partial copy of a C string.
- Parameters:
-
src Source C string encoded as UTF-8. n Number of UTF-8 characters to copy.
Glib::ustring::ustring | ( | const char * | src | ) |
Construct a ustring as a copy of a C string.
- Parameters:
-
src Source C string encoded as UTF-8.
Glib::ustring::ustring | ( | size_type | n, | |
gunichar | uc | |||
) |
Construct a ustring as multiple characters.
- Parameters:
-
n Number of characters. uc UCS-4 code point to use.
Glib::ustring::ustring | ( | size_type | n, | |
char | c | |||
) |
Construct a ustring as multiple characters.
- Parameters:
-
n Number of characters. c ASCII character to use.
Glib::ustring::ustring | ( | In | pbegin, | |
In | pend | |||
) |
Construct a ustring as a copy of a range.
- Parameters:
-
pbegin Start of range. pend End of range.
Member Function Documentation
Assign the value of another string to this string.
- Parameters:
-
other A source string.
void Glib::ustring::swap | ( | ustring& | other | ) |
Swap contents with another string.
- Parameters:
-
other String to swap with.
ustring& Glib::ustring::operator= | ( | const std::string & | src | ) |
ustring& Glib::ustring::operator= | ( | const char * | src | ) |
ustring& Glib::ustring::operator= | ( | gunichar | uc | ) |
ustring& Glib::ustring::operator= | ( | char | c | ) |
ustring& Glib::ustring::assign | ( | const char * | src | ) |
ustring& Glib::ustring::assign | ( | In | pbegin, | |
In | pend | |||
) |
ustring& Glib::ustring::operator+= | ( | const char * | src | ) |
ustring& Glib::ustring::operator+= | ( | gunichar | uc | ) |
ustring& Glib::ustring::operator+= | ( | char | c | ) |
void Glib::ustring::push_back | ( | gunichar | uc | ) |
void Glib::ustring::push_back | ( | char | c | ) |
ustring& Glib::ustring::append | ( | const char * | src | ) |
ustring& Glib::ustring::append | ( | In | pbegin, | |
In | pend | |||
) |
void Glib::ustring::insert | ( | iterator | p, | |
In | pbegin, | |||
In | pend | |||
) |
ustring& Glib::ustring::replace | ( | size_type | i, | |
size_type | n, | |||
const ustring& | src, | |||
size_type | i2, | |||
size_type | n2 | |||
) |
ustring& Glib::ustring::replace | ( | iterator | pbegin, | |
iterator | pend, | |||
In | pbegin2, | |||
In | pend2 | |||
) |
void Glib::ustring::clear | ( | ) |
ustring& Glib::ustring::erase | ( | ) |
int Glib::ustring::compare | ( | const ustring& | rhs | ) | const |
int Glib::ustring::compare | ( | const char * | rhs | ) | const |
int Glib::ustring::compare | ( | size_type | i, | |
size_type | n, | |||
const ustring& | rhs, | |||
size_type | i2, | |||
size_type | n2 | |||
) | const |
std::string Glib::ustring::collate_key | ( | ) | const |
Create a unique sorting key for the UTF-8 string. If you need to compare UTF-8 strings regularly, e.g. for sorted containers such as std::set<>
, you should consider creating a collate key first and compare this key instead of the actual string.
The ustring::compare() methods as well as the relational operators == != < > <= >=
are quite costly because they have to deal with Unicode and the collation rules defined by the current locale. Converting both operands to UCS-4 is just the first of several costly steps involved when comparing ustrings. So be careful.
std::string Glib::ustring::casefold_collate_key | ( | ) | const |
Create a unique key for the UTF-8 string that can be used for caseless sorting. ustr.casefold_collate_key()
results in the same string as ustr.casefold().collate_key()
, but the former is likely more efficient.
value_type Glib::ustring::operator[] | ( | size_type | i | ) | const |
No reference return; use replace() to write characters.
value_type Glib::ustring::at | ( | size_type | i | ) | const |
No reference return; use replace() to write characters.
- Exceptions:
-
std::out_of_range
iterator Glib::ustring::begin | ( | ) |
iterator Glib::ustring::end | ( | ) |
const_iterator Glib::ustring::begin | ( | ) | const |
const_iterator Glib::ustring::end | ( | ) | const |
reverse_iterator Glib::ustring::rbegin | ( | ) |
reverse_iterator Glib::ustring::rend | ( | ) |
const_reverse_iterator Glib::ustring::rbegin | ( | ) | const |
const_reverse_iterator Glib::ustring::rend | ( | ) | const |
bool Glib::ustring::empty | ( | ) | const |
Returns true if the string is empty.
Equivalent to *this == "".
- Returns:
- Whether the string is empty.
size_type Glib::ustring::size | ( | ) | const |
size_type Glib::ustring::bytes | ( | ) | const |
void Glib::ustring::resize | ( | size_type | n, | |
gunichar | uc | |||
) |
void Glib::ustring::resize | ( | size_type | n, | |
char | c = '\0' | |||
) |
size_type Glib::ustring::capacity | ( | ) | const |
size_type Glib::ustring::max_size | ( | ) | const |
void Glib::ustring::reserve | ( | size_type | n = 0 |
) |
Glib::ustring::operator std::string | ( | ) | const [inline] |
const std::string& Glib::ustring::raw | ( | ) | const [inline] |
const char* Glib::ustring::data | ( | ) | const |
const char* Glib::ustring::c_str | ( | ) | const |
- Returns:
- Number of copied bytes, not characters.
bool Glib::ustring::validate | ( | ) | const |
Check whether the string is valid UTF-8.
bool Glib::ustring::validate | ( | iterator& | first_invalid | ) |
Check whether the string is valid UTF-8.
bool Glib::ustring::validate | ( | const_iterator& | first_invalid | ) | const |
Check whether the string is valid UTF-8.
bool Glib::ustring::is_ascii | ( | ) | const |
Check whether the string is plain 7-bit ASCII.
- Unlike any other ustring method, is_ascii() is safe to use on invalid UTF-8 strings. If the string isn't valid UTF-8, it cannot be valid ASCII either, therefore is_ascii() will just return
false
then.
- Returns:
- Whether the string contains only ASCII characters.
ustring Glib::ustring::normalize | ( | NormalizeMode | mode = NORMALIZE_DEFAULT_COMPOSE |
) | const |
"Normalize" the Unicode character representation of the string.
ustring Glib::ustring::uppercase | ( | ) | const |
Returns a new UTF-8 string with all characters converted to their uppercase equivalent, while honoring the current locale. The resulting string may change in the number of bytes as well as in the number of characters. For instance, the German sharp s "ß"
will be replaced by two characters "SS"
because there is no capital "ß"
.
ustring Glib::ustring::lowercase | ( | ) | const |
Returns a new UTF-8 string with all characters converted to their lowercase equivalent, while honoring the current locale. The resulting string may change in the number of bytes as well as in the number of characters.
ustring Glib::ustring::casefold | ( | ) | const |
Returns a caseless representation of the UTF-8 string. The resulting string doesn't correspond to any particular case, therefore the result is only useful to compare strings and should never be displayed to the user.
Friends And Related Function Documentation
std::istream & operator>> | ( | std::istream & | is, | |
Glib::ustring& | utf8_string | |||
) | [related] |
std::ostream & operator<< | ( | std::ostream & | os, | |
const Glib::ustring& | utf8_string | |||
) | [related] |
bool operator== | ( | const ustring& | lhs, | |
const char * | rhs | |||
) | [related] |
bool operator== | ( | const char * | lhs, | |
const ustring& | rhs | |||
) | [related] |
bool operator!= | ( | const ustring& | lhs, | |
const char * | rhs | |||
) | [related] |
bool operator!= | ( | const char * | lhs, | |
const ustring& | rhs | |||
) | [related] |
bool operator< | ( | const ustring& | lhs, | |
const char * | rhs | |||
) | [related] |
bool operator< | ( | const char * | lhs, | |
const ustring& | rhs | |||
) | [related] |
bool operator> | ( | const ustring& | lhs, | |
const char * | rhs | |||
) | [related] |
bool operator> | ( | const char * | lhs, | |
const ustring& | rhs | |||
) | [related] |
bool operator<= | ( | const ustring& | lhs, | |
const char * | rhs | |||
) | [related] |
bool operator<= | ( | const char * | lhs, | |
const ustring& | rhs | |||
) | [related] |
bool operator>= | ( | const ustring& | lhs, | |
const char * | rhs | |||
) | [related] |
bool operator>= | ( | const char * | lhs, | |
const ustring& | rhs | |||
) | [related] |
Member Data Documentation
GLIBMM_API const size_type Glib::ustring::npos [static] |
The documentation for this class was generated from the following file: