Glib::ustring Class Reference
Glib::ustring has much the same interface as std::string, but contains Unicode characters encoded as UTF-8. More...
Public Types | |
| typedef std::string::size_type | size_type |
| typedef std::string::difference_type | difference_type |
| typedef gunichar | value_type |
| typedef gunichar& | reference |
| typedef const gunichar& | const_reference |
| typedef ustring_Iterator <std::string::iterator> | iterator |
| typedef ustring_Iterator <std::string::const_iterator> | const_iterator |
| typedef std::reverse_iterator <iterator> | reverse_iterator |
| typedef std::reverse_iterator <const_iterator> | const_reverse_iterator |
Public Member Functions | |
| ustring () | |
| ~ustring () | |
| ustring (const ustring& other) | |
| ustring& | operator= (const ustring& other) |
| void | swap (ustring& other) |
| ustring (const std::string& src) | |
| ustring (const ustring& src, size_type i, size_type n=npos) | |
| ustring (const char* src, size_type n) | |
| ustring (const char* src) | |
| ustring (size_type n, gunichar uc) | |
| ustring (size_type n, char c) | |
| template<class In> | |
| ustring (In pbegin, In pend) | |
Assign new contents. | |
| ustring& | operator= (const std::string& src) |
| ustring& | operator= (const char* src) |
| ustring& | operator= (gunichar uc) |
| ustring& | operator= (char c) |
| ustring& | assign (const ustring& src) |
| ustring& | assign (const ustring& src, size_type i, size_type n) |
| ustring& | assign (const char* src, size_type n) |
| ustring& | assign (const char* src) |
| ustring& | assign (size_type n, gunichar uc) |
| ustring& | assign (size_type n, char c) |
| template<class In> | |
| ustring& | assign (In pbegin, In pend) |
Append to the string. | |
| ustring& | operator+= (const ustring& src) |
| ustring& | operator+= (const char* src) |
| ustring& | operator+= (gunichar uc) |
| ustring& | operator+= (char c) |
| void | push_back (gunichar uc) |
| void | push_back (char c) |
| ustring& | append (const ustring& src) |
| ustring& | append (const ustring& src, size_type i, size_type n) |
| ustring& | append (const char* src, size_type n) |
| ustring& | append (const char* src) |
| ustring& | append (size_type n, gunichar uc) |
| ustring& | append (size_type n, char c) |
| template<class In> | |
| ustring& | append (In pbegin, In pend) |
Insert into the string. | |
| ustring& | insert (size_type i, const ustring& src) |
| ustring& | insert (size_type i, const ustring& src, size_type i2, size_type n) |
| ustring& | insert (size_type i, const char* src, size_type n) |
| ustring& | insert (size_type i, const char* src) |
| ustring& | insert (size_type i, size_type n, gunichar uc) |
| ustring& | insert (size_type i, size_type n, char c) |
| iterator | insert (iterator p, gunichar uc) |
| iterator | insert (iterator p, char c) |
| void | insert (iterator p, size_type n, gunichar uc) |
| void | insert (iterator p, size_type n, char c) |
| template<class In> | |
| void | insert (iterator p, In pbegin, In pend) |
Replace sub-strings. | |
| ustring& | replace (size_type i, size_type n, const ustring& src) |
| ustring& | replace (size_type i, size_type n, const ustring& src, size_type i2, size_type n2) |
| ustring& | replace (size_type i, size_type n, const char* src, size_type n2) |
| ustring& | replace (size_type i, size_type n, const char* src) |
| ustring& | replace (size_type i, size_type n, size_type n2, gunichar uc) |
| ustring& | replace (size_type i, size_type n, size_type n2, char c) |
| ustring& | replace (iterator pbegin, iterator pend, const ustring& src) |
| ustring& | replace (iterator pbegin, iterator pend, const char* src, size_type n) |
| ustring& | replace (iterator pbegin, iterator pend, const char* src) |
| ustring& | replace (iterator pbegin, iterator pend, size_type n, gunichar uc) |
| ustring& | replace (iterator pbegin, iterator pend, size_type n, char c) |
| template<class In> | |
| ustring& | replace (iterator pbegin, iterator pend, In pbegin2, In pend2) |
Erase sub-strings. | |
| void | clear () |
| ustring& | erase (size_type i, size_type n=npos) |
| ustring& | erase () |
| iterator | erase (iterator p) |
| iterator | erase (iterator pbegin, iterator pend) |
Compare and collate. | |
| int | compare (const ustring& rhs) const |
| int | compare (const char* rhs) const |
| int | compare (size_type i, size_type n, const ustring& rhs) const |
| int | compare (size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const |
| int | compare (size_type i, size_type n, const char* rhs, size_type n2) const |
| int | compare (size_type i, size_type n, const char* rhs) const |
| std::string | collate_key () const |
| std::string | casefold_collate_key () const |
Extract characters and sub-strings. | |
| value_type | operator[] (size_type i) const |
| value_type | at (size_type i) const |
| ustring | substr (size_type i=0, size_type n=npos) const |
Access a sequence of characters. | |
| iterator | begin () |
| iterator | end () |
| const_iterator | begin () const |
| const_iterator | end () const |
| reverse_iterator | rbegin () |
| reverse_iterator | rend () |
| const_reverse_iterator | rbegin () const |
| const_reverse_iterator | rend () const |
Find sub-strings. | |
| size_type | find (const ustring& str, size_type i=0) const |
| size_type | find (const char* str, size_type i, size_type n) const |
| size_type | find (const char* str, size_type i=0) const |
| size_type | find (gunichar uc, size_type i=0) const |
| size_type | find (char c, size_type i=0) const |
| size_type | rfind (const ustring& str, size_type i=npos) const |
| size_type | rfind (const char* str, size_type i, size_type n) const |
| size_type | rfind (const char* str, size_type i=npos) const |
| size_type | rfind (gunichar uc, size_type i=npos) const |
| size_type | rfind (char c, size_type i=npos) const |
Match against a set of characters. | |
| size_type | find_first_of (const ustring& match, size_type i=0) const |
| size_type | find_first_of (const char* match, size_type i, size_type n) const |
| size_type | find_first_of (const char* match, size_type i=0) const |
| size_type | find_first_of (gunichar uc, size_type i=0) const |
| size_type | find_first_of (char c, size_type i=0) const |
| size_type | find_last_of (const ustring& match, size_type i=npos) const |
| size_type | find_last_of (const char* match, size_type i, size_type n) const |
| size_type | find_last_of (const char* match, size_type i=npos) const |
| size_type | find_last_of (gunichar uc, size_type i=npos) const |
| size_type | find_last_of (char c, size_type i=npos) const |
| size_type | find_first_not_of (const ustring& match, size_type i=0) const |
| size_type | find_first_not_of (const char* match, size_type i, size_type n) const |
| size_type | find_first_not_of (const char* match, size_type i=0) const |
| size_type | find_first_not_of (gunichar uc, size_type i=0) const |
| size_type | find_first_not_of (char c, size_type i=0) const |
| size_type | find_last_not_of (const ustring& match, size_type i=npos) const |
| size_type | find_last_not_of (const char* match, size_type i, size_type n) const |
| size_type | find_last_not_of (const char* match, size_type i=npos) const |
| size_type | find_last_not_of (gunichar uc, size_type i=npos) const |
| size_type | find_last_not_of (char c, size_type i=npos) const |
Retrieve the string's size. | |
| bool | empty () const |
| Returns true if the string is empty. | |
| size_type | size () const |
| Returns the number of characters in the string, not including any null-termination. | |
| size_type | length () const |
| This is the same as size(). | |
| size_type | bytes () const |
| Returns the number of bytes in the string, not including any null-termination. | |
Change the string's size. | |
| void | resize (size_type n, gunichar uc) |
| void | resize (size_type n, char c='\0') |
Control the allocated memory. | |
| size_type | capacity () const |
| size_type | max_size () const |
| void | reserve (size_type n=0) |
Get a per-byte representation of the string. | |
| operator std::string () const | |
| const std::string& | raw () const |
| const char* | data () const |
| const char* | c_str () const |
| size_type | copy (char* dest, size_type n, size_type i=0) const |
UTF-8 utilities. | |
| bool | validate () const |
| bool | validate (iterator& first_invalid) |
| bool | validate (const_iterator& first_invalid) const |
| bool | is_ascii () const |
| ustring | normalize (NormalizeMode mode=NORMALIZE_DEFAULT_COMPOSE) const |
Character case conversion. | |
| ustring | uppercase () const |
| ustring | lowercase () const |
| ustring | casefold () const |
Static Public Member Functions | |
Message formatting. | |
| template<class T1> | |
| static ustring | compose (const ustring& fmt, const T1& a1) |
| template<class T1, class T2> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2) |
| template<class T1, class T2, class T3> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3) |
| template<class T1, class T2, class T3, class T4> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4) |
| template<class T1, class T2, class T3, class T4, class T5> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5) |
| template<class T1, class T2, class T3, class T4, class T5, class T6> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6) |
| template<class T1, class T2, class T3, class T4, class T5, class T6, class T7> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7) |
| template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7, const T8& a8) |
| template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9> | |
| static ustring | compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7, const T8& a8, const T9& a9) |
| template<class T1> | |
| static ustring | format (const T1& a1) |
| template<class T1, class T2> | |
| static ustring | format (const T1& a1, const T2& a2) |
| template<class T1, class T2, class T3> | |
| static ustring | format (const T1& a1, const T2& a2, const T3& a3) |
| template<class T1, class T2, class T3, class T4> | |
| static ustring | format (const T1& a1, const T2& a2, const T3& a3, const T4& a4) |
| template<class T1, class T2, class T3, class T4, class T5> | |
| static ustring | format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5) |
| template<class T1, class T2, class T3, class T4, class T5, class T6> | |
| static ustring | format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6) |
| template<class T1, class T2, class T3, class T4, class T5, class T6, class T7> | |
| static ustring | format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7) |
| template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8> | |
| static ustring | format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7, const T8& a8) |
Static Public Attributes | |
| static GLIBMM_API const size_type | npos |
Related Functions | |
| (Note that these are not member functions.) | |
| std::istream& | operator>> (std::istream& is, Glib::ustring& utf8_string) |
| Stream input operator. | |
| std::ostream& | operator<< (std::ostream& os, const Glib::ustring& utf8_string) |
| Stream output operator. | |
| void | swap (ustring& lhs, ustring& rhs) |
| bool | operator== (const ustring& lhs, const ustring& rhs) |
| bool | operator== (const ustring& lhs, const char* rhs) |
| bool | operator== (const char* lhs, const ustring& rhs) |
| bool | operator!= (const ustring& lhs, const ustring& rhs) |
| bool | operator!= (const ustring& lhs, const char* rhs) |
| bool | operator!= (const char* lhs, const ustring& rhs) |
| bool | operator< (const ustring& lhs, const ustring& rhs) |
| bool | operator< (const ustring& lhs, const char* rhs) |
| bool | operator< (const char* lhs, const ustring& rhs) |
| bool | operator> (const ustring& lhs, const ustring& rhs) |
| bool | operator> (const ustring& lhs, const char* rhs) |
| bool | operator> (const char* lhs, const ustring& rhs) |
| bool | operator<= (const ustring& lhs, const ustring& rhs) |
| bool | operator<= (const ustring& lhs, const char* rhs) |
| bool | operator<= (const char* lhs, const ustring& rhs) |
| bool | operator>= (const ustring& lhs, const ustring& rhs) |
| bool | operator>= (const ustring& lhs, const char* rhs) |
| bool | operator>= (const char* lhs, const ustring& rhs) |
| ustring | operator+ (const ustring& lhs, const ustring& rhs) |
| ustring | operator+ (const ustring& lhs, const char* rhs) |
| ustring | operator+ (const char* lhs, const ustring& rhs) |
| ustring | operator+ (const ustring& lhs, gunichar rhs) |
| ustring | operator+ (gunichar lhs, const ustring& rhs) |
| ustring | operator+ (const ustring& lhs, char rhs) |
| ustring | operator+ (char lhs, const ustring& rhs) |
Detailed Description
Glib::ustring has much the same interface as std::string, but contains Unicode characters encoded as UTF-8.
- About UTF-8 and ASCII
- The standard character set ANSI_X3.4-1968 -- more commonly known as ASCII -- is a subset of UTF-8. So, if you want to, you can use Glib::ustring without even thinking about UTF-8.
- Whenever ASCII is mentioned in this manual, we mean the real ASCII (i.e. as defined in ANSI_X3.4-1968), which contains only 7-bit characters. Glib::ustring cannot be used with ASCII-compatible extended 8-bit charsets like ISO-8859-1. It's a good idea to avoid string literals containing non-ASCII characters (e.g. German umlauts) in source code, or at least you should use UTF-8 literals.
- You can find a detailed UTF-8 and Unicode FAQ here: http://www.cl.cam.ac.uk/~mgk25/unicode.html
- Glib::ustring vs. std::string
- Glib::ustring has implicit type conversions to and from std::string. These conversions do not convert to/from the current locale (see Glib::locale_from_utf8() and Glib::locale_to_utf8() if you need that). You can always use std::string instead of Glib::ustring -- however, using std::string with multi-byte characters is quite hard. For instance,
std::string::operator[] might return a byte in the middle of a character, and std::string::length() returns the number of bytes rather than characters. So don't do that without a good reason.
- In a perfect world the C++ Standard Library would contain a UTF-8 string class. Unfortunately, the C++ standard doesn't mention UTF-8 at all. Note that std::wstring is not a UTF-8 string class because it contains only fixed-width characters (where width could be 32, 16, or even 8 bits).
- Glib::ustring and stream input/output
- The stream I/O operators, that is operator<<() and operator>>(), perform implicit charset conversion to/from the current locale. If that's not what you intended (e.g. when writing to a configuration file that should always be UTF-8 encoded) use ustring::raw() to override this behaviour.
- If you're using std::ostringstream to build strings for display in the user interface, you must convert the result back to UTF-8 as shown below:
std::ostringstream output;
output.imbue(std::locale("")); // use the user's locale for this stream
output << percentage << " % done";
label->set_text(Glib::locale_to_utf8(output.str()));
- Formatted output and internationalization
- The methods ustring::compose() and ustring::format() provide a convenient and powerful alternative to string streams, as shown in the example below. Refer to the method documentation of compose() and format() for details.
using Glib::ustring;
ustring message = ustring::compose("%1 is lower than 0x%2.", 12, ustring::format(std::hex, 16));
- Implementation notes
- Glib::ustring does not inherit from std::string, because std::string was intended to be a final class. For instance, it does not have a virtual destructor. Also, a HAS-A relationship is more appropriate because ustring can't just enhance the std::string interface. Rather, it has to reimplement the interface so that all operations are based on characters instead of bytes.
Member Typedef Documentation
| typedef std::string::difference_type Glib::ustring::difference_type |
| typedef gunichar Glib::ustring::value_type |
| typedef gunichar& Glib::ustring::reference |
| typedef const gunichar& Glib::ustring::const_reference |
| typedef ustring_Iterator<std::string::iterator> Glib::ustring::iterator |
| typedef ustring_Iterator<std::string::const_iterator> Glib::ustring::const_iterator |
Constructor & Destructor Documentation
| Glib::ustring::ustring | ( | ) |
Default constructor, which creates an empty string.
| Glib::ustring::~ustring | ( | ) |
| Glib::ustring::ustring | ( | const ustring& | other | ) |
| Glib::ustring::ustring | ( | const std::string & | src | ) |
Construct a ustring as a copy of another std::string.
- Parameters:
-
src A source std::string containing text encoded as UTF-8.
| Glib::ustring::ustring | ( | const char * | src, | |
| size_type | n | |||
| ) |
Construct a ustring as a partial copy of a C string.
- Parameters:
-
src Source C string encoded as UTF-8. n Number of UTF-8 characters to copy.
| Glib::ustring::ustring | ( | const char * | src | ) |
Construct a ustring as a copy of a C string.
- Parameters:
-
src Source C string encoded as UTF-8.
| Glib::ustring::ustring | ( | size_type | n, | |
| gunichar | uc | |||
| ) |
Construct a ustring as multiple characters.
- Parameters:
-
n Number of characters. uc UCS-4 code point to use.
| Glib::ustring::ustring | ( | size_type | n, | |
| char | c | |||
| ) |
Construct a ustring as multiple characters.
- Parameters:
-
n Number of characters. c ASCII character to use.
template <class In>
| Glib::ustring::ustring | ( | In | pbegin, | |
| In | pend | |||
| ) | [inline] |
Construct a ustring as a copy of a range.
- Parameters:
-
pbegin Start of range. pend End of range.
Member Function Documentation
Assign the value of another string to this string.
- Parameters:
-
other A source string.
| void Glib::ustring::swap | ( | ustring& | other | ) |
| ustring& Glib::ustring::operator= | ( | const std::string & | src | ) |
| ustring& Glib::ustring::operator= | ( | const char * | src | ) |
| ustring& Glib::ustring::operator= | ( | gunichar | uc | ) |
| ustring& Glib::ustring::operator= | ( | char | c | ) |
| ustring& Glib::ustring::assign | ( | const char * | src | ) |
template <class In>
| ustring& Glib::ustring::assign | ( | In | pbegin, | |
| In | pend | |||
| ) | [inline] |
| ustring& Glib::ustring::operator+= | ( | const char * | src | ) |
| ustring& Glib::ustring::operator+= | ( | gunichar | uc | ) |
| ustring& Glib::ustring::operator+= | ( | char | c | ) |
| void Glib::ustring::push_back | ( | gunichar | uc | ) |
| void Glib::ustring::push_back | ( | char | c | ) |
