logo top
Main Page   glibmm Namespaces   Book  

Glib::ustring Class Reference

Glib::ustring has much the same interface as std::string, but contains Unicode characters encoded as UTF-8. More...

List of all members.

Public Types

typedef std::string::size_type size_type
typedef
std::string::difference_type 
difference_type
typedef gunichar value_type
typedef gunichar& reference
typedef const gunichar& const_reference
typedef ustring_Iterator
<std::string::iterator> 
iterator
typedef ustring_Iterator
<std::string::const_iterator> 
const_iterator
typedef std::reverse_iterator
<iterator> 
reverse_iterator
typedef std::reverse_iterator
<const_iterator> 
const_reverse_iterator

Public Member Functions

 ustring ()
 ~ustring ()
 ustring (const ustring& other)
ustring& operator= (const ustring& other)
void swap (ustring& other)
 ustring (const std::string& src)
 ustring (const ustring& src, size_type i, size_type n=npos)
 ustring (const char* src, size_type n)
 ustring (const char* src)
 ustring (size_type n, gunichar uc)
 ustring (size_type n, char c)
template<class In>
 ustring (In pbegin, In pend)
Assign new contents.
ustring& operator= (const std::string& src)
ustring& operator= (const char* src)
ustring& operator= (gunichar uc)
ustring& operator= (char c)
ustring& assign (const ustring& src)
ustring& assign (const ustring& src, size_type i, size_type n)
ustring& assign (const char* src, size_type n)
ustring& assign (const char* src)
ustring& assign (size_type n, gunichar uc)
ustring& assign (size_type n, char c)
template<class In>
ustring& assign (In pbegin, In pend)
Append to the string.
ustring& operator+= (const ustring& src)
ustring& operator+= (const char* src)
ustring& operator+= (gunichar uc)
ustring& operator+= (char c)
void push_back (gunichar uc)
void push_back (char c)
ustring& append (const ustring& src)
ustring& append (const ustring& src, size_type i, size_type n)
ustring& append (const char* src, size_type n)
ustring& append (const char* src)
ustring& append (size_type n, gunichar uc)
ustring& append (size_type n, char c)
template<class In>
ustring& append (In pbegin, In pend)
Insert into the string.
ustring& insert (size_type i, const ustring& src)
ustring& insert (size_type i, const ustring& src, size_type i2, size_type n)
ustring& insert (size_type i, const char* src, size_type n)
ustring& insert (size_type i, const char* src)
ustring& insert (size_type i, size_type n, gunichar uc)
ustring& insert (size_type i, size_type n, char c)
iterator insert (iterator p, gunichar uc)
iterator insert (iterator p, char c)
void insert (iterator p, size_type n, gunichar uc)
void insert (iterator p, size_type n, char c)
template<class In>
void insert (iterator p, In pbegin, In pend)
Replace sub-strings.
ustring& replace (size_type i, size_type n, const ustring& src)
ustring& replace (size_type i, size_type n, const ustring& src, size_type i2, size_type n2)
ustring& replace (size_type i, size_type n, const char* src, size_type n2)
ustring& replace (size_type i, size_type n, const char* src)
ustring& replace (size_type i, size_type n, size_type n2, gunichar uc)
ustring& replace (size_type i, size_type n, size_type n2, char c)
ustring& replace (iterator pbegin, iterator pend, const ustring& src)
ustring& replace (iterator pbegin, iterator pend, const char* src, size_type n)
ustring& replace (iterator pbegin, iterator pend, const char* src)
ustring& replace (iterator pbegin, iterator pend, size_type n, gunichar uc)
ustring& replace (iterator pbegin, iterator pend, size_type n, char c)
template<class In>
ustring& replace (iterator pbegin, iterator pend, In pbegin2, In pend2)
Erase sub-strings.
void clear ()
ustring& erase (size_type i, size_type n=npos)
ustring& erase ()
iterator erase (iterator p)
iterator erase (iterator pbegin, iterator pend)
Compare and collate.
int compare (const ustring& rhs) const
int compare (const char* rhs) const
int compare (size_type i, size_type n, const ustring& rhs) const
int compare (size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const
int compare (size_type i, size_type n, const char* rhs, size_type n2) const
int compare (size_type i, size_type n, const char* rhs) const
std::string collate_key () const
std::string casefold_collate_key () const
Extract characters and sub-strings.
value_type operator[] (size_type i) const
value_type at (size_type i) const
ustring substr (size_type i=0, size_type n=npos) const
Access a sequence of characters.
iterator begin ()
iterator end ()
const_iterator begin () const
const_iterator end () const
reverse_iterator rbegin ()
reverse_iterator rend ()
const_reverse_iterator rbegin () const
const_reverse_iterator rend () const
Find sub-strings.
size_type find (const ustring& str, size_type i=0) const
size_type find (const char* str, size_type i, size_type n) const
size_type find (const char* str, size_type i=0) const
size_type find (gunichar uc, size_type i=0) const
size_type find (char c, size_type i=0) const
size_type rfind (const ustring& str, size_type i=npos) const
size_type rfind (const char* str, size_type i, size_type n) const
size_type rfind (const char* str, size_type i=npos) const
size_type rfind (gunichar uc, size_type i=npos) const
size_type rfind (char c, size_type i=npos) const
Match against a set of characters.
size_type find_first_of (const ustring& match, size_type i=0) const
size_type find_first_of (const char* match, size_type i, size_type n) const
size_type find_first_of (const char* match, size_type i=0) const
size_type find_first_of (gunichar uc, size_type i=0) const
size_type find_first_of (char c, size_type i=0) const
size_type find_last_of (const ustring& match, size_type i=npos) const
size_type find_last_of (const char* match, size_type i, size_type n) const
size_type find_last_of (const char* match, size_type i=npos) const
size_type find_last_of (gunichar uc, size_type i=npos) const
size_type find_last_of (char c, size_type i=npos) const
size_type find_first_not_of (const ustring& match, size_type i=0) const
size_type find_first_not_of (const char* match, size_type i, size_type n) const
size_type find_first_not_of (const char* match, size_type i=0) const
size_type find_first_not_of (gunichar uc, size_type i=0) const
size_type find_first_not_of (char c, size_type i=0) const
size_type find_last_not_of (const ustring& match, size_type i=npos) const
size_type find_last_not_of (const char* match, size_type i, size_type n) const
size_type find_last_not_of (const char* match, size_type i=npos) const
size_type find_last_not_of (gunichar uc, size_type i=npos) const
size_type find_last_not_of (char c, size_type i=npos) const
Retrieve the string's size.
bool empty () const
 Returns true if the string is empty.
size_type size () const
 Returns the number of characters in the string, not including any null-termination.
size_type length () const
 This is the same as size().
size_type bytes () const
 Returns the number of bytes in the string, not including any null-termination.
Change the string's size.
void resize (size_type n, gunichar uc)
void resize (size_type n, char c='\0')
Control the allocated memory.
size_type capacity () const
size_type max_size () const
void reserve (size_type n=0)
Get a per-byte representation of the string.
 operator std::string () const
const std::string& raw () const
const char* data () const
const char* c_str () const
size_type copy (char* dest, size_type n, size_type i=0) const
UTF-8 utilities.
bool validate () const
bool validate (iterator& first_invalid)
bool validate (const_iterator& first_invalid) const
bool is_ascii () const
ustring normalize (NormalizeMode mode=NORMALIZE_DEFAULT_COMPOSE) const
Character case conversion.
ustring uppercase () const
ustring lowercase () const
ustring casefold () const

Static Public Member Functions

Message formatting.
template<class T1>
static ustring compose (const ustring& fmt, const T1& a1)
template<class T1, class T2>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2)
template<class T1, class T2, class T3>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3)
template<class T1, class T2, class T3, class T4>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4)
template<class T1, class T2, class T3, class T4, class T5>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5)
template<class T1, class T2, class T3, class T4, class T5, class T6>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6)
template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7)
template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7, const T8& a8)
template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
static ustring compose (const ustring& fmt, const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7, const T8& a8, const T9& a9)
template<class T1>
static ustring format (const T1& a1)
template<class T1, class T2>
static ustring format (const T1& a1, const T2& a2)
template<class T1, class T2, class T3>
static ustring format (const T1& a1, const T2& a2, const T3& a3)
template<class T1, class T2, class T3, class T4>
static ustring format (const T1& a1, const T2& a2, const T3& a3, const T4& a4)
template<class T1, class T2, class T3, class T4, class T5>
static ustring format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5)
template<class T1, class T2, class T3, class T4, class T5, class T6>
static ustring format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6)
template<class T1, class T2, class T3, class T4, class T5, class T6, class T7>
static ustring format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7)
template<class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
static ustring format (const T1& a1, const T2& a2, const T3& a3, const T4& a4, const T5& a5, const T6& a6, const T7& a7, const T8& a8)

Static Public Attributes

static GLIBMM_API const size_type npos

Related Functions

(Note that these are not member functions.)

std::istream& operator>> (std::istream& is, Glib::ustring& utf8_string)
 Stream input operator.
std::ostream& operator<< (std::ostream& os, const Glib::ustring& utf8_string)
 Stream output operator.
void swap (ustring& lhs, ustring& rhs)
bool operator== (const ustring& lhs, const ustring& rhs)
bool operator== (const ustring& lhs, const char* rhs)
bool operator== (const char* lhs, const ustring& rhs)
bool operator!= (const ustring& lhs, const ustring& rhs)
bool operator!= (const ustring& lhs, const char* rhs)
bool operator!= (const char* lhs, const ustring& rhs)
bool operator< (const ustring& lhs, const ustring& rhs)
bool operator< (const ustring& lhs, const char* rhs)
bool operator< (const char* lhs, const ustring& rhs)
bool operator> (const ustring& lhs, const ustring& rhs)
bool operator> (const ustring& lhs, const char* rhs)
bool operator> (const char* lhs, const ustring& rhs)
bool operator<= (const ustring& lhs, const ustring& rhs)
bool operator<= (const ustring& lhs, const char* rhs)
bool operator<= (const char* lhs, const ustring& rhs)
bool operator>= (const ustring& lhs, const ustring& rhs)
bool operator>= (const ustring& lhs, const char* rhs)
bool operator>= (const char* lhs, const ustring& rhs)
ustring operator+ (const ustring& lhs, const ustring& rhs)
ustring operator+ (const ustring& lhs, const char* rhs)
ustring operator+ (const char* lhs, const ustring& rhs)
ustring operator+ (const ustring& lhs, gunichar rhs)
ustring operator+ (gunichar lhs, const ustring& rhs)
ustring operator+ (const ustring& lhs, char rhs)
ustring operator+ (char lhs, const ustring& rhs)


Detailed Description

Glib::ustring has much the same interface as std::string, but contains Unicode characters encoded as UTF-8.

About UTF-8 and ASCII
The standard character set ANSI_X3.4-1968 -- more commonly known as ASCII -- is a subset of UTF-8. So, if you want to, you can use Glib::ustring without even thinking about UTF-8.
Whenever ASCII is mentioned in this manual, we mean the real ASCII (i.e. as defined in ANSI_X3.4-1968), which contains only 7-bit characters. Glib::ustring cannot be used with ASCII-compatible extended 8-bit charsets like ISO-8859-1. It's a good idea to avoid string literals containing non-ASCII characters (e.g. German umlauts) in source code, or at least you should use UTF-8 literals.
You can find a detailed UTF-8 and Unicode FAQ here: http://www.cl.cam.ac.uk/~mgk25/unicode.html
Glib::ustring vs. std::string
Glib::ustring has implicit type conversions to and from std::string. These conversions do not convert to/from the current locale (see Glib::locale_from_utf8() and Glib::locale_to_utf8() if you need that). You can always use std::string instead of Glib::ustring -- however, using std::string with multi-byte characters is quite hard. For instance, std::string::operator[] might return a byte in the middle of a character, and std::string::length() returns the number of bytes rather than characters. So don't do that without a good reason.
In a perfect world the C++ Standard Library would contain a UTF-8 string class. Unfortunately, the C++ standard doesn't mention UTF-8 at all. Note that std::wstring is not a UTF-8 string class because it contains only fixed-width characters (where width could be 32, 16, or even 8 bits).
Glib::ustring and stream input/output
The stream I/O operators, that is operator<<() and operator>>(), perform implicit charset conversion to/from the current locale. If that's not what you intended (e.g. when writing to a configuration file that should always be UTF-8 encoded), use ustring::raw() to override this behaviour.
If you're using std::ostringstream to build strings for display in the user interface, you must convert the result back to UTF-8 as shown below:
 std::ostringstream output;
 output.imbue(std::locale("")); // use the user's locale for this stream
 output << percentage << " % done";
 label->set_text(Glib::locale_to_utf8(output.str()));
Formatted output and internationalization
The methods ustring::compose() and ustring::format() provide a convenient and powerful alternative to string streams, as shown in the example below. Refer to the method documentation of compose() and format() for details.
 using Glib::ustring;

 ustring message = ustring::compose("%1 is lower than 0x%2.",
                                    12, ustring::format(std::hex, 16));
Implementation notes
Glib::ustring does not inherit from std::string, because std::string was intended to be a final class. For instance, it does not have a virtual destructor. Also, a HAS-A relationship is more appropriate because ustring can't just enhance the std::string interface. Rather, it has to reimplement the interface so that all operations are based on characters instead of bytes.

Member Typedef Documentation

typedef std::string::difference_type Glib::ustring::difference_type

typedef gunichar Glib::ustring::value_type

typedef gunichar& Glib::ustring::reference

typedef const gunichar& Glib::ustring::const_reference

typedef ustring_Iterator<std::string::iterator> Glib::ustring::iterator

typedef ustring_Iterator<std::string::const_iterator> Glib::ustring::const_iterator


Constructor & Destructor Documentation

Glib::ustring::ustring (  ) 

Default constructor, which creates an empty string.

Glib::ustring::~ustring (  ) 

Glib::ustring::ustring ( const ustring& other  ) 

Construct a ustring as a copy of another ustring.

Parameters:
other A source string.

Glib::ustring::ustring ( const std::string& src  ) 

Construct a ustring as a copy of another std::string.

Parameters:
src A source std::string containing text encoded as UTF-8.

Glib::ustring::ustring ( const ustring& src,
size_type  i,
size_type  n = npos 
)

Construct a ustring as a copy of a substring.

Parameters:
src Source ustring.
i Index of first character to copy from.
n Number of UTF-8 characters to copy (defaults to copying the remainder).

Glib::ustring::ustring ( const char *  src,
size_type  n 
)

Construct a ustring as a partial copy of a C string.

Parameters:
src Source C string encoded as UTF-8.
n Number of UTF-8 characters to copy.

Glib::ustring::ustring ( const char *  src  ) 

Construct a ustring as a copy of a C string.

Parameters:
src Source C string encoded as UTF-8.

Glib::ustring::ustring ( size_type  n,
gunichar  uc 
)

Construct a ustring as multiple characters.

Parameters:
n Number of characters.
uc UCS-4 code point to use.

Glib::ustring::ustring ( size_type  n,
char  c 
)

Construct a ustring as multiple characters.

Parameters:
n Number of characters.
c ASCII character to use.

template <class In>
Glib::ustring::ustring ( In  pbegin,
In  pend 
) [inline]

Construct a ustring as a copy of a range.

Parameters:
pbegin Start of range.
pend End of range.


Member Function Documentation

ustring& Glib::ustring::operator= ( const ustring& other  ) 

Assign the value of another string to this string.

Parameters:
other A source string.

void Glib::ustring::swap ( ustring& other  ) 

Swap contents with another string.

Parameters:
other String to swap with.

Referenced by swap().

ustring& Glib::ustring::operator= ( const std::string& src  ) 

ustring& Glib::ustring::operator= ( const char *  src  ) 

ustring& Glib::ustring::operator= ( gunichar  uc  ) 

ustring& Glib::ustring::operator= ( char  c  ) 

ustring& Glib::ustring::assign ( const ustring& src  ) 

ustring& Glib::ustring::assign ( const ustring& src,
size_type  i,
size_type  n 
)

ustring& Glib::ustring::assign ( const char *  src,
size_type  n 
)

ustring& Glib::ustring::assign ( const char *  src  ) 

ustring& Glib::ustring::assign ( size_type  n,
gunichar  uc 
)

ustring& Glib::ustring::assign ( size_type  n,
char  c 
)

template <class In>
ustring& Glib::ustring::assign ( In  pbegin,
In  pend 
) [inline]

ustring& Glib::ustring::operator+= ( const ustring& src  ) 

ustring& Glib::ustring::operator+= ( const char *  src  ) 

ustring& Glib::ustring::operator+= ( gunichar  uc  ) 

ustring& Glib::ustring::operator+= ( char  c  ) 

void Glib::ustring::push_back ( gunichar  uc  ) 

void Glib::ustring::push_back ( char  c  ) 

ustring& Glib::ustring::append ( const ustring& src  ) 

ustring& Glib::ustring::append ( const ustring& src,
size_type  i,
size_type  n 
)