Tizen Native API
6.0
|
The Ustring module provides general unicode string handling information.
Required Header
#include <utils_i18n.h>
Overview
The Ustring module provides general unicode string handling information.
Sample Code 1
It converts a byte string to a unicode string and then to uppercase letters.
char str_1[64] = {0,};
i18n_uchar uchar_str_1[64] = {0,};
i18n_uchar uchar_str_2[64] = {0,};
int uchar_len = 0;
i18n_uerror_code_e err_code = I18N_ERROR_NONE;

strcpy(str_1, "tizen");
dlog_print(DLOG_INFO, LOG_TAG, "str_1 is %s\n", str_1); // str_1 is tizen

// converts a byte string to a unicode string
i18n_ustring_copy_ua_n(uchar_str_1, str_1, strlen(str_1));

// converts to uppercase letters
i18n_ustring_to_upper(uchar_str_2, 64, uchar_str_1, i18n_ustring_get_length( uchar_str_1 ), "en_US", &err_code);

i18n_ustring_copy_au(str_1, uchar_str_2);
dlog_print(DLOG_INFO, LOG_TAG, "str_1 is %s\n", str_1); // str_1 is TIZEN
Functions | |
int32_t | i18n_ustring_get_length (const i18n_uchar *s) |
Determines the length of an array of i18n_uchar. | |
int32_t | i18n_ustring_count_char32 (const i18n_uchar *s, int32_t length) |
Counts Unicode code points in the length i18n_uchar code units of the string. | |
i18n_ubool | i18n_ustring_has_more_char32_than (const i18n_uchar *s, int32_t length, int32_t number) |
Checks if the string contains more Unicode code points than a certain number. | |
i18n_uchar * | i18n_ustring_cat (i18n_uchar *dest, const i18n_uchar *src) |
Concatenates two ustrings. | |
i18n_uchar * | i18n_ustring_cat_n (i18n_uchar *dest, const i18n_uchar *src, int32_t n) |
Concatenate two ustrings. | |
i18n_uchar * | i18n_ustring_string (const i18n_uchar *s, const i18n_uchar *sub_string) |
Finds the first occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_find_first (const i18n_uchar *s, int32_t length, const i18n_uchar *sub_string, int32_t sub_length) |
Finds the first occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_char (const i18n_uchar *s, i18n_uchar c) |
Finds the first occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_char32 (const i18n_uchar *s, i18n_uchar32 c) |
Finds the first occurrence of a code point in a string. | |
i18n_uchar * | i18n_ustring_r_string (const i18n_uchar *s, const i18n_uchar *sub_string) |
Finds the last occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_find_last (const i18n_uchar *s, int32_t length, const i18n_uchar *sub_string, int32_t sub_length) |
Finds the last occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_r_char (const i18n_uchar *s, i18n_uchar c) |
Finds the last occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_r_char32 (const i18n_uchar *s, i18n_uchar32 c) |
Finds the last occurrence of a code point in a string. | |
i18n_uchar * | i18n_ustring_pbrk (const i18n_uchar *string, const i18n_uchar *match_set) |
Locates the first occurrence in the string of any of the characters in the string match_set. | |
int32_t | i18n_ustring_cspn (const i18n_uchar *string, const i18n_uchar *match_set) |
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in match_set. | |
int32_t | i18n_ustring_spn (const i18n_uchar *string, const i18n_uchar *match_set) |
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in match_set. | |
i18n_uchar * | i18n_ustring_tokenizer_r (i18n_uchar *src, const i18n_uchar *delim, i18n_uchar **save_state) |
The string tokenizer API allows an application to break a string into tokens. | |
int32_t | i18n_ustring_compare (const i18n_uchar *s1, const i18n_uchar *s2) |
Compares two Unicode strings for bitwise equality (code unit order). | |
int32_t | i18n_ustring_compare_code_point_order (const i18n_uchar *s1, const i18n_uchar *s2) |
Compare two Unicode strings in code point order. | |
int32_t | i18n_ustring_compare_binary_order (const i18n_uchar *s1, int32_t length1, const i18n_uchar *s2, int32_t length2, i18n_ubool code_point_order) |
Compare two Unicode strings (binary order). | |
int32_t | i18n_ustring_case_compare_with_length (const i18n_uchar *s1, int32_t length1, const i18n_uchar *s2, int32_t length2, uint32_t options, i18n_error_code_e *error_code) |
Compare two strings case-insensitively using full case folding. | |
int32_t | i18n_ustring_compare_n (const i18n_uchar *s1, const i18n_uchar *s2, int32_t n) |
Compare two ustrings for bitwise equality. | |
int32_t | i18n_ustring_compare_n_code_point_order (const i18n_uchar *s1, const i18n_uchar *s2, int32_t n) |
Compare two Unicode strings in code point order. | |
int32_t | i18n_ustring_case_compare (const i18n_uchar *s1, const i18n_uchar *s2, uint32_t options) |
Compare two strings case-insensitively using full case folding. | |
int32_t | i18n_ustring_case_compare_n (const i18n_uchar *s1, const i18n_uchar *s2, int32_t n, uint32_t options) |
Compare two strings case-insensitively using full case folding. | |
int32_t | i18n_ustring_mem_case_compare (const i18n_uchar *s1, const i18n_uchar *s2, int32_t length, uint32_t options) |
Compare two strings case-insensitively using full case folding. | |
i18n_uchar * | i18n_ustring_copy (i18n_uchar *dest, const i18n_uchar *src) |
Copies a ustring. Adds a NULL terminator. | |
i18n_uchar * | i18n_ustring_copy_n (i18n_uchar *dest, const i18n_uchar *src, int32_t n) |
Copies a ustring. | |
i18n_uchar * | i18n_ustring_copy_ua (i18n_uchar *dest, const char *src) |
Copies a byte string encoded in the default codepage to a ustring. | |
i18n_uchar * | i18n_ustring_copy_ua_n (i18n_uchar *dest, const char *src, int32_t n) |
Copies a byte string encoded in the default codepage to a ustring. | |
char * | i18n_ustring_copy_au (char *dest, const i18n_uchar *src) |
Copies a ustring to a byte string encoded in the default codepage. | |
char * | i18n_ustring_copy_au_n (char *dest, const i18n_uchar *src, int32_t n) |
Copies a ustring to a byte string encoded in the default codepage. | |
i18n_uchar * | i18n_ustring_mem_copy (i18n_uchar *dest, const i18n_uchar *src, int32_t count) |
Synonym for memcpy(), but with i18n_uchar characters only. | |
i18n_uchar * | i18n_ustring_mem_move (i18n_uchar *dest, const i18n_uchar *src, int32_t count) |
Synonym for memmove(), but with i18n_uchar characters only. | |
i18n_uchar * | i18n_ustring_mem_set (i18n_uchar *dest, const i18n_uchar c, int32_t count) |
Initialize count characters of dest to c. | |
int32_t | i18n_ustring_mem_compare (const i18n_uchar *buf1, const i18n_uchar *buf2, int32_t count) |
Compare the first count i18n_uchar characters of each buffer. | |
int32_t | i18n_ustring_mem_compare_code_point_order (const i18n_uchar *s1, const i18n_uchar *s2, int32_t count) |
Compare two Unicode strings in code point order. | |
i18n_uchar * | i18n_ustring_mem_char (const i18n_uchar *s, i18n_uchar c, int32_t count) |
Finds the first occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_mem_char32 (const i18n_uchar *s, i18n_uchar32 c, int32_t count) |
Finds the first occurrence of a code point in a string. | |
i18n_uchar * | i18n_ustring_mem_r_char (const i18n_uchar *s, i18n_uchar c, int32_t count) |
Finds the last occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_mem_r_char32 (const i18n_uchar *s, i18n_uchar32 c, int32_t count) |
Finds the last occurrence of a code point in a string. | |
int32_t | i18n_ustring_unescape (const char *src, i18n_uchar *dest, int32_t dest_capacity) |
Unescape a string of characters and write the resulting Unicode characters to the destination buffer. | |
i18n_uchar32 | i18n_ustring_unescape_at (i18n_ustring_unescape_char_at_cb char_at, int32_t *offset, int32_t length, void *context) |
Unescape a single sequence. | |
int32_t | i18n_ustring_to_upper (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, const char *locale, i18n_error_code_e *error_code) |
Uppercases the characters in a string. | |
int32_t | i18n_ustring_to_lower (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, const char *locale, i18n_error_code_e *error_code) |
Lowercase the characters in a string. | |
int32_t | i18n_ustring_to_title_new (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, i18n_ubreak_iterator_h title_iter, const char *locale) |
Titlecases a string. | |
int32_t | i18n_ustring_fold_case (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, uint32_t options, i18n_error_code_e *error_code) |
Case-folds the characters in a string. | |
wchar_t * | i18n_ustring_to_WCS (wchar_t *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-16 string to a wchar_t string. | |
i18n_uchar * | i18n_ustring_from_WCS (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const wchar_t *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a wchar_t string to UTF-16. | |
char * | i18n_ustring_to_UTF8 (char *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_error_code_e *error_code) |
Converts a UTF-16 string to UTF-8. | |
i18n_uchar * | i18n_ustring_from_UTF8 (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const char *src, int32_t src_len, i18n_error_code_e *error_code) |
Converts a UTF-8 string to UTF-16. | |
char * | i18n_ustring_to_UTF8_with_sub (char *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-16 string to UTF-8. Same as i18n_ustring_to_UTF8() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code. | |
i18n_uchar * | i18n_ustring_from_UTF8_with_sub (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const char *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-8 string to UTF-16. | |
i18n_uchar * | i18n_ustring_from_UTF8_lenient (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const char *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-8 string to UTF-16. | |
i18n_uchar32 * | i18n_ustring_to_UTF32 (i18n_uchar32 *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-16 string to UTF-32. | |
i18n_uchar * | i18n_ustring_from_UTF32 (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar32 *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-32 string to UTF-16. | |
i18n_uchar32 * | i18n_ustring_to_UTF32_with_sub (i18n_uchar32 *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-16 string to UTF-32. | |
i18n_uchar * | i18n_ustring_from_UTF32_with_sub (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar32 *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-32 string to UTF-16. Same as i18n_ustring_from_UTF32() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code. | |
Typedefs | |
typedef i18n_uchar(* | i18n_ustring_unescape_char_at_cb )(int32_t offset, void *context) |
Callback function for i18n_ustring_unescape_at() that returns a character of the source text given an offset and a context pointer. The context pointer will be whatever is passed into i18n_ustring_unescape_at(). | |
Defines | |
#define | I18N_USTRING_U_FOLD_CASE_DEFAULT 0 |
Option value for case folding: use default mappings defined in CaseFolding.txt. | |
#define | I18N_USTRING_U_COMPARE_CODE_POINT_ORDER 0x8000 |
Option bit for i18n_ustring_case_compare_with_length(), i18n_ustring_case_compare(), etc.: Compare strings in code point order instead of code unit order. | |
#define | I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 |
Option value for case folding: Use the modified set of mappings provided in CaseFolding.txt to handle dotted I and dotless i appropriately for Turkic languages (tr, az). Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that are to be included for default mappings and excluded for the Turkic-specific mappings. Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that are to be excluded for default mappings and included for the Turkic-specific mappings. |
Define Documentation
#define I18N_USTRING_U_COMPARE_CODE_POINT_ORDER 0x8000 |
Option bit for i18n_ustring_case_compare_with_length(), i18n_ustring_case_compare(), etc.: Compare strings in code point order instead of code unit order.
- Since :
- 2.3.1
#define I18N_USTRING_U_FOLD_CASE_DEFAULT 0 |
Option value for case folding: use default mappings defined in CaseFolding.txt.
- Since :
- 2.3.1
#define I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 |
Option value for case folding:
Use the modified set of mappings provided in CaseFolding.txt to handle dotted I and dotless i appropriately for Turkic languages (tr, az).
Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that are to be included for default mappings and excluded for the Turkic-specific mappings.
Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that are to be excluded for default mappings and included for the Turkic-specific mappings.
- Since :
- 2.3.1
Typedef Documentation
typedef i18n_uchar(* i18n_ustring_unescape_char_at_cb)(int32_t offset, void *context) |
Callback function for i18n_ustring_unescape_at() that returns a character of the source text given an offset and a context pointer.
The context pointer will be whatever is passed into i18n_ustring_unescape_at().
- Since :
- 2.3.1
- Parameters:
-
[in] offset pointer to the offset that will be passed to i18n_ustring_unescape_at(). [in] context an opaque pointer passed directly into i18n_ustring_unescape_at()
- Return values:
-
character the character represented by the escape sequence at offset
- See also:
- i18n_ustring_unescape_at()
Function Documentation
int32_t i18n_ustring_case_compare | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
uint32_t | options | ||
) |
Compare two strings case-insensitively using full case folding.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare. [in] s2 A string to compare. [in] options A bit set of options:
- I18N_USTRING_U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
- I18N_USTRING_U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see i18n_ustring_compare_binary_order() for details).
- I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I
- Returns:
- A negative, zero, or positive integer indicating the comparison result.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_case_compare_n | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | n, | ||
uint32_t | options | ||
) |
Compare two strings case-insensitively using full case folding.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare. [in] s2 A string to compare. [in] n The maximum number of characters of each string to case-fold and then compare. [in] options A bit set of options:
- I18N_USTRING_U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
- I18N_USTRING_U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see i18n_ustring_compare_binary_order() for details).
- I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I
- Returns:
- A negative, zero, or positive integer indicating the comparison result.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_case_compare_with_length | ( | const i18n_uchar * | s1, |
int32_t | length1, | ||
const i18n_uchar * | s2, | ||
int32_t | length2, | ||
uint32_t | options, | ||
i18n_error_code_e * | error_code | ||
) |
Compare two strings case-insensitively using full case folding.
The comparison can be done in UTF-16 code unit order or in code point order. They differ only when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.
This function works with strings of different explicitly specified lengths, unlike the ANSI C-like i18n_ustring_compare() and i18n_ustring_mem_compare() etc. NULL-terminated strings are possible with length arguments of -1.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 First source string. [in] length1 Length of first source string, or -1 if NULL-terminated. [in] s2 Second source string. [in] length2 Length of second source string, or -1 if NULL-terminated. [in] options A bit set of options:
- I18N_USTRING_U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
- I18N_USTRING_U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see i18n_ustring_compare_code_point_order() for details).
- I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- <0 or 0 or >0 as usual for string comparisons
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_cat | ( | i18n_uchar * | dest, |
const i18n_uchar * | src | ||
) |
Concatenates two ustrings.
Appends a copy of src, including the NULL terminator, to dest. The initial copied character from src overwrites the NULL terminator in dest.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string. [in] src The source string.
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_cat_n | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | n | ||
) |
Concatenate two ustrings.
Appends a copy of src, including the NULL terminator, to dest. The initial copied character from src overwrites the NULL terminator in dest.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string. [in] src The source string. [in] n The maximum number of characters to append; no-op if <=0.
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_char | ( | const i18n_uchar * | s, |
i18n_uchar | c | ||
) |
Finds the first occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (NULL-terminated). [in] c The BMP code point to find.
- Returns:
- A pointer to the first occurrence of c in s, or NULL if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c | ||
) |
Finds the first occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (NULL-terminated). [in] c The code point to find.
- Returns:
- A pointer to the first occurrence of c in s, or NULL if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_compare | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2 | ||
) |
Compares two Unicode strings for bitwise equality (code unit order).
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare. [in] s2 A string to compare.
- Returns:
- 0 if s1 and s2 are bitwise equal; a negative value if s1 is bitwise less than s2; a positive value if s1 is bitwise greater than s2.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_compare_binary_order | ( | const i18n_uchar * | s1, |
int32_t | length1, | ||
const i18n_uchar * | s2, | ||
int32_t | length2, | ||
i18n_ubool | code_point_order | ||
) |
Compare two Unicode strings (binary order).
The comparison can be done in code unit order or in code point order. They differ only in UTF-16 when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.
This function works with strings of different explicitly specified lengths, unlike the ANSI C-like i18n_ustring_compare() and i18n_ustring_mem_compare() etc. NULL-terminated strings are possible with length arguments of -1.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 First source string. [in] length1 Length of first source string, or -1 if NULL-terminated. [in] s2 Second source string. [in] length2 Length of second source string, or -1 if NULL-terminated. [in] code_point_order Choose between code unit order (false) and code point order (true).
- Returns:
- < 0, 0 or > 0 as usual for string comparisons
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_compare_code_point_order | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2 | ||
) |
Compare two Unicode strings in code point order.
See i18n_ustring_compare_binary_order() for details.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare. [in] s2 A string to compare.
- Returns:
- a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_compare_n | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | n | ||
) |
Compare two ustrings for bitwise equality.
Compares at most n characters.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare (can be NULL/invalid if n<=0). [in] s2 A string to compare (can be NULL/invalid if n<=0). [in] n The maximum number of characters to compare; always returns 0 if n<=0.
- Returns:
- 0 if s1 and s2 are bitwise equal; a negative value if s1 is bitwise less than s2; a positive value if s1 is bitwise greater than s2.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_compare_n_code_point_order | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | n | ||
) |
Compare two Unicode strings in code point order.
This is different in UTF-16 from i18n_ustring_compare_n() if supplementary characters are present. For details, see i18n_ustring_compare_binary_order().
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare. [in] s2 A string to compare. [in] n The maximum number of characters to compare.
- Returns:
- a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_copy | ( | i18n_uchar * | dest, |
const i18n_uchar * | src | ||
) |
Copies a ustring. Adds a NULL terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
char* i18n_ustring_copy_au | ( | char * | dest, |
const i18n_uchar * | src | ||
) |
Copies a ustring to a byte string encoded in the default codepage.
Adds a NULL terminator. Performs an i18n_uchar to host byte conversion.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
char* i18n_ustring_copy_au_n | ( | char * | dest, |
const i18n_uchar * | src, | ||
int32_t | n | ||
) |
Copies a ustring to a byte string encoded in the default codepage.
Copies at most n characters. The result will be NULL terminated if the length of src is less than n. Performs an i18n_uchar to host byte conversion.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string [in] n The maximum number of characters to copy
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_copy_n | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | n | ||
) |
Copies a ustring.
Copies at most n characters. The result will be NULL terminated if the length of src is less than n.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string [in] n The maximum number of characters to copy
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_copy_ua | ( | i18n_uchar * | dest, |
const char * | src | ||
) |
Copies a byte string encoded in the default codepage to a ustring.
Adds a NULL terminator. Performs a host byte to i18n_uchar conversion.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_copy_ua_n | ( | i18n_uchar * | dest, |
const char * | src, | ||
int32_t | n | ||
) |
Copies a byte string encoded in the default codepage to a ustring.
Copies at most n characters. The result will be NULL terminated if the length of src is less than n. Performs a host byte to i18n_uchar conversion.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string [in] n The maximum number of characters to copy
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_count_char32 | ( | const i18n_uchar * | s, |
int32_t | length | ||
) |
Counts Unicode code points in the length i18n_uchar code units of the string.
A code point may occupy either one or two i18n_uchar code units. Counting code points involves reading all code units.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The input string. [in] length The number of i18n_uchar code units to be checked, or -1 to count all code points before the first NULL (U+0000).
- Returns:
- The number of code points in the specified code units.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_cspn | ( | const i18n_uchar * | string, |
const i18n_uchar * | match_set | ||
) |
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in match_set.
Works just like C's strcspn but with Unicode.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] string The string in which to search, NULL-terminated. [in] match_set A NULL-terminated string defining a set of code points for which to search in the text string.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
- Returns:
- The number of initial characters in string that do not occur in match_set.
- See also:
- i18n_ustring_spn()
i18n_uchar* i18n_ustring_find_first | ( | const i18n_uchar * | s, |
int32_t | length, | ||
const i18n_uchar * | sub_string, | ||
int32_t | sub_length | ||
) |
Finds the first occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (NULL-terminated). [in] length The length of s (number of i18n_uchar characters), or -1 if it is NULL-terminated. [in] sub_string The substring to find (NULL-terminated). [in] sub_length The length of sub_string (number of i18n_uchar characters), or -1 if it is NULL-terminated.
- Returns:
- A pointer to the first occurrence of sub_string in s, or s itself if the sub_string is empty, or NULL if sub_string is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_find_last | ( | const i18n_uchar * | s, |
int32_t | length, | ||
const i18n_uchar * | sub_string, | ||
int32_t | sub_length | ||
) |
Finds the last occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search.
[in] length The length of s (number of i18n_uchar characters), or -1 if it is NULL-terminated.
[in] sub_string The substring to find (NULL-terminated).
[in] sub_length The length of sub_string (number of i18n_uchar characters), or -1 if it is NULL-terminated.
- Returns:
- A pointer to the last occurrence of sub_string in s, or s itself if the substring is empty, or
NULL
if sub_string is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_fold_case | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
uint32_t | options, | ||
i18n_error_code_e * | error_code | ||
) |
Case-folds the characters in a string.
Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i.
The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
[in] src The original string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] options Either I18N_USTRING_U_FOLD_CASE_DEFAULT or I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The length of the result string. It may be greater than dest_capacity. In that case, only some of the result was written to the destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_from_UTF32 | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar32 * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-32 string to UTF-16.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_from_UTF32_with_sub | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar32 * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-32 string to UTF-16. Same as i18n_ustring_from_UTF32() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] sub_char The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
[out] num_substitutions Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_from_UTF8 | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const char * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Converts a UTF-8 string to UTF-16.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_from_UTF8_lenient | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const char * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-8 string to UTF-16.
Same as i18n_ustring_from_UTF8() except that this function is designed to be very fast, which it achieves by being lenient about malformed UTF-8 sequences. This function is intended for use in environments where UTF-8 text is expected to be well-formed.
Its semantics are:
- Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
- The function will not read beyond the input string, nor write beyond the dest_capacity.
- Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not be well-formed UTF-16. The function will resynchronize to valid code point boundaries within a small number of code points after an illegal sequence.
- Non-shortest forms are not detected and will result in "spoofing" output.
For further performance improvement, if src_len is given (>=0), then it must be dest_capacity>=src_len.
There is no inverse i18n_ustring_to_UTF8_lenient() function because there is practically no performance gain from not checking that a UTF-16 string is well-formed.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). Unlike for other I18N functions, if src_len>=0 then it must be dest_capacity>=src_len.
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. Unlike for other I18N functions, if src_len>=0 but dest_capacity<src_len, then *dest_len will be set to src_len (and I18N_ERROR_BUFFER_OVERFLOW will be set) regardless of the actual result length.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_from_UTF8_with_sub | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const char * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
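Convert a UTF-8 string to UTF-16. Same as i18n_ustring_from_UTF8() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.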
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] sub_char The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
[out] num_substitutions Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_from_WCS | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const wchar_t * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a wchar_t string to UTF-16.
If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then this function simply calls the fast, dedicated function for that. Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_get_length | ( | const i18n_uchar * | s | ) |
Determines the length of an array of i18n_uchar.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The array of i18n_uchar characters, NULL (U+0000) terminated.
- Returns:
- The number of i18n_uchar characters in s, minus the terminator.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_ubool i18n_ustring_has_more_char32_than | ( | const i18n_uchar * | s, |
int32_t | length, | ||
int32_t | number | ||
) |
Checks if the string contains more Unicode code points than a certain number.
This is more efficient than counting all code points in the entire string and comparing that number with a threshold. This function may not need to scan the string at all if the length is known (not -1 for NULL-termination) and falls within a certain range, and never needs to count more than 'number+1' code points. Logically equivalent to (i18n_ustring_count_char32(s, length) > number). A Unicode code point may occupy either one or two i18n_uchar code units.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The input string.
[in] length The length of the string, or -1 if it is NULL-terminated.
[in] number The number of code points in the string is compared against the number parameter.
- Returns:
- Boolean value for whether the string contains more Unicode code points than number. Same as (i18n_ustring_count_char32(s, length) > number).
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_mem_case_compare | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | length, | ||
uint32_t | options | ||
) |
Compare two strings case-insensitively using full case folding.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare. [in] s2 A string to compare. [in] length The number of characters in each string to case-fold and then compare. [in] options A bit set of options:
- I18N_USTRING_U_FOLD_CASE_DEFAULT or 0 is used for default options: Comparison in code unit order with default case folding.
- I18N_USTRING_U_COMPARE_CODE_POINT_ORDER Set to choose code point order instead of code unit order (see u_strCompare for details).
- I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I
- Returns:
- A negative, zero, or positive integer indicating the comparison result.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_mem_char | ( | const i18n_uchar * | s, |
i18n_uchar | c, | ||
int32_t | count | ||
) |
Finds the first occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (contains count i18n_uchar characters). [in] c The BMP code point to find. [in] count The length of the string.
- Returns:
- A pointer to the first occurrence of c in s or
NULL
if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_mem_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c, | ||
int32_t | count | ||
) |
Finds the first occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (contains count i18n_uchar characters). [in] c The code point to find. [in] count The length of the string.
- Returns:
- A pointer to the first occurrence of c in s or
NULL
if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_mem_compare | ( | const i18n_uchar * | buf1, |
const i18n_uchar * | buf2, | ||
int32_t | count | ||
) |
Compare the first count i18n_uchar characters of each buffer.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] buf1 The first string to compare. [in] buf2 The second string to compare. [in] count The maximum number of i18n_uchar characters to compare.
- Returns:
- When buf1 < buf2, a negative number is returned. When buf1 == buf2, 0 is returned. When buf1 > buf2, a positive number is returned.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_mem_compare_code_point_order | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | count | ||
) |
Compare two Unicode strings in code point order.
This is different in UTF-16 from i18n_ustring_mem_compare() if supplementary characters are present. For details, see i18n_ustring_compare_binary_order().
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s1 A string to compare. [in] s2 A string to compare. [in] count The maximum number of characters to compare.
- Returns:
- a negative/zero/positive integer corresponding to whether the first string is less than/equal to/greater than the second one in code point order
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_mem_copy | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | count | ||
) |
Synonym for memcpy(), but with i18n_uchar characters only.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string (can be NULL/invalid if count<=0) [in] count The number of characters to copy; no-op if <=0
- Returns:
- A pointer to dest
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_mem_move | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | count | ||
) |
Synonym for memmove(), but with i18n_uchar characters only.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] src The source string (can be NULL/invalid if count<=0) [in] count The number of characters to copy; no-op if <=0
- Returns:
- A pointer to dest
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_mem_r_char | ( | const i18n_uchar * | s, |
i18n_uchar | c, | ||
int32_t | count | ||
) |
Finds the last occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (contains count i18n_uchar characters). [in] c The BMP code point to find. [in] count The length of the string.
- Returns:
- A pointer to the last occurrence of c in s or
NULL
if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_mem_r_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c, | ||
int32_t | count | ||
) |
Finds the last occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (contains count i18n_uchar characters). [in] c The code point to find. [in] count The length of the string.
- Returns:
- A pointer to the last occurrence of c in s or
NULL
if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_mem_set | ( | i18n_uchar * | dest, |
const i18n_uchar | c, | ||
int32_t | count | ||
) |
Initialize count characters of dest to c.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[out] dest The destination string [in] c The character to initialize the string. [in] count The maximum number of characters to set.
- Returns:
- A pointer to dest.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_pbrk | ( | const i18n_uchar * | string, |
const i18n_uchar * | match_set | ||
) |
Locates the first occurrence in the string of any of the characters in the string match_set.
Works just like C's strpbrk but with Unicode.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] string The string in which to search, NULL-terminated. [in] match_set A NULL-terminated string defining a set of code points for which to search in the text string.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
- Returns:
- A pointer to the character in string that matches one of the characters in match_set, or NULL if no such character is found.
i18n_uchar* i18n_ustring_r_char | ( | const i18n_uchar * | s, |
i18n_uchar | c | ||
) |
Finds the last occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (NULL-terminated). [in] c The BMP code point to find.
- Returns:
- A pointer to the last occurrence of c in s or
NULL
if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_r_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c | ||
) |
Finds the last occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (NULL-terminated). [in] c The code point to find.
- Returns:
- A pointer to the last occurrence of c in s or
NULL
if c is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_r_string | ( | const i18n_uchar * | s, |
const i18n_uchar * | sub_string | ||
) |
Finds the last occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (NULL-terminated). [in] sub_string The substring to find (NULL-terminated).
- Returns:
- A pointer to the last occurrence of sub_string in s, or s itself if the sub_string is empty, or
NULL
if sub_string is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_spn | ( | const i18n_uchar * | string, |
const i18n_uchar * | match_set | ||
) |
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in match_set.
Works just like C's strspn but with Unicode.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] string The string in which to search, NULL-terminated. [in] match_set A NULL-terminated string defining a set of code points for which to search in the text string.
- Returns:
- The number of initial characters in string that do occur in match_set.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
- See also:
- i18n_ustring_cspn()
i18n_uchar* i18n_ustring_string | ( | const i18n_uchar * | s, |
const i18n_uchar * | sub_string | ||
) |
Finds the first occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] s The string to search (NULL-terminated). [in] sub_string The substring to find (NULL-terminated).
- Returns:
- A pointer to the first occurrence of sub_string in s, or s itself if the sub_string is empty, or
NULL
if sub_string is not in s.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_to_lower | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const char * | locale, | ||
i18n_error_code_e * | error_code | ||
) |
Lowercase the characters in a string.
Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
[in] src The original string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] locale The locale to consider, or "" for the root locale or NULL for the default locale.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The length of the result string. It may be greater than dest_capacity. In that case, only some of the result was written to the destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_to_title_new | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_ubreak_iterator_h | title_iter, | ||
const char * | locale | ||
) |
Titlecases a string.
Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section and in i18n_error_code_e description.
The titlecase break iterator can be provided to customize arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. The standard titlecase iterator for the root locale implements the algorithm of Unicode TR 21.
The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
[in] src The original string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] title_iter A break iterator to find the first characters of words that are to be titlecased. If none is provided (NULL), then a standard titlecase break iterator is opened.
[in] locale The locale to consider, or "" for the root locale or NULL for the default locale.
- Returns:
- The length of the result string. It may be greater than dest_capacity. In that case, only some of the result was written to the destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_to_upper | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const char * | locale, | ||
i18n_error_code_e * | error_code | ||
) |
Uppercases the characters in a string.
Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string.
[in] src The original string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] locale The locale to consider, or "" for the root locale or NULL for the default locale.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The length of the result string. It may be greater than dest_capacity. In that case, only some of the result was written to the destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar32* i18n_ustring_to_UTF32 | ( | i18n_uchar32 * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to UTF-32.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar32 characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len != NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar32* i18n_ustring_to_UTF32_with_sub | ( | i18n_uchar32 * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to UTF-32.
Same as i18n_ustring_to_UTF32() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of i18n_uchar32 characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len != NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] sub_char The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
[out] num_substitutions Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
char* i18n_ustring_to_UTF8 | ( | char * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Converts a UTF-16 string to UTF-8.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of chars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len != NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
- See also:
- i18n_ustring_from_UTF8()
char* i18n_ustring_to_UTF8_with_sub | ( | char * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to UTF-8. Same as i18n_ustring_to_UTF8() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of chars). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len != NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[in] sub_char The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER".
[out] num_substitutions Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
wchar_t* i18n_ustring_to_WCS | ( | wchar_t * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to a wchar_t string.
If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then this function simply calls the fast, dedicated function for that. Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
- Since :
- 2.3.1
- Parameters:
-
[out] dest A buffer for the result string. The result will be zero-terminated if the buffer is large enough.
[in] dest_capacity The size of the buffer (number of wchar_t's). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
[out] dest_len A pointer to receive the number of units written to the destination. If dest_len != NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow.
[in] src The original source string.
[in] src_len The length of the original string. If -1, then src must be zero-terminated.
[out] error_code Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- Returns:
- The pointer to destination buffer.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
i18n_uchar* i18n_ustring_tokenizer_r | ( | i18n_uchar * | src, |
const i18n_uchar * | delim, | ||
i18n_uchar ** | save_state | ||
) |
The string tokenizer API allows an application to break a string into tokens.
Works just like C's strtok_r but with Unicode.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] src String containing token(s). This string will be modified. After the first call to i18n_ustring_tokenizer_r(), this argument must be NULL to get to the next token.
[in] delim Set of delimiter characters (Unicode code points).
[out] save_state The current pointer within the original string, which is set by this function. The save_state parameter should be the address of a local variable of type i18n_uchar *.
- Returns:
- A pointer to the next token found in src, or NULL when there are no more tokens.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
int32_t i18n_ustring_unescape | ( | const char * | src, |
i18n_uchar * | dest, | ||
int32_t | dest_capacity | ||
) |
Unescape a string of characters and write the resulting Unicode characters to the destination buffer.
The following escape sequences are recognized:
\uhhhh 4 hex digits; h in [0-9A-Fa-f] \Uhhhhhhhh 8 hex digits \xhh 1-2 hex digits \x{h...} 1-8 hex digits \ooo 1-3 octal digits; o in [0-7] \cX control-X; X is masked with 0x1F
as well as the standard ANSI C escapes:
\a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A, \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B, \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
Anything else following a backslash is generically escaped. For example, "[a\-z]" returns "[a-z]".
If an escape sequence is ill-formed, this method returns an empty string. An example of an ill-formed sequence is "\\u" followed by fewer than 4 hex digits.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] src a zero-terminated string of invariant characters
[out] dest pointer to buffer to receive converted and unescaped text and, if there is room, a zero terminator. May be NULL for preflighting, in which case no i18n_uchar characters will be written, but the return value will still be valid. On error, an empty string is stored here (if possible).
[in] dest_capacity the number of i18n_uchar characters that may be written at dest. Ignored if dest == NULL.
- Returns:
- the length of unescaped string.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
- See also:
- i18n_ustring_unescape_at()
i18n_uchar32 i18n_ustring_unescape_at | ( | i18n_ustring_unescape_char_at_cb | char_at, |
int32_t * | offset, | ||
int32_t | length, | ||
void * | context | ||
) |
Unescape a single sequence.
The character at offset-1 is assumed (without checking) to be a backslash. This method takes a callback pointer to a function that returns the i18n_uchar at a given offset. By varying this callback, I18N functions are able to unescape char* strings, and UnicodeString objects.
If offset is out of range, or if the escape sequence is ill-formed, (i18n_uchar32)0xFFFFFFFF is returned. See documentation of i18n_ustring_unescape() for a list of recognized sequences.
- Remarks:
- The specific error code can be obtained using the get_last_result() method. Error codes are described in Exceptions section.
- Since :
- 2.3.1
- Parameters:
-
[in] char_at callback function that returns an i18n_uchar of the source text given an offset and a context pointer.
[in,out] offset pointer to the offset that will be passed to char_at. The offset value will be updated upon return to point after the last parsed character of the escape sequence. On error the offset is unchanged.
[in] length the number of i18n_uchar characters in the source text. The last character of the source text is considered to be at offset length-1.
[in] context an opaque pointer passed directly into char_at.
- Returns:
- the character represented by the escape sequence at offset, or (i18n_uchar32)0xFFFFFFFF on error.
- Exceptions:
-
I18N_ERROR_NONE Success I18N_ERROR_INVALID_PARAMETER Invalid function parameter
- See also:
- i18n_ustring_unescape()