Tizen Native API
5.0
|
The Ucollator module performs locale-sensitive string comparison.
#include <utils_i18n.h>
The Ucollator module performs locale-sensitive string comparison. It builds searching and sorting routines for natural language text and provides correct sorting orders for most locales supported.
Converts two different byte strings to two different unicode strings and compares the unicode strings to check if the strings are equal to each other.
i18n_uchar uchar_src[64] = {0,}; i18n_uchar uchar_target[64] = {0,}; char *src = "tizen"; char *target = "bada"; int uchar_src_len = 0; int uchar_target_len = 0; i18n_ucollator_h coll = NULL; i18n_ubool result = NULL; i18n_ustring_from_UTF8( uchar_src, 64, NULL, src, -1 ); i18n_ustring_from_UTF8( uchar_target, 64, NULL, target, -1 ); // creates a collator i18n_ucollator_create( "en_US", &coll ); // sets strength for coll i18n_ucollator_set_strength( coll, I18N_UCOLLATOR_PRIMARY ); // compares uchar_src with uchar_target i18n_ustring_get_length( uchar_src, &uchar_src_len ); i18n_ustring_get_length( uchar_target, &uchar_target_len ); i18n_ucollator_equal( coll, uchar_src, uchar_src_len, uchar_target, uchar_target_len, &result ); dlog_print(DLOG_INFO, LOG_TAG, "%s %s %s\n", src, result == 1 ? "is equal to" : "is not equal to", target ); // tizen is not equal to bada // destroys the collator i18n_ucollator_destroy( coll );
Sorts the given data in ascending order using i18n_ucollator_str_collator().
i18n_ucollator_h coll = NULL; char *src[3] = { "cat", "banana", "airplane" }; char *tmp = NULL; i18n_uchar buf_01[16] = {0,}; i18n_uchar buf_02[16] = {0,}; i18n_ucollator_result_e result = I18N_UCOLLATOR_EQUAL; int i = 0, j = 0; int ret = I18N_ERROR_NONE; int buf_01_len = 0, buf_02_len = 0; for (i = 0; i < sizeof(src) / sizeof(src[0]); i++) { dlog_print(DLOG_INFO, LOG_TAG, "%s\n", src[i]); } // cat banana airplane // creates a collator ret = i18n_ucollator_create("en_US", &coll); // compares and sorts in ascending order if (ret == I18N_ERROR_NONE) { i18n_ucollator_set_strength(coll, I18N_UCOLLATOR_TERTIARY); for (i = 0; i < 2; i++) { for (j = 0; j < 2 - i; j++) { i18n_ustring_copy_ua(buf_01, src[j]); i18n_ustring_copy_ua(buf_02, src[j+1]); i18n_ustring_get_length(buf_01, &buf_01_len); i18n_ustring_get_length(buf_02, &buf_02_len); // compares buf_01 with buf_02 i18n_ucollator_str_collator(coll, buf_01, buf_01_len, buf_02, buf_02_len, &result); if (result == I18N_UCOLLATOR_GREATER) { tmp = src[j]; src[j] = src[j+1]; src[j+1] = tmp; } } } } // destroys the collator i18n_ucollator_destroy( coll ); // deallocate memory for collator for (i = 0; i < sizeof(src) / sizeof(src[0]); i++) { dlog_print(DLOG_INFO, LOG_TAG, "%s\n", src[i]); } // ariplane banana cat
Functions | |
int | i18n_ucollator_create (const char *locale, i18n_ucollator_h *collator) |
Creates a i18n_ucollator_h for comparing strings. | |
int | i18n_ucollator_create_rules (const i18n_uchar *rules, int32_t rules_length, i18n_ucollator_attribute_value_e normalization_mode, i18n_ucollator_strength_e strength, i18n_uparse_error_s *parse_error, i18n_ucollator_h *collator) |
Produces an i18n_ucollator_h instance according to the rules supplied. | |
int | i18n_ucollator_get_contractions_and_expansions (const i18n_ucollator_h collator, i18n_ubool add_prefixes, i18n_uset_h contractions, i18n_uset_h expansions) |
Gets a set containing the expansions defined by the collator. | |
int | i18n_ucollator_destroy (i18n_ucollator_h collator) |
Destroys a i18n_ucollator_h. | |
int | i18n_ucollator_str_collator (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ucollator_result_e *result) |
Compares two strings. | |
int | i18n_ucollator_str_collator_utf8 (const i18n_ucollator_h collator, const char *src, int32_t src_len, const char *target, int32_t target_len, i18n_ucollator_result_e *result) |
Compares two strings in UTF-8. | |
int | i18n_ucollator_greater (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ubool *is_greater) |
Determines if one string is greater than another. | |
int | i18n_ucollator_greater_or_equal (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ubool *result) |
Determines if one string is greater than or equal to another. | |
int | i18n_ucollator_equal (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ubool *equal) |
Compares two strings for equality. | |
int | i18n_ucollator_str_collator_iter (const i18n_ucollator_h collator, const i18n_uchar_iter_h *src_iter, const i18n_uchar_iter_h *dest_iter, i18n_ucollator_result_e *result) |
Compares two UTF-8 encoded strings. | |
int | i18n_ucollator_get_strength (const i18n_ucollator_h collator, i18n_ucollator_strength_e *strength) |
Gets the collation strength used in an i18n_ucollator_h. | |
int | i18n_ucollator_set_strength (i18n_ucollator_h collator, i18n_ucollator_strength_e strength) |
Sets the collation strength used in a collator. | |
int | i18n_ucollator_get_reorder_codes (const i18n_ucollator_h collator, int32_t dest_size, int32_t *dest, int32_t *n_codes) |
Retrieves the reordering codes for this collator. | |
int | i18n_ucollator_set_reorder_codes (i18n_ucollator_h collator, const int32_t *reorder_codes, int32_t codes_length) |
Sets the reordering codes for this collator. | |
int | i18n_ucollator_get_equivalent_reorder_codes (int32_t reorder_code, int32_t dest_size, int32_t *dest, int32_t *n_codes) |
Retrieves the reorder codes that are grouped with the given reorder code. | |
int | i18n_ucollator_get_display_name (const char *obj_locale, const char *disp_locale, int32_t dest_size, i18n_uchar *dest, int32_t *display_size) |
Gets the display name for an i18n_ucollator_h. | |
int | i18n_ucollator_get_available (int32_t locale_index, const char **locale) |
Gets a locale for which collation rules are available. | |
int | i18n_ucollator_count_available (int32_t *n_available) |
Determines how many locales have collation rules available. | |
int | i18n_ucollator_create_available_locales (i18n_uenumeration_h *locales) |
Creates a string enumerator of all locales for which a valid collator may be created. | |
int | i18n_ucollator_get_keywords (i18n_uenumeration_h *keywords) |
Creates a string enumerator of all possible keywords that are relevant to collation. | |
int | i18n_ucollator_get_keyword_values (const char *keyword, i18n_uenumeration_h *keywords) |
Given a keyword, create a string enumeration of all values for that keyword that are currently in use. | |
int | i18n_ucollator_get_keyword_values_for_locale (const char *key, const char *locale, i18n_ubool commonly_used, i18n_uenumeration_h *keywords) |
Given a key and a locale, returns an array of string values in a preferred order that would make a difference. | |
int | i18n_ucollator_get_functional_equivalent (const char *keyword, const char *locale, int32_t dest_size, char *dest, i18n_ubool *is_available, int32_t *buffer_size) |
Returns the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service. | |
int | i18n_ucollator_get_rules (const i18n_ucollator_h collator, int32_t *length, const i18n_uchar **rules) |
Gets the collation tailoring rules from a i18n_ucollator_h. | |
int | i18n_ucollator_get_sort_key (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_length, int32_t dest_size, uint8_t *dest, int32_t *result_length) |
Gets a sort key for a string from a i18n_ucollator_h. | |
int | i18n_ucollator_next_sort_key_part (const i18n_ucollator_h collator, i18n_uchar_iter_h *iter, uint32_t state[2], uint8_t *dest, int32_t count, int32_t *result_length) |
Gets the next count bytes of a sort key. | |
int | i18n_ucollator_get_bound (const uint8_t *src, int32_t src_length, i18n_ucollator_bound_mode_e mode, uint32_t n_levels, uint8_t *dest, int32_t dest_length, int32_t *needed_size) |
Produces a bound for a given sortkey and a number of levels. | |
int | i18n_ucollator_get_version (const i18n_ucollator_h collator, i18n_uversion_info info) |
Gets the version information for a i18n_ucollator_h. | |
int | i18n_ucollator_get_uca_version (const i18n_ucollator_h collator, i18n_uversion_info info) |
Gets the UCA version information for a i18n_ucollator_h. | |
int | i18n_ucollator_merge_sort_keys (const uint8_t *src1, int32_t src1_length, const uint8_t *src2, int32_t src2_length, int32_t dest_size, uint8_t *dest, int32_t *merged_length) |
Merges two sort keys. | |
int | i18n_ucollator_set_attribute (i18n_ucollator_h collator, i18n_ucollator_attribute_e attr, i18n_ucollator_attribute_value_e val) |
Sets an attribute's value. | |
int | i18n_ucollator_get_attribute (i18n_ucollator_h collator, i18n_ucollator_attribute_e attr, i18n_ucollator_attribute_value_e *val) |
Gets an attribute's value. | |
int | i18n_ucollator_set_max_variable (i18n_ucollator_h collator, i18n_ucollator_reorder_code_e group) |
Sets the variable top to the top of the specified reordering group. | |
int | i18n_ucollator_get_max_variable (i18n_ucollator_h collator, i18n_ucollator_reorder_code_e *group) |
Returns the maximum reordering group whose characters are affected by I18N_UCOLLATOR_ALTERNATE_HANDLING. | |
int | i18n_ucollator_get_variable_top (i18n_ucollator_h collator, uint32_t *weight) |
Gets the variable top value of a i18n_ucollator_h. | |
int | i18n_ucollator_safe_clone (i18n_ucollator_h collator, i18n_ucollator_h *clone) |
Clones the given collator, the cloning is thread-safe. | |
int | i18n_ucollator_get_rules_ex (i18n_ucollator_h collator, i18n_ucollator_rule_option_e option, int32_t dest_size, i18n_uchar *dest, int32_t *current_rules) |
Returns the current rules. | |
int | i18n_ucollator_get_locale_by_type (i18n_ucollator_h collator, i18n_ulocale_data_locale_type_e type, const char **locale) |
Gets the locale name of the collator. | |
int | i18n_ucollator_get_tailored_set (i18n_ucollator_h collator, i18n_uset_h *uset) |
Gets a Unicode set that contains all the characters and sequences tailored in this collator. | |
int | i18n_ucollator_clone_binary (i18n_ucollator_h collator, int32_t dest_size, uint8_t *dest, int32_t *image_size) |
Creates a binary image of a collator. | |
int | i18n_ucollator_create_binary (const uint8_t *bin, int32_t length, i18n_ucollator_h base, i18n_ucollator_h *collator) |
Creates a collator from a collator binary image created using i18n_ucollator_clone_binary(). | |
Typedefs | |
typedef void * | i18n_ucollator_h |
Structure representing a collator object instance. | |
typedef i18n_ucollator_attribute_value_e | i18n_ucollator_strength_e |
Enumeration in which the base letter represents a primary difference. Set comparison level to I18N_UCOLLATOR_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of primary difference, "abc" < "abd" Diacritical differences on the same base letter represent a secondary difference. Set comparison level to I18N_UCOLLATOR_SECONDARY to ignore tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of secondary difference, "ä" >> "a". Uppercase and lowercase versions of the same character represent a tertiary difference. Set comparison level to I18N_UCOLLATOR_TERTIARY to include all comparison differences. Use this to set the strength of an i18n_ucollator_h. Example of tertiary difference, "abc" <<< "ABC". Two characters are considered "identical" when they have the same unicode spellings. I18N_UCOLLATOR_IDENTICAL. For example, "ä" == "ä". i18n_ucollator_strength_e is also used to determine the strength of sort keys generated from i18n_ucollator_h. These values can now be found in the i18n_ucollator_attribute_value_e enum. |
typedef void* i18n_ucollator_h |
Structure representing a collator object instance.
Enumeration in which the base letter represents a primary difference. Set comparison level to I18N_UCOLLATOR_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of primary difference, "abc" < "abd" Diacritical differences on the same base letter represent a secondary difference. Set comparison level to I18N_UCOLLATOR_SECONDARY to ignore tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of secondary difference, "ä" >> "a". Uppercase and lowercase versions of the same character represent a tertiary difference. Set comparison level to I18N_UCOLLATOR_TERTIARY to include all comparison differences. Use this to set the strength of an i18n_ucollator_h. Example of tertiary difference, "abc" <<< "ABC". Two characters are considered "identical" when they have the same unicode spellings. I18N_UCOLLATOR_IDENTICAL. For example, "ä" == "ä". i18n_ucollator_strength_e is also used to determine the strength of sort keys generated from i18n_ucollator_h. These values can now be found in the i18n_ucollator_attribute_value_e enum.
Enumeration for attributes that collation service understands. All the attributes can take I18N_UCOLLATOR_DEFAULT value, as well as the values specific to each one.
I18N_UCOLLATOR_FRENCH_COLLATION |
Attribute for direction of secondary weights - used in Canadian French. Acceptable values are I18N_UCOLLATOR_ON, which results in secondary weights being considered backwards, and I18N_UCOLLATOR_OFF which treats secondary weights in the order they appear |
I18N_UCOLLATOR_ALTERNATE_HANDLING |
Attribute for handling variable elements. Acceptable values are I18N_UCOLLATOR_NON_IGNORABLE (default) which treats all the codepoints with non-ignorable primary weights in the same way, and I18N_UCOLLATOR_SHIFTED which causes codepoints with primary weights that are equal or below the variable top value to be ignored at the primary level and moved to the quaternary level |
I18N_UCOLLATOR_CASE_FIRST |
Controls the ordering of upper and lower case letters. Acceptable values are I18N_UCOLLATOR_OFF (default), which orders upper and lower case letters in accordance to their tertiary weights, I18N_UCOLLATOR_UPPER_FIRST which forces upper case letters to sort before lower case letters, and I18N_UCOLLATOR_LOWER_FIRST which does the opposite |
I18N_UCOLLATOR_CASE_LEVEL |
Controls whether an extra case level (positioned before the third level) is generated or not. Acceptable values are I18N_UCOLLATOR_OFF (default), when case level is not generated, and I18N_UCOLLATOR_ON which causes the case level to be generated. Contents of the case level are affected by the value of the I18N_UCOLLATOR_CASE_FIRST attribute. A simple way to ignore accent differences in a string is to set the strength to I18N_UCOLLATOR_PRIMARY and enable case level |
I18N_UCOLLATOR_NORMALIZATION_MODE |
Controls whether the normalization check and necessary normalizations are performed. When set to I18N_UCOLLATOR_OFF (default) no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called FCD form (see users manual for more info). When set to I18N_UCOLLATOR_ON, an incremental check is performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental NFD normalization is performed |
I18N_UCOLLATOR_DECOMPOSITION_MODE |
An alias for the I18N_UCOLLATOR_NORMALIZATION_MODE attribute |
I18N_UCOLLATOR_STRENGTH |
The strength attribute. Can be either I18N_UCOLLATOR_PRIMARY, I18N_UCOLLATOR_SECONDARY, I18N_UCOLLATOR_TERTIARY, I18N_UCOLLATOR_QUATERNARY, or I18N_UCOLLATOR_IDENTICAL. The usual strength for most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with shifted setting for the alternate handling attribute and for JIS X 4061 collation, when it is used to distinguish between Katakana and Hiragana. Otherwise, quaternary level is affected only by the number of non-ignorable code points in the string. Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string |
I18N_UCOLLATOR_NUMERIC_COLLATION |
When turned on, this attribute makes substrings of digits sort according to their numeric values. This is a way to get '100' to sort AFTER '2'. Note that the longest digit substring that can be treated as a single unit is 254 digits (not counting leading zeros). If a digit substring is longer than that, the digits beyond the limit will be treated as a separate digit substring. A "digit" in this sense is a code point with General_Category=Nd, which does not include circled numbers, roman numerals, and so on. Only a contiguous digit substring is considered, that is, non-negative integers without separators. There is no support for plus/minus signs, decimals, exponents, and so on |
I18N_UCOLLATOR_ATTRIBUTE_COUNT |
The number of i18n_ucollator_attribute_e constants |
Enumeration containing attribute values for controlling collation behavior. Here are all the allowable values. Not every attribute can take every value. The only universal value is I18N_UCOLLATOR_DEFAULT, which resets the attribute value to the predefined value for that locale.
I18N_UCOLLATOR_DEFAULT |
Accepted by most attributes |
I18N_UCOLLATOR_PRIMARY |
Primary collation strength |
I18N_UCOLLATOR_SECONDARY |
Secondary collation strength |
I18N_UCOLLATOR_TERTIARY |
Tertiary collation strength |
I18N_UCOLLATOR_DEFAULT_STRENGTH |
Default collation strength |
I18N_UCOLLATOR_QUATERNARY |
Quaternary collation strength |
I18N_UCOLLATOR_IDENTICAL |
Identical collation strength |
I18N_UCOLLATOR_OFF |
Turn the feature off - works for I18N_UCOLLATOR_FRENCH_COLLATION, I18N_UCOLLATOR_CASE_LEVEL & I18N_UCOLLATOR_DECOMPOSITION_MODE |
I18N_UCOLLATOR_ON |
Turn the feature on - works for I18N_UCOLLATOR_FRENCH_COLLATION, I18N_UCOLLATOR_CASE_LEVEL & I18N_UCOLLATOR_DECOMPOSITION_MODE |
I18N_UCOLLATOR_SHIFTED |
Valid for I18N_UCOLLATOR_ALTERNATE_HANDLING. Alternate handling will be shifted. |
I18N_UCOLLATOR_NON_IGNORABLE |
Valid for I18N_UCOLLATOR_ALTERNATE_HANDLING. Alternate handling will be non ignorable. |
I18N_UCOLLATOR_LOWER_FIRST |
Valid for I18N_UCOLLATOR_CASE_FIRST - lower case sorts before upper case. |
I18N_UCOLLATOR_UPPER_FIRST |
Valid for I18N_UCOLLATOR_CASE_FIRST - upper case sorts before lower case. |
Enumeration that is taken by i18n_ucollator_get_bound().
I18N_UCOLLATOR_BOUND_LOWER |
Lower bound. |
I18N_UCOLLATOR_BOUND_UPPER |
Upper bound that will match strings of exact size. |
I18N_UCOLLATOR_BOUND_UPPER_LONG |
Upper bound that will match all the strings that have the same initial substring as the given string. |
I18N_UCOLLATOR_BOUND_VALUE_COUNT |
One more than the highest normal i18n_ucollator_bound_mode_e value. |
Enumeration containing the codes for reordering segments of the collation table that are not script codes. These reordering codes are to be used in conjunction with the script codes.
I18N_UCOLLATOR_REORDER_CODE_DEFAULT |
A special reordering code that is used to specify the default reordering codes for a locale. |
I18N_UCOLLATOR_REORDER_CODE_NONE |
A special reordering code that is used to specify no reordering codes. |
I18N_UCOLLATOR_REORDER_CODE_OTHERS |
A special reordering code that is used to specify all other codes used for reordering except for the codes listed as i18n_ucollator_reorder_code_e values and those listed explicitly in a reordering. |
I18N_UCOLLATOR_REORDER_CODE_SPACE |
Characters with the space property. This is equivalent to the rule value "space". |
I18N_UCOLLATOR_REORDER_CODE_FIRST |
The first entry in the enumeration of reordering groups. This is intended for use in range checking and enumeration of the reorder codes. |
I18N_UCOLLATOR_REORDER_CODE_PUNCTUATION |
Characters with the punctuation property. This is equivalent to the rule value "punct". |
I18N_UCOLLATOR_REORDER_CODE_SYMBOL |
Characters with the symbol property. This is equivalent to the rule value "symbol". |
I18N_UCOLLATOR_REORDER_CODE_CURRENCY |
Characters with the currency property. This is equivalent to the rule value "currency". |
I18N_UCOLLATOR_REORDER_CODE_DIGIT |
Characters with the digit property. This is equivalent to the rule value "digit". |
I18N_UCOLLATOR_REORDER_CODE_LIMIT |
The limit of the reorder codes. This is intended for use in range checking and enumeration of the reorder codes. |
Enumeration for source and target string comparison result. I18N_UCOLLATOR_LESS is returned if the source string is compared to be less than the target string in the i18n_ucollator_str_collator() method. I18N_UCOLLATOR_EQUAL is returned if the source string is compared to be equal to the target string in the i18n_ucollator_str_collator() method. I18N_UCOLLATOR_GREATER is returned if the source string is compared to be greater than the target string in the i18n_ucollator_str_collator() method.
Options for retrieving the rule string.
I18N_UCOLLATOR_TAILORING_ONLY |
Retrieves the tailoring rules only. Same as calling the version of i18n_ucollator_get_rules() without i18n_ucollator_rule_option_e. |
I18N_UCOLLATOR_FULL_RULES |
Retrieves the "UCA rules" concatenated with the tailoring rules. The "UCA rules" are an approximation of the root collator's sort order. See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales. |
int i18n_ucollator_clone_binary | ( | i18n_ucollator_h | collator, |
int32_t | dest_size, | ||
uint8_t * | dest, | ||
int32_t * | image_size | ||
) |
Creates a binary image of a collator.
This binary image can be stored and later used to instantiate a collator using i18n_ucollator_create_binary(). This API supports preflighting.
[in] | collator | The collator |
[in] | dest_size | Capacity of the dest buffer |
[out] | dest | A fill-in buffer to receive the binary image |
[out] | image_size | The size of the image |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_count_available | ( | int32_t * | n_available | ) |
Determines how many locales have collation rules available.
This function is most useful for determining the loop ending condition for calls to i18n_ucollator_get_available().
[out] | n_available | The number of locales for which collation rules are available |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_create | ( | const char * | locale, |
i18n_ucollator_h * | collator | ||
) |
Creates a i18n_ucollator_h for comparing strings.
For some languages, multiple collation types are available; for example, "de@collation=phonebook". Collation attributes can be specified via locale keywords as well, in the old locale extension syntax ("el@colCaseFirst=upper") or in language tag syntax ("el-u-kf-upper"). See User Guide: Collation API.
The i18n_ucollator_h is used in all the calls to the Collation service.
When it is no longer needed, the collator must be disposed of by calling i18n_ucollator_destroy().
[in] | locale | The locale containing the required collation rules. Special values for locales can be passed in: if NULL is passed for the locale, the default locale collation rules will be used; if an empty string ("") or "root" is passed, UCA rules will be used. |
[out] | collator | i18n_ucollator_h, otherwise 0 if an error occurs |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_create_available_locales | ( | i18n_uenumeration_h * | locales | ) |
Creates a string enumerator of all locales for which a valid collator may be created.
[out] | locales | A string enumeration over locale strings. The caller is responsible for releasing the result. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_create_binary | ( | const uint8_t * | bin, |
int32_t | length, | ||
i18n_ucollator_h | base, | ||
i18n_ucollator_h * | collator | ||
) |
Creates a collator from a collator binary image created using i18n_ucollator_clone_binary().
Binary image used in instantiation of the collator remains owned by the user and should stay around for the lifetime of the collator. The API also takes a base collator which must be the root collator.
[in] | bin | The binary image owned by the user and required through the lifetime of the collator |
[in] | length | The size of the image. If negative, the API will try to figure out the length of the image. |
[in] | base | Base collator, for lookup of untailored characters. Must be the root collator, must not be NULL . The base is required to be present through the lifetime of the collator. |
[out] | collator | The newly created collator |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_create_rules | ( | const i18n_uchar * | rules, |
int32_t | rules_length, | ||
i18n_ucollator_attribute_value_e | normalization_mode, | ||
i18n_ucollator_strength_e | strength, | ||
i18n_uparse_error_s * | parse_error, | ||
i18n_ucollator_h * | collator | ||
) |
Produces an i18n_ucollator_h instance according to the rules supplied.
The rules are used to change the default ordering, defined in the UCA, in a process called tailoring. The resulting i18n_ucollator_h pointer can be used in the same way as the one obtained by i18n_ucollator_create().
[in] | rules | A string describing the collation rules. For the syntax of the rules please see users guide |
[in] | rules_length | The length of rules, or -1 if null-terminated |
[in] | normalization_mode | The normalization mode, one of: I18N_UCOLLATOR_OFF (expect the text to not need normalization), I18N_UCOLLATOR_ON (normalize), or I18N_UCOLLATOR_DEFAULT (set the mode according to the rules) |
[in] | strength | The default collation strength: One of I18N_UCOLLATOR_PRIMARY , I18N_UCOLLATOR_SECONDARY , I18N_UCOLLATOR_TERTIARY , I18N_UCOLLATOR_IDENTICAL , I18N_UCOLLATOR_DEFAULT_STRENGTH - can be also set in the rules |
[out] | parse_error | A pointer to i18n_uparse_error_s to receive information about errors that occurred during parsing. This argument can currently be set to NULL , but at the user's own risk. Please provide a real structure. |
[out] | collator | A pointer to an i18n_ucollator_h. It is not guaranteed that NULL will be returned in case of error - please check the function's return value for errors. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_destroy | ( | i18n_ucollator_h | collator | ) |
Destroys a i18n_ucollator_h.
Once destroyed, the i18n_ucollator_h should not be used. Every created collator should be destroyed.
[in] | collator | The i18n_ucollator_h to close |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_equal | ( | const i18n_ucollator_h | collator, |
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const i18n_uchar * | target, | ||
int32_t | target_len, | ||
i18n_ubool * | equal | ||
) |
Compares two strings for equality.
This function is equivalent to checking whether i18n_ucollator_str_collator() reports I18N_UCOLLATOR_EQUAL.
[in] | collator | The i18n_ucollator_h containing the comparison rules |
[in] | src | The source string |
[in] | src_len | The length of the source, otherwise -1 if null-terminated |
[in] | target | The target string |
[in] | target_len | The length of the target, otherwise -1 if null-terminated |
[out] | equal | If true source is equal to target, otherwise false |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_attribute | ( | i18n_ucollator_h | collator, |
i18n_ucollator_attribute_e | attr, | ||
i18n_ucollator_attribute_value_e * | val | ||
) |
Gets an attribute's value.
[in] | collator | The collator |
[in] | attr | The attribute type |
[out] | val | The attribute value |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_available | ( | int32_t | locale_index, |
const char ** | locale | ||
) |
Gets a locale for which collation rules are available.
An i18n_ucollator_h in a locale returned by this function will perform the correct collation for the locale.
[in] | locale_index | The index of the desired locale |
[out] | locale | A locale for which collation rules are available, or 0 if none |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_bound | ( | const uint8_t * | src, |
int32_t | src_length, | ||
i18n_ucollator_bound_mode_e | mode, | ||
uint32_t | n_levels, | ||
uint8_t * | dest, | ||
int32_t | dest_length, | ||
int32_t * | needed_size | ||
) |
Produces a bound for a given sortkey and a number of levels.
The value stored in needed_size is always the number of bytes needed, regardless of whether the result buffer was big enough or even valid.
Resulting bounds can be used to produce a range of strings that are between upper and lower bounds. For example, if bounds are produced for a sortkey of string "smith", strings between upper and lower bounds with one level would include "Smith", "SMITH", "sMiTh".
There are two upper bounds that can be produced. If I18N_UCOLLATOR_BOUND_UPPER is produced, strings matched would be as above. However, if a bound produced using I18N_UCOLLATOR_BOUND_UPPER_LONG is used, the above example will also match "Smithsonian" and similar.
Sort keys may be compared using strcmp.
[in] | src | The source sortkey |
[in] | src_length | The length of source, or -1 if null-terminated. (If an unmodified sortkey is passed, it is always null terminated). |
[in] | mode | Type of bound required. It can be I18N_UCOLLATOR_BOUND_LOWER, which produces a lower inclusive bound, I18N_UCOLLATOR_BOUND_UPPER, that produces upper bound that matches strings of the same length or I18N_UCOLLATOR_BOUND_UPPER_LONG that matches strings that have the same starting substring as the source string. |
[in] | n_levels | Number of levels required in the resulting bound (for most uses, the recommended value is 1). See users guide for explanation on number of levels a sortkey can have. |
[out] | dest | A pointer to a buffer to receive the resulting sortkey. |
[in] | dest_length | The maximum size of result |
[out] | needed_size | The size needed to fully store the bound |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_contractions_and_expansions | ( | const i18n_ucollator_h | collator, |
i18n_ubool | add_prefixes, | ||
i18n_uset_h | contractions, | ||
i18n_uset_h | expansions | ||
) |
Gets a set containing the expansions defined by the collator.
The set includes both the root collator's expansions and the expansions defined by the tailoring.
[in] | collator | The collator |
[in] | add_prefixes | Add the prefix contextual elements to contractions |
[out] | contractions | If not NULL , the set to hold the contractions |
[out] | expansions | If not NULL , the set to hold the expansions |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_display_name | ( | const char * | obj_locale, |
const char * | disp_locale, | ||
int32_t | dest_size, | ||
i18n_uchar * | dest, | ||
int32_t * | display_size | ||
) |
Gets the display name for an i18n_ucollator_h.
The display name is suitable for presentation to a user.
[in] | obj_locale | The locale of the collator in question |
[in] | disp_locale | The locale for display |
[in] | dest_size | The maximum size of dest |
[out] | dest | A pointer to a buffer to receive the attribute |
[out] | display_size | The total buffer size needed; if greater than dest_size, the output was truncated |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_equivalent_reorder_codes | ( | int32_t | reorder_code, |
int32_t | dest_size, | ||
int32_t * | dest, | ||
int32_t * | n_codes | ||
) |
Retrieves the reorder codes that are grouped with the given reorder code.
Some reorder codes will be grouped and must reorder together. Beginning with ICU 55, scripts only reorder together if they are primary-equal, for example Hiragana and Katakana.
[in] | reorder_code | The reorder code to determine equivalence for |
[in] | dest_size | The length of dest. If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any codes (pre-flighting). |
[out] | dest | The array to fill with the script ordering |
[out] | n_codes | The number of reordering codes written to the dest array. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_functional_equivalent | ( | const char * | keyword, |
const char * | locale, | ||
int32_t | dest_size, | ||
char * | dest, | ||
i18n_ubool * | is_available, | ||
int32_t * | buffer_size | ||
) |
Returns the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
If two different input locale + keyword combinations produce the same result locale, then collators instantiated for these two different input locales will behave equivalently. The converse is not always true; two collators may in fact be equivalent, but return different results, due to internal details. The return result has no other meaning than that stated above, and implies nothing as to the relationship between the two locales. This is intended for use by applications who wish to cache collators, or otherwise reuse collators when possible. The functional equivalent may change over time.
[in] | keyword | A particular keyword as enumerated by i18n_ucollator_get_keywords() |
[in] | locale | The specified input locale |
[in] | dest_size | Capacity of the dest buffer |
[out] | dest | The functionally equivalent result locale |
[out] | is_available | If non-NULL, indicates whether the specified input locale was 'available' to the collation service. A locale is defined as 'available' if it physically exists within the collation locale data. |
[out] | buffer_size | The actual buffer size needed for the locale. If greater than dest_size, the returned full name will be truncated and an error code will be returned. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_keyword_values | ( | const char * | keyword, |
i18n_uenumeration_h * | keywords | ||
) |
Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
[in] | keyword | A particular keyword as enumerated by i18n_ucollator_get_keywords(). If any other keyword is passed in, returns I18N_ERROR_INVALID_PARAMETER |
[out] | keywords | A string enumeration over collation keyword values, or NULL upon error. The caller is responsible for releasing the result. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_keyword_values_for_locale | ( | const char * | key, |
const char * | locale, | ||
i18n_ubool | commonly_used, | ||
i18n_uenumeration_h * | keywords | ||
) |
Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
These are all and only those values where the creation of the service with the locale formed from the input locale plus input keyword and that value has different behavior than creation with the input locale alone.
[in] | key | One of the keys supported by this service. For now, only "collation" is supported |
[in] | locale | The locale |
[in] | commonly_used | If set to true it will return only commonly used values with the given locale in preferred order. Otherwise, it will return all the available values for the locale |
[out] | keywords | A string enumeration over keyword values for the given key and the locale |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_keywords | ( | i18n_uenumeration_h * | keywords | ) |
Creates a string enumerator of all possible keywords that are relevant to collation.
At this point, the only recognized keyword for this service is "collation".
[out] | keywords | A string enumeration over locale strings. The caller is responsible for releasing the result. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_locale_by_type | ( | i18n_ucollator_h | collator, |
i18n_ulocale_data_locale_type_e | type, | ||
const char ** | locale | ||
) |
Gets the locale name of the collator.
If the collator is instantiated from the rules, then this function returns NULL.
[in] | collator | The i18n_ucollator_h for which the locale is needed |
[in] | type | You can choose between requested, valid and actual locale. For description see the definition of i18n_ulocale_data_locale_type_e. |
[out] | locale | The real locale name from which the collation data comes. If the collator was instantiated from rules, returns NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_max_variable | ( | i18n_ucollator_h | collator, |
i18n_ucollator_reorder_code_e * | group | ||
) |
Returns the maximum reordering group whose characters are affected by I18N_UCOLLATOR_ALTERNATE_HANDLING.
[in] | collator | The collator |
[out] | group | The maximum variable reordering group |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_reorder_codes | ( | const i18n_ucollator_h | collator, |
int32_t | dest_size, | ||
int32_t * | dest, | ||
int32_t * | n_codes | ||
) |
Retrieves the reordering codes for this collator.
These reordering codes are a combination of i18n_uscript_code_e codes and i18n_ucollator_reorder_code_e entries.
[in] | collator | The i18n_ucollator_h to query |
[in] | dest_size | The length of dest. If it is 0 , then dest may be NULL and the function will only set the n_codes parameter to the length of the result without writing any codes (pre-flighting). |
[out] | dest | The array to fill with the script ordering |
[out] | n_codes | The number of reordering codes written to the dest array |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_rules | ( | const i18n_ucollator_h | collator, |
int32_t * | length, | ||
const i18n_uchar ** | rules | ||
) |
Gets the collation tailoring rules from a i18n_ucollator_h.
The rules will follow the rule syntax.
[in] | collator | The i18n_ucollator_h to query |
[out] | length | The length of the rules |
[out] | rules | The collation tailoring rules |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_rules_ex | ( | i18n_ucollator_h | collator, |
i18n_ucollator_rule_option_e | option, | ||
int32_t | dest_size, | ||
i18n_uchar * | dest, | ||
int32_t * | current_rules | ||
) |
Returns the current rules.
The option parameter defines whether full rules are returned or just the tailoring. Returns number of i18n_uchar needed to store rules. If dest is NULL or dest_size is not enough to store rules, will store up to available space. See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
[in] | collator | The collator to get the rules from |
[in] | option | Selects whether the full rules or only the tailoring are returned; one of i18n_ucollator_rule_option_e |
[in] | dest_size | The length of buffer to store rules in. If less than needed you'll get only the part that fits in. |
[out] | dest | The buffer to store the result in. If NULL , you'll get no rules. |
[out] | current_rules | The number of i18n_uchar needed to store the current rules. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_sort_key | ( | const i18n_ucollator_h | collator, |
const i18n_uchar * | src, | ||
int32_t | src_length, | ||
int32_t | dest_size, | ||
uint8_t * | dest, | ||
int32_t * | result_length | ||
) |
Gets a sort key for a string from a i18n_ucollator_h.
Sort keys may be compared using strcmp. Note that sort keys are often less efficient than simply doing comparison. For more details, see the ICU User Guide. Like ICU functions that write to an output buffer, the buffer contents is undefined if the buffer capacity (dest_size parameter) is too small. Unlike ICU functions that write a string to an output buffer, the terminating zero byte is counted in the sort key length.
[in] | collator | The i18n_ucollator_h containing the collation rules |
[in] | src | The string to transform |
[in] | src_length | The length of source, or -1 if null-terminated |
[in] | dest_size | The maximum size of dest buffer |
[out] | dest | A pointer to a buffer to receive the attribute |
[out] | result_length | The size needed to fully store the sort key. If there was an internal error generating the sort key, a zero value is returned. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_strength | ( | const i18n_ucollator_h | collator, |
i18n_ucollator_strength_e * | strength | ||
) |
Gets the collation strength used in an i18n_ucollator_h.
The strength influences how strings are compared.
[in] | collator | The i18n_ucollator_h to query |
[out] | strength | The collation strength; one of I18N_UCOLLATOR_PRIMARY, I18N_UCOLLATOR_SECONDARY, I18N_UCOLLATOR_TERTIARY, I18N_UCOLLATOR_QUATERNARY, I18N_UCOLLATOR_IDENTICAL |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_tailored_set | ( | i18n_ucollator_h | collator, |
i18n_uset_h * | uset | ||
) |
Gets a Unicode set that contains all the characters and sequences tailored in this collator.
The result must be disposed of by using i18n_uset_destroy().
[in] | collator | The i18n_ucollator_h for which we want to get tailored chars |
[out] | uset | A pointer to the newly created i18n_uset_h. Must be disposed of by using i18n_uset_destroy(). |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_uca_version | ( | const i18n_ucollator_h | collator, |
i18n_uversion_info | info | ||
) |
Gets the UCA version information for a i18n_ucollator_h.
[in] | collator | The i18n_ucollator_h to query |
[in] | info | The version information, the result will be filled in |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_variable_top | ( | i18n_ucollator_h | collator, |
uint32_t * | weight | ||
) |
Gets the variable top value of a i18n_ucollator_h.
[in] | collator | The collator whose variable top needs to be retrieved |
[out] | weight | The variable top primary weight |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_get_version | ( | const i18n_ucollator_h | collator, |
i18n_uversion_info | info | ||
) |
Gets the version information for a i18n_ucollator_h.
Version is currently an opaque 32-bit number which depends, among other things, on major versions of the collator tailoring and UCA.
[in] | collator | The i18n_ucollator_h to query |
[in] | info | The version information, the result will be filled in |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_greater | ( | const i18n_ucollator_h | collator, |
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const i18n_uchar * | target, | ||
int32_t | target_len, | ||
i18n_ubool * | is_greater | ||
) |
Determines if one string is greater than another.
This function is equivalent to i18n_ucollator_str_collator() == I18N_UCOLLATOR_GREATER
[in] | collator | The i18n_ucollator_h containing the comparison rules |
[in] | src | The source string |
[in] | src_len | The length of the src, otherwise -1 if null-terminated |
[in] | target | The target string |
[in] | target_len | The length of the target, otherwise -1 if null-terminated |
[out] | is_greater | TRUE if source is greater than target, FALSE otherwise |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_greater_or_equal | ( | const i18n_ucollator_h | collator, |
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const i18n_uchar * | target, | ||
int32_t | target_len, | ||
i18n_ubool * | result | ||
) |
Determines if one string is greater than or equal to another.
This function is equivalent to i18n_ucollator_str_collator() != I18N_UCOLLATOR_LESS
[in] | collator | The i18n_ucollator_h containing the comparison rules |
[in] | src | The source string |
[in] | src_len | The length of the src, otherwise -1 if null-terminated |
[in] | target | The target string |
[in] | target_len | The length of the target, otherwise -1 if null-terminated |
[out] | result | TRUE if source string is greater than or equal to target, FALSE otherwise |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_merge_sort_keys | ( | const uint8_t * | src1, |
int32_t | src1_length, | ||
const uint8_t * | src2, | ||
int32_t | src2_length, | ||
int32_t | dest_size, | ||
uint8_t * | dest, | ||
int32_t * | merged_length | ||
) |
Merges two sort keys.
The levels are merged with their corresponding counterparts (primaries with primaries, secondaries with secondaries etc.). Between the values from the same level a separator is inserted.
This is useful, for example, for combining sort keys from first and last names to sort such pairs. See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
The recommended way to achieve "merged" sorting is by concatenating strings with U+FFFE between them. The concatenation has the same sort order as the merged sort keys, but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). Using strings with U+FFFE may yield shorter sort keys.
For details about Sort Key Features see http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features
It is possible to merge multiple sort keys by consecutively merging another one with the intermediate result.
The length of the merge result is the sum of the lengths of the input sort keys.
Example (uncompressed):
191B1D 01 050505 01 910505 00
1F2123 01 050505 01 910505 00
will be merged as
191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00
If the destination buffer is not big enough, then its contents are undefined. If any of source lengths are zero or any of the source pointers are NULL / undefined, the result is of size zero.
[in] | src1 | The first sort key |
[in] | src1_length | The length of the first sort key, including the zero byte at the end; can be -1 if the function is to find the length. |
[in] | src2 | The second sort key |
[in] | src2_length | The length of the second sort key, including the zero byte at the end; can be -1 if the function is to find the length. |
[in] | dest_size | The number of bytes in the dest buffer |
[out] | dest | The buffer where the merged sort key is written, can be NULL if dest_size==0 |
[out] | merged_length | The length of the merged sort key, src1_length + src2_length; can be larger than dest_size, or 0 if an error occurs (only for illegal arguments), in which cases the contents of dest are undefined. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_next_sort_key_part | ( | const i18n_ucollator_h | collator, |
i18n_uchar_iter_h * | iter, | ||
uint32_t | state[2], | ||
uint8_t * | dest, | ||
int32_t | count, | ||
int32_t * | result_length | ||
) |
Gets the next count bytes of a sort key.
Caller needs to preserve state array between calls and to provide the same type of i18n_uchar_iter_h set with the same string. The destination buffer provided must be big enough to store the number of requested bytes. The generated sort key may or may not be compatible with sort keys generated using i18n_ucollator_get_sort_key().
[in] | collator | The i18n_ucollator_h containing the collation rules |
[in] | iter | i18n_uchar_iter_h containing the string we need the sort key to be calculated for |
[in,out] | state | Opaque state of sortkey iteration; must be preserved by the caller between calls |
[out] | dest | Buffer to hold the resulting sortkey part |
[in] | count | The number of sort key bytes required |
[out] | result_length | The actual number of bytes of a sortkey. It can be smaller than count if we have reached the end of the sort key. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_safe_clone | ( | i18n_ucollator_h | collator, |
i18n_ucollator_h * | clone | ||
) |
Clones the given collator, the cloning is thread-safe.
[in] | collator | The collator to be cloned |
[out] | clone | The pointer to the new clone |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_set_attribute | ( | i18n_ucollator_h | collator, |
i18n_ucollator_attribute_e | attr, | ||
i18n_ucollator_attribute_value_e | val | ||
) |
Sets an attribute's value.
[in] | collator | The i18n_ucollator_h containing attributes to be changed |
[in] | attr | The attribute type |
[in] | val | The attribute value |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_set_max_variable | ( | i18n_ucollator_h | collator, |
i18n_ucollator_reorder_code_e | group | ||
) |
Sets the variable top to the top of the specified reordering group.
The variable top determines the highest-sorting character which is affected by I18N_UCOLLATOR_ALTERNATE_HANDLING. If that attribute is set to I18N_UCOLLATOR_NON_IGNORABLE, then the variable top has no effect.
[in] | collator | The collator |
[in] | group | One of: I18N_UCOLLATOR_REORDER_CODE_SPACE , I18N_UCOLLATOR_REORDER_CODE_PUNCTUATION , I18N_UCOLLATOR_REORDER_CODE_SYMBOL , I18N_UCOLLATOR_REORDER_CODE_CURRENCY , I18N_UCOLLATOR_REORDER_CODE_DEFAULT to restore the default max variable group |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_set_reorder_codes | ( | i18n_ucollator_h | collator, |
const int32_t * | reorder_codes, | ||
int32_t | codes_length | ||
) |
Sets the reordering codes for this collator.
Collation reordering allows scripts and some other groups of characters to be moved relative to each other. This reordering is done on top of the DUCET/CLDR standard collation order. Reordering can specify groups to be placed at the start and/or the end of the collation order. These groups are specified using i18n_uscript_code_e codes and i18n_ucollator_reorder_code_e entries.
By default, reordering codes specified for the start of the order are placed in the order given after several special non-script blocks. These special groups of characters are space, punctuation, symbol, currency, and digit. These special groups are represented with i18n_ucollator_reorder_code_e entries. Script groups can be intermingled with these special non-script groups if those special groups are explicitly specified in the reordering.
The special code OTHERS stands for any script that is not explicitly mentioned in the list of reordering codes given.
The special reorder code DEFAULT will reset the reordering for this collator to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that was specified when this collator was created from resource data or from rules. The DEFAULT code must be the sole code supplied when it is used. If not, then I18N_ERROR_INVALID_PARAMETER will be set.
The special reorder code NONE will remove any reordering for this collator. The result of setting no reordering will be to have the DUCET/CLDR ordering used.
[in] | collator | The i18n_ucollator_h to set the reordering codes on |
[in] | reorder_codes | An array of script codes in the new order. This can be NULL if the codes_length is also set to 0 . An empty array will clear any reordering codes on the collator. |
[in] | codes_length | The length of reorder_codes |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_set_strength | ( | i18n_ucollator_h | collator, |
i18n_ucollator_strength_e | strength | ||
) |
Sets the collation strength used in a collator.
The strength influences how strings are compared.
[in] | collator | The i18n_ucollator_h to set. |
[in] | strength | The desired collation strength. One of i18n_ucollator_strength_e |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_str_collator | ( | const i18n_ucollator_h | collator, |
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const i18n_uchar * | target, | ||
int32_t | target_len, | ||
i18n_ucollator_result_e * | result | ||
) |
Compares two strings.
The strings will be compared using the options already specified.
[in] | collator | The i18n_ucollator_h containing the comparison rules |
[in] | src | The source string |
[in] | src_len | The length of the source, otherwise -1 if null-terminated |
[in] | target | The target string |
[in] | target_len | The length of the target, otherwise -1 if null-terminated |
[out] | result | The result of comparing the strings; one of I18N_UCOLLATOR_EQUAL, I18N_UCOLLATOR_GREATER, or I18N_UCOLLATOR_LESS |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_str_collator_iter | ( | const i18n_ucollator_h | collator, |
const i18n_uchar_iter_h * | src_iter, | ||
const i18n_uchar_iter_h * | dest_iter, | ||
i18n_ucollator_result_e * | result | ||
) |
Compares two strings using iterators.
The strings will be compared using the options already specified.
[in] | collator | The i18n_ucollator_h containing the comparison rules |
[in] | src_iter | The source string iterator |
[in] | dest_iter | The dest string iterator |
[out] | result | The result of comparing the strings; one of I18N_UCOLLATOR_EQUAL, I18N_UCOLLATOR_GREATER, or I18N_UCOLLATOR_LESS |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ucollator_str_collator_utf8 | ( | const i18n_ucollator_h | collator, |
const char * | src, | ||
int32_t | src_len, | ||
const char * | target, | ||
int32_t | target_len, | ||
i18n_ucollator_result_e * | result | ||
) |
Compares two strings in UTF-8.
The strings will be compared using the options already specified.
[in] | collator | The i18n_ucollator_h containing the comparison rules |
[in] | src | The source UTF-8 string |
[in] | src_len | The length of the src, otherwise -1 if null-terminated |
[in] | target | The target UTF-8 string |
[in] | target_len | The length of the target, otherwise -1 if null-terminated |
[out] | result | The result of comparing the strings; one of I18N_UCOLLATOR_EQUAL, I18N_UCOLLATOR_GREATER, or I18N_UCOLLATOR_LESS |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |