Tizen Native API
5.0
|
The Uset module allows you to specify a subset of characters used in strings.
#include <utils_i18n.h>
The Uset module allows you to specify a subset of characters used in strings.
Functions | |
int | i18n_uset_create_empty (i18n_uset_h *set) |
Creates an empty i18n_uset_h object. | |
int | i18n_uset_create (i18n_uchar32 start, i18n_uchar32 end, i18n_uset_h *set) |
Creates an i18n_uset_h object that contains the range of characters start..end, inclusive. | |
int | i18n_uset_create_pattern (const i18n_uchar *pattern, int32_t pattern_length, i18n_uset_h *set) |
Creates a set based on a given pattern. | |
int | i18n_uset_create_pattern_options (const i18n_uchar *pattern, int32_t pattern_length, uint32_t options, i18n_uset_h *set) |
Creates a set based on a given pattern. | |
int | i18n_uset_destroy (i18n_uset_h set) |
Disposes of the storage used by an i18n_uset_h object. | |
int | i18n_uset_clone (const i18n_uset_h set, i18n_uset_h *set_clone) |
Returns a copy of this object. | |
i18n_ubool | i18n_uset_is_frozen (const i18n_uset_h set) |
Determines whether the set has been frozen (made immutable) or not. | |
int | i18n_uset_freeze (i18n_uset_h set) |
Freezes the set (makes it immutable). | |
int | i18n_uset_clone_as_thawed (const i18n_uset_h set, i18n_uset_h *set_copy) |
Clones the set and makes the clone mutable. | |
int | i18n_uset_set (i18n_uset_h set, i18n_uchar32 start, i18n_uchar32 end) |
Causes the i18n_uset_h object to represent the range start - end . | |
int32_t | i18n_uset_apply_pattern (i18n_uset_h set, const i18n_uchar *pattern, int32_t pattern_length, uint32_t options) |
Modifies the set to represent the set specified by the given pattern. | |
int | i18n_uset_apply_int_property_value (i18n_uset_h set, i18n_uchar_uproperty_e prop, int32_t value) |
Modifies the set to contain those code points which have the given value for the given binary or enumerated property, as returned by i18n_uchar_get_int_property_value(). | |
int | i18n_uset_apply_property_alias (i18n_uset_h set, const i18n_uchar *prop, int32_t prop_length, const i18n_uchar *value, int32_t value_length) |
Modifies the set to contain those code points which have the given value for the given property. | |
i18n_ubool | i18n_uset_resembles_pattern (const i18n_uchar *pattern, int32_t pattern_length, int32_t pos) |
Returns true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern. | |
int32_t | i18n_uset_to_pattern (const i18n_uset_h set, i18n_uchar *result, int32_t result_capacity, i18n_ubool escape_unprintable) |
Returns a string representation of the given set. | |
int | i18n_uset_add (i18n_uset_h set, i18n_uchar32 character) |
Adds the given character to the given i18n_uset_h. | |
int | i18n_uset_add_all (i18n_uset_h set, const i18n_uset_h additional_set) |
Adds all of the elements in the specified set to this set if they are not already present. | |
int | i18n_uset_add_range (i18n_uset_h set, i18n_uchar32 start, i18n_uchar32 end) |
Adds the given range of characters to the given i18n_uset_h. After this call, i18n_uset_contains_range(set, start, end) will return true. A frozen set will not be modified. | |
int | i18n_uset_add_string (i18n_uset_h set, const i18n_uchar *str, int32_t str_len) |
Adds the given string to the given i18n_uset_h. | |
int | i18n_uset_add_all_code_points (i18n_uset_h set, const i18n_uchar *str, int32_t str_len) |
Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}. | |
int | i18n_uset_remove (i18n_uset_h set, i18n_uchar32 character) |
Removes the given character from the given i18n_uset_h. | |
int | i18n_uset_remove_range (i18n_uset_h set, i18n_uchar32 start, i18n_uchar32 end) |
Removes the given range of characters from the given i18n_uset_h. | |
int | i18n_uset_remove_string (i18n_uset_h set, const i18n_uchar *str, int32_t str_len) |
Removes the given string from the given i18n_uset_h. | |
int | i18n_uset_remove_all (i18n_uset_h set, const i18n_uset_h remove_set) |
Removes from this set all of its elements that are contained in the specified set. | |
int | i18n_uset_retain (i18n_uset_h set, i18n_uchar32 start, i18n_uchar32 end) |
Retains only the elements in this set that are contained in the specified range. | |
int | i18n_uset_retain_all (i18n_uset_h set, const i18n_uset_h retain) |
Retains only the elements in this set that are contained in the specified set. | |
int | i18n_uset_compact (i18n_uset_h set) |
Reallocates this object's internal structures to take up the least possible space, without changing this object's value. | |
int | i18n_uset_complement (i18n_uset_h set) |
Inverts this set. This operation modifies this set so that its value is its complement. | |
int | i18n_uset_complement_all (i18n_uset_h set, const i18n_uset_h complement) |
Complements in this set all elements contained in the specified set. | |
int | i18n_uset_clear (i18n_uset_h set) |
Removes all of the elements from this set. | |
int | i18n_uset_destroy_over (i18n_uset_h set, int32_t attributes) |
Closes this set over the given attribute. | |
int | i18n_uset_remove_all_strings (i18n_uset_h set) |
Removes all strings from this set. | |
i18n_ubool | i18n_uset_is_empty (const i18n_uset_h set) |
Returns true if the given i18n_uset_h contains no characters and no strings. | |
i18n_ubool | i18n_uset_contains (const i18n_uset_h set, i18n_uchar32 character) |
Returns true if the given i18n_uset_h contains the given character. | |
i18n_ubool | i18n_uset_contains_range (const i18n_uset_h set, i18n_uchar32 start, i18n_uchar32 end) |
Returns true if the given i18n_uset_h contains all characters c where start <= c && c <= end. | |
i18n_ubool | i18n_uset_contains_string (const i18n_uset_h set, const i18n_uchar *str, int32_t str_len) |
Returns true if the given i18n_uset_h contains the given string. | |
int32_t | i18n_uset_index_of (const i18n_uset_h set, i18n_uchar32 character) |
Returns the index of the given character within this set, where the set is ordered by ascending code point. | |
i18n_uchar32 | i18n_uset_char_at (const i18n_uset_h set, int32_t char_index) |
Returns the character at the given index within this set, where the set is ordered by ascending code point. | |
int32_t | i18n_uset_size (const i18n_uset_h set) |
Returns the number of characters and strings contained in the given i18n_uset_h. | |
int32_t | i18n_uset_get_item_count (const i18n_uset_h set) |
Returns the number of items in this set. | |
int32_t | i18n_uset_get_item (const i18n_uset_h set, int32_t item_index, i18n_uchar32 *start, i18n_uchar32 *end, i18n_uchar *str, int32_t str_capacity) |
Returns an item of this set. | |
i18n_ubool | i18n_uset_contains_all (const i18n_uset_h set1, const i18n_uset_h set2) |
Returns true if set1 contains all the characters and strings of set2. It answers the question, 'Is set1 a superset of set2?'. | |
i18n_ubool | i18n_uset_contains_all_code_points (const i18n_uset_h set, const i18n_uchar *str, int32_t str_len) |
Returns true if this set contains all the characters of the given string. | |
i18n_ubool | i18n_uset_contains_none (const i18n_uset_h set1, const i18n_uset_h set2) |
Returns true if set1 contains none of the characters and strings of set2. | |
i18n_ubool | i18n_uset_contains_some (const i18n_uset_h set1, const i18n_uset_h set2) |
Returns true if set1 contains some of the characters and strings of set2. | |
int32_t | i18n_uset_span (const i18n_uset_h set, const i18n_uchar *str, int32_t length, i18n_uset_span_condition_e span_condition) |
Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED). | |
int32_t | i18n_uset_span_back (const i18n_uset_h set, const i18n_uchar *str, int32_t length, i18n_uset_span_condition_e span_condition) |
Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED). | |
int32_t | i18n_uset_span_utf8 (const i18n_uset_h set, const char *str, int32_t length, i18n_uset_span_condition_e span_condition) |
Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED). | |
int32_t | i18n_uset_span_back_utf8 (const i18n_uset_h set, const char *str, int32_t length, i18n_uset_span_condition_e span_condition) |
Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED). | |
i18n_ubool | i18n_uset_equals (const i18n_uset_h set1, const i18n_uset_h set2) |
Returns true if set1 contains all of the characters and strings of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'. | |
int32_t | i18n_uset_serialize (const i18n_uset_h set, uint16_t *dest, int32_t dest_capacity) |
Serializes this set into an array of 16-bit integers. | |
i18n_ubool | i18n_uset_get_serialized_set (const uint16_t *src, int32_t src_length, i18n_userialized_set_s *fill_set) |
Given a serialized array, fill in the given serialized set object. | |
int | i18n_uset_set_serialized_to_one (i18n_uchar32 character, i18n_userialized_set_s *fill_set) |
Sets the i18n_userialized_set_s to contain the given character (and nothing else). | |
i18n_ubool | i18n_uset_serialized_contains (const i18n_userialized_set_s *set, i18n_uchar32 character) |
Returns true if the given i18n_userialized_set_s contains the given character. | |
int32_t | i18n_uset_get_serialized_range_count (const i18n_userialized_set_s *set) |
Returns the number of disjoint ranges of characters contained in the given serialized set. | |
i18n_ubool | i18n_uset_get_serialized_range (const i18n_userialized_set_s *set, int32_t range_index, i18n_uchar32 *p_start, i18n_uchar32 *p_end) |
Returns a range of characters contained in the given serialized set. | |
Typedefs | |
typedef void * | i18n_uset_h |
An i18n_uset_h handle. | |
Defines | |
#define | I18N_USET_SERIALIZED_STATIC_ARRAY_CAPACITY 8 |
Capacity of i18n_userialized_set_s::static_array. Enough for any single-code point set. |
Capacity of i18n_userialized_set_s::static_array. Enough for any single-code point set.
typedef void* i18n_uset_h |
An i18n_uset_h handle.
Use the i18n_uset_* API to manipulate. Create with i18n_uset_create*, and destroy with i18n_uset_destroy().
anonymous enum |
Enumeration for bitmask values to be passed to i18n_uset_create_pattern_options() or i18n_uset_apply_pattern() taking an option parameter.
Enumeration for span conditions.
I18N_USET_SPAN_NOT_CONTAINED |
Continue a span() while there is no set element at the current position. Stops before the first set element (character or string). (For code points only, this is like while contains(current) == false). When span() returns, the substring between where it started and the position it returned consists only of characters that are not in the set, and none of its strings overlap with the span. |
I18N_USET_SPAN_CONTAINED |
Continue a span() while there is a set element at the current position. (For characters only, this is like while contains(current) == true). When span() returns, the substring between where it started and the position it returned consists only of set elements (characters or strings) that are in the set. If a set contains strings, then the span will be the longest substring matching any of the possible concatenations of set elements (characters or strings). (There must be a single, non-overlapping concatenation of characters or strings.) This is equivalent to a POSIX regular expression for (OR of each set element). |
I18N_USET_SPAN_SIMPLE |
Continue a span() while there is a set element at the current position. (For characters only, this is like while contains(current) == true). When span() returns, the substring between where it started and the position it returned consists only of set elements (characters or strings) that are in the set. If a set only contains single characters, then this is the same as I18N_USET_SPAN_CONTAINED. If a set contains strings, then the span will be the longest substring with a match at each position with the longest single set element (character or string). |
I18N_USET_SPAN_CONDITION_COUNT |
One more than the last span condition. |
int i18n_uset_add | ( | i18n_uset_h | set, |
i18n_uchar32 | character | ||
) |
Adds the given character to the given i18n_uset_h.
After this call, i18n_uset_contains(set, character) will return true. A frozen set will not be modified.
[in] | set | The object to which to add the character. Must not be NULL . |
[in] | character | The character to add. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_add_all | ( | i18n_uset_h | set, |
const i18n_uset_h | additional_set | ||
) |
Adds all of the elements in the specified set to this set if they are not already present.
This operation effectively modifies this set so that its value is the union of the two sets. The behavior of this operation is unspecified if the specified collection is modified while the operation is in progress. A frozen set will not be modified.
[in] | set | The object to which to add the set. Must not be NULL . |
[in] | additional_set | The source set whose elements are to be added to this set. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_add_all_code_points | ( | i18n_uset_h | set, |
const i18n_uchar * | str, | ||
int32_t | str_len | ||
) |
Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}.
If this set already contains any particular character, it has no effect on that character. A frozen set will not be modified.
[in] | set | The object to which to add the character. Must not be NULL . |
[in] | str | The source string. |
[in] | str_len | The length of the string, >= 0, or -1 if NULL terminated. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_add_range | ( | i18n_uset_h | set, |
i18n_uchar32 | start, | ||
i18n_uchar32 | end | ||
) |
Adds the given range of characters to the given i18n_uset_h. After this call, i18n_uset_contains_range(set, start, end) will return true. A frozen set will not be modified.
[in] | set | The object to which to add the character. Must not be NULL . |
[in] | start | The first character of the range to add, inclusive |
[in] | end | The last character of the range to add, inclusive |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_add_string | ( | i18n_uset_h | set, |
const i18n_uchar * | str, | ||
int32_t | str_len | ||
) |
Adds the given string to the given i18n_uset_h.
After this call, i18n_uset_contains_string(set, str, str_len) will return true. A frozen set will not be modified.
[in] | set | The object to which to add the character. Must not be NULL . |
[in] | str | The string to add. |
[in] | str_len | The length of the string, >= 0, or -1 if NULL terminated. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_apply_int_property_value | ( | i18n_uset_h | set, |
i18n_uchar_uproperty_e | prop, | ||
int32_t | value | ||
) |
Modifies the set to contain those code points which have the given value for the given binary or enumerated property, as returned by i18n_uchar_get_int_property_value().
Prior contents of this set are lost. A frozen set will not be modified.
[in] | set | The object to contain the code points defined by the property. Must not be NULL . |
[in] | prop | A property in the range I18N_UCHAR_INT_START..I18N_UCHAR_INT_LIMIT-1 or I18N_UCHAR_MASK_START..I18N_UCHAR_MASK_LIMIT-1. |
[in] | value | A value in the range i18n_uchar_get_int_property_min_value(prop).. i18n_uchar_get_int_property_max_value(prop), with one exception. If prop is I18N_UCHAR_GENERAL_CATEGORY_MASK, then value should not be a i18n_uchar_category_e, but rather a mask value produced by I18N_U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_apply_pattern | ( | i18n_uset_h | set, |
const i18n_uchar * | pattern, | ||
int32_t | pattern_length, | ||
uint32_t | options | ||
) |
Modifies the set to represent the set specified by the given pattern.
See the UnicodeSet class description for the syntax of the pattern language. See also the User Guide chapter about UnicodeSet. Empties the set passed before applying the pattern. A frozen set will not be modified.
[in] | set | The set to which the pattern is to be applied. Must not be NULL . |
[in] | pattern | A pointer to i18n_uchar string specifying what characters are in the set. The character at pattern[0] must be a '['. |
[in] | pattern_length | The length of the i18n_uchar string, >= 0, or -1 if NULL terminated. |
[in] | options | A bitmask for options to apply to the pattern. Valid options are I18N_USET_IGNORE_SPACE and I18N_USET_CASE_INSENSITIVE. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_apply_property_alias | ( | i18n_uset_h | set, |
const i18n_uchar * | prop, | ||
int32_t | prop_length, | ||
const i18n_uchar * | value, | ||
int32_t | value_length | ||
) |
Modifies the set to contain those code points which have the given value for the given property.
Prior contents of this set are lost. A frozen set will not be modified.
[in] | set | The object to contain the code points defined by the given property and value alias. Must not be NULL . |
[in] | prop | A string specifying a property alias, either short or long. The name is matched loosely. See PropertyAliases.txt for names and a description of loose matching. If the value string is empty, then this string is interpreted as either a General_Category value alias, a Script value alias, a binary property alias, or a special ID. Special IDs are matched loosely and correspond to the following sets: |
"ANY" = [\u0000-\U0010FFFF], "ASCII" = [\u0000-\u007F], "Assigned" = [:^Cn:].
[in] | prop_length | The length of the prop, >= 0, or -1 if NULL-terminated. |
[in] | value | A string specifying a value alias, either short or long. The name is matched loosely. See PropertyValueAliases.txt for names and a description of loose matching. In addition to aliases listed, numeric values and canonical combining classes may be expressed numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string may also be empty. |
[in] | value_length | The length of the value, >= 0, or -1 if NULL-terminated. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar32 i18n_uset_char_at | ( | const i18n_uset_h | set, |
int32_t | char_index | ||
) |
Returns the character at the given index within this set, where the set is ordered by ascending code point.
If the index is out of range, return (i18n_uchar32)-1. The inverse of this function is i18n_uset_index_of().
[in] | set | The set. Must not be NULL . |
[in] | char_index | An index from 0..size()-1 to obtain the char for |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_clear | ( | i18n_uset_h | set | ) |
Removes all of the elements from this set.
This set will be empty after this call returns. A frozen set will not be modified.
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_clone | ( | const i18n_uset_h | set, |
i18n_uset_h * | set_clone | ||
) |
Returns a copy of this object.
If this set is frozen, then the clone will be frozen as well. Use i18n_uset_clone_as_thawed() for a mutable clone of a frozen set.
[in] | set | The original set. Must not be NULL . |
[out] | set_clone | The newly allocated copy of the set |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_clone_as_thawed | ( | const i18n_uset_h | set, |
i18n_uset_h * | set_copy | ||
) |
Clones the set and makes the clone mutable.
See the ICU4J Freezable interface for details.
[in] | set | The set. Must not be NULL . |
[out] | set_copy | The mutable clone |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_compact | ( | i18n_uset_h | set | ) |
Reallocates this object's internal structures to take up the least possible space, without changing this object's value.
A frozen set will not be modified.
[in] | set | The object on which to perform the compact. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_complement | ( | i18n_uset_h | set | ) |
Inverts this set. This operation modifies this set so that its value is its complement.
This operation does not affect the multicharacter strings, if any. A frozen set will not be modified.
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_complement_all | ( | i18n_uset_h | set, |
const i18n_uset_h | complement | ||
) |
Complements in this set all elements contained in the specified set.
Any character in the other set will be removed if it is in this set, or will be added if it is not in this set. A frozen set will not be modified.
[in] | set | The set with which to complement. Must not be NULL . |
[in] | complement | Set that defines which elements will be xor'ed from this set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_contains | ( | const i18n_uset_h | set, |
i18n_uchar32 | character | ||
) |
Returns true if the given i18n_uset_h contains the given character.
This function works faster with a frozen set.
[in] | set | the set. Must not be NULL . |
[in] | character | the codepoint to check for within the set |
true if set contains the given character
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_contains_all | ( | const i18n_uset_h | set1, |
const i18n_uset_h | set2 | ||
) |
Returns true if set1 contains all the characters and strings of set2. It answers the question, 'Is set1 a superset of set2?'.
[in] | set1 | Set to be checked for containment. Must not be NULL . |
[in] | set2 | Set to be checked for containment. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_contains_all_code_points | ( | const i18n_uset_h | set, |
const i18n_uchar * | str, | ||
int32_t | str_len | ||
) |
Returns true if this set contains all the characters of the given string.
This does not check containment of grapheme clusters, like i18n_uset_contains_string().
[in] | set | Set of characters to be checked for containment. Must not be NULL . |
[in] | str | String containing codepoints to be checked for containment |
[in] | str_len | The length of the string, >= 0, or -1 if NULL terminated. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_contains_none | ( | const i18n_uset_h | set1, |
const i18n_uset_h | set2 | ||
) |
Returns true if set1 contains none of the characters and strings of set2.
It answers the question, 'Is set1 a disjoint set of set2?'
[in] | set1 | Set to be checked for containment. Must not be NULL . |
[in] | set2 | Set to be checked for containment. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_contains_range | ( | const i18n_uset_h | set, |
i18n_uchar32 | start, | ||
i18n_uchar32 | end | ||
) |
Returns true if the given i18n_uset_h contains all characters c where start <= c && c <= end.
[in] | set | The set. Must not be NULL . |
[in] | start | The first character of the range to test, inclusive |
[in] | end | The last character of the range to test, inclusive |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_contains_some | ( | const i18n_uset_h | set1, |
const i18n_uset_h | set2 | ||
) |
Returns true if set1 contains some of the characters and strings of set2.
It answers the question, 'Do set1 and set2 have an intersection?'
[in] | set1 | Set to be checked for containment. Must not be NULL . |
[in] | set2 | Set to be checked for containment. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_contains_string | ( | const i18n_uset_h | set, |
const i18n_uchar * | str, | ||
int32_t | str_len | ||
) |
Returns true if the given i18n_uset_h contains the given string.
[in] | set | The set. Must not be NULL . |
[in] | str | The string. |
[in] | str_len | The length of the string, >= 0, or -1 if NULL terminated |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_create | ( | i18n_uchar32 | start, |
i18n_uchar32 | end, | ||
i18n_uset_h * | set | ||
) |
Creates an i18n_uset_h object that contains the range of characters start..end, inclusive.
If start > end then an empty set is created (same as using i18n_uset_create_empty()).
[in] | start | First character of the range, inclusive |
[in] | end | Last character of the range, inclusive |
[out] | set | A pointer to the newly created i18n_uset_h object. The caller must call i18n_uset_destroy() on it when done. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_create_empty | ( | i18n_uset_h * | set | ) |
Creates an empty i18n_uset_h object.
Equivalent to i18n_uset_create(1, 0).
[out] | set | A pointer to the newly created i18n_uset_h. The caller must call i18n_uset_destroy() on it when done. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_create_pattern | ( | const i18n_uchar * | pattern, |
int32_t | pattern_length, | ||
i18n_uset_h * | set | ||
) |
Creates a set based on a given pattern.
See the UnicodeSet class description for the syntax of the pattern language.
[in] | pattern | A string specifying what characters are in the set |
[in] | pattern_length | The length of the pattern, >= 0, or -1 if NULL-terminated. |
[out] | set | A pointer to the newly created i18n_uset_h object. The caller must call i18n_uset_destroy() on it when done. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_create_pattern_options | ( | const i18n_uchar * | pattern, |
int32_t | pattern_length, | ||
uint32_t | options, | ||
i18n_uset_h * | set | ||
) |
Creates a set based on a given pattern.
See the UnicodeSet class description for the syntax of the pattern language.
[in] | pattern | A string specifying what characters are in the set |
[in] | pattern_length | The length of the pattern, >= 0, or -1 if NULL-terminated |
[in] | options | Bitmask for options to apply to the pattern. Valid options are I18N_USET_IGNORE_SPACE and I18N_USET_CASE_INSENSITIVE. |
[out] | set | A pointer to the newly created i18n_uset_h object. The caller must call i18n_uset_destroy() on it when done. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_destroy | ( | i18n_uset_h | set | ) |
Disposes of the storage used by an i18n_uset_h object.
This function should be called exactly once for objects returned by i18n_uset_create().
[in] | set | The object to dispose of |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_destroy_over | ( | i18n_uset_h | set, |
int32_t | attributes | ||
) |
Closes this set over the given attribute.
For the attribute I18N_USET_CASE_INSENSITIVE, the result is to modify this set so that:
1. For each character or string 'a' in this set, all strings or characters 'b' such that foldCase(a) == foldCase(b) are added to this set.
2. For each string 'e' in the resulting set, if e != foldCase(e), 'e' will be removed.
Example: [aq\u00DF{Bc}{bC}{Fi}] => [aAqQ\u00DF\uFB01{ss}{bc}{fi}]
(Here foldCase(x) refers to the operation i18n_ustring_fold_case(), and a == b denotes that the contents are the same, not pointer comparison.)
A frozen set will not be modified.
[in] | set | The set. Must not be NULL . |
[in] | attributes | Bitmask for attributes to close over. Currently only the I18N_USET_CASE_INSENSITIVE bit is supported. Any undefined bits are ignored. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_equals | ( | const i18n_uset_h | set1, |
const i18n_uset_h | set2 | ||
) |
Returns true if set1 contains all of the characters and strings of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'.
[in] | set1 | Set to be checked for containment. Must not be NULL . |
[in] | set2 | Set to be checked for containment. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_freeze | ( | i18n_uset_h | set | ) |
Freezes the set (make it immutable).
Once frozen, it cannot be unfrozen and is therefore thread-safe until it is deleted. See the ICU4J Freezable interface for details. Freezing the set may also make some operations faster, for example i18n_uset_contains() and i18n_uset_span(). A frozen set will not be modified. (It remains frozen.)
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_get_item | ( | const i18n_uset_h | set, |
int32_t | item_index, | ||
i18n_uchar32 * | start, | ||
i18n_uchar32 * | end, | ||
i18n_uchar * | str, | ||
int32_t | str_capacity | ||
) |
Returns an item of this set.
An item is either a range of characters or a single multicharacter string.
[in] | set | The set. Must not be NULL . |
[in] | item_index | A non-negative integer in the range [0; i18n_uset_get_item_count(set)-1]. |
[out] | start | Pointer to variable to receive first character in range, inclusive |
[out] | end | Pointer to variable to receive last character in range, inclusive |
[out] | str | Buffer to receive the string, may be NULL |
[in] | str_capacity | Capacity of str, or 0 if str is NULL |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_get_item_count | ( | const i18n_uset_h | set | ) |
Returns the number of items in this set.
An item is either a range of characters or a single multicharacter string.
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_get_serialized_range | ( | const i18n_userialized_set_s * | set, |
int32_t | range_index, | ||
i18n_uchar32 * | p_start, | ||
i18n_uchar32 * | p_end | ||
) |
Returns a range of characters contained in the given serialized set.
[in] | set | The serialized set. Must not be NULL . |
[in] | range_index | A non-negative integer in the range 0.. i18n_uset_get_serialized_range_count(set)-1 |
[out] | p_start | Pointer to variable to receive first character in range, inclusive |
[out] | p_end | Pointer to variable to receive last character in range, inclusive |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_get_serialized_range_count | ( | const i18n_userialized_set_s * | set | ) |
Returns the number of disjoint ranges of characters contained in the given serialized set.
Ignores any strings contained in the set.
[in] | set | The serialized set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_get_serialized_set | ( | const uint16_t * | src, |
int32_t | src_length, | ||
i18n_userialized_set_s * | fill_set | ||
) |
Given a serialized array, fill in the given serialized set object.
[in] | src | pointer to start of array. Must not be NULL . |
[in] | src_length | length of src array, >= 0. |
[out] | fill_set | the serialized set to be filled |
true if the given array is valid, otherwise false
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_index_of | ( | const i18n_uset_h | set, |
i18n_uchar32 | character | ||
) |
Returns the index of the given character within this set, where the set is ordered by ascending code point.
If the character is not in this set, return -1. The inverse of this function is i18n_uset_char_at().
[in] | set | the set. Must not be NULL . |
[in] | character | the character to obtain the index for |
The index of the given character within this set, or -1 if the character is not in this set
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_is_empty | ( | const i18n_uset_h | set | ) |
Returns true if the given i18n_uset_h contains no characters and no strings.
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_is_frozen | ( | const i18n_uset_h | set | ) |
Determines whether the set has been frozen (made immutable) or not.
See the ICU4J Freezable interface for details.
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_remove | ( | i18n_uset_h | set, |
i18n_uchar32 | character | ||
) |
Removes the given character from the given i18n_uset_h.
After this call, i18n_uset_contains(set, character) will return false. A frozen set will not be modified.
[in] | set | the object from which to remove the character. Must not be NULL . |
[in] | character | the character to remove |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_remove_all | ( | i18n_uset_h | set, |
const i18n_uset_h | remove_set | ||
) |
Removes from this set all of its elements that are contained in the specified set.
This operation effectively modifies this set so that its value is the asymmetric set difference of the two sets. A frozen set will not be modified.
[in] | set | The object from which the elements are to be removed. Must not be NULL . |
[in] | remove_set | The object that defines which elements will be removed from this set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_remove_all_strings | ( | i18n_uset_h | set | ) |
Removes all strings from this set.
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_remove_range | ( | i18n_uset_h | set, |
i18n_uchar32 | start, | ||
i18n_uchar32 | end | ||
) |
Removes the given range of characters from the given i18n_uset_h.
After this call, i18n_uset_contains(set, start, end) will return false. A frozen set will not be modified.
[in] | set | The object from which to remove the range of characters. Must not be NULL . |
[in] | start | The first character of the range to remove, inclusive |
[in] | end | The last character of the range to remove, inclusive |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_remove_string | ( | i18n_uset_h | set, |
const i18n_uchar * | str, | ||
int32_t | str_len | ||
) |
Removes the given string from the given i18n_uset_h.
After this call, i18n_uset_contains_string(set, str, str_len) will return false. A frozen set will not be modified.
[in] | set | The object from which to remove the string. Must not be NULL . |
[in] | str | The string to remove. |
[in] | str_len | The length of the string, >= 0, or -1 if NULL terminated. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_uset_resembles_pattern | ( | const i18n_uchar * | pattern, |
int32_t | pattern_length, | ||
int32_t | pos | ||
) |
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.
[in] | pattern | A string specifying the pattern. |
[in] | pattern_length | The length of the pattern, >= 0, or -1 if NULL-terminated. |
[in] | pos | The given position, >= 0. |
true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_retain | ( | i18n_uset_h | set, |
i18n_uchar32 | start, | ||
i18n_uchar32 | end | ||
) |
Retains only the elements in this set that are contained in the specified range.
If start > end then an empty range is retained, leaving the set empty. This is equivalent to a boolean logic AND, or a set INTERSECTION. A frozen set will not be modified.
[in] | set | The object for which to retain only the specified range. Must not be NULL . |
[in] | start | First character, inclusive, of range to be retained to this set. |
[in] | end | Last character, inclusive, of range to be retained to this set. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_retain_all | ( | i18n_uset_h | set, |
const i18n_uset_h | retain | ||
) |
Retains only the elements in this set that are contained in the specified set.
In other words, removes from this set all of its elements that are not contained in the specified set. This operation effectively modifies this set so that its value is the intersection of the two sets. A frozen set will not be modified.
[in] | set | The object on which to perform the retain. Must not be NULL . |
[in] | retain | Set that defines which elements this set will retain. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_serialize | ( | const i18n_uset_h | set, |
uint16_t * | dest, | ||
int32_t | dest_capacity | ||
) |
Serializes this set into an array of 16-bit integers.
Serialization (currently) only records the characters in the set; multicharacter strings are ignored.
The array has following format (each line is one 16-bit integer):
length = (n+2*m) | (m!=0?0x8000:0)
bmpLength = n; present if m!=0
bmp[0]
bmp[1]
...
bmp[n-1]
supp-high[0]
supp-low[0]
supp-high[1]
supp-low[1]
...
supp-high[m-1]
supp-low[m-1]
The array starts with a header. After the header are n bmp code points, then m supplementary code points. Either n or m or both may be zero. n+2*m is always <= 0x7FFF.
If there are no supplementary characters (if m==0) then the header is one 16-bit integer, 'length', with value n.
If there are supplementary characters (if m!=0) then the header is two 16-bit integers. The first, 'length', has value (n+2*m)|0x8000. The second, 'bmpLength', has value n.
After the header the code points are stored in ascending order. Supplementary code points are stored as most significant 16 bits followed by least significant 16 bits.
[in] | set | the set. Must not be NULL . |
[out] | dest | Pointer to buffer of dest_capacity 16-bit integers. May be NULL only if dest_capacity is zero. |
[in] | dest_capacity | Size of dest, or zero. Must not be negative. |
The total length of the serialized format, including the header, that is, n+2*m+(m != 0 ? 2 : 1), or 0 on error other than I18N_ERROR_BUFFER_OVERFLOW.
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
I18N_ERROR_INDEX_OUTOFBOUNDS | If n+2*m > 0x7FFF |
I18N_ERROR_BUFFER_OVERFLOW | If n+2*m+(m != 0 ? 2 : 1) > dest_capacity . |
i18n_ubool i18n_uset_serialized_contains | ( | const i18n_userialized_set_s * | set, |
i18n_uchar32 | character | ||
) |
Returns true if the given i18n_userialized_set_s contains the given character.
[in] | set | the serialized set. Must not be NULL . |
[in] | character | the code point to check for within the set |
true if set contains character, otherwise false
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_set | ( | i18n_uset_h | set, |
i18n_uchar32 | start, | ||
i18n_uchar32 | end | ||
) |
Causes the i18n_uset_h object to represent the range start - end.
If start > end then this i18n_uset_h is set to an empty range. A frozen set will not be modified.
[in] | set | The object to set to the given range. Must not be NULL . |
[in] | start | First character in the set, inclusive |
[in] | end | Last character in the set, inclusive |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_uset_set_serialized_to_one | ( | i18n_uchar32 | character, |
i18n_userialized_set_s * | fill_set | ||
) |
Sets the i18n_userialized_set_s to contain the given character (and nothing else).
[in] | character | the code point to set |
[out] | fill_set | the serialized set to be filled |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_size | ( | const i18n_uset_h | set | ) |
Returns the number of characters and strings contained in the given i18n_uset_h.
[in] | set | The set. Must not be NULL . |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_span | ( | const i18n_uset_h | set, |
const i18n_uchar * | str, | ||
int32_t | length, | ||
i18n_uset_span_condition_e | span_condition | ||
) |
Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED).
See i18n_uset_span_condition_e for details. Similar to the strspn() C library function. Unpaired surrogates are treated according to contains() of their surrogate code points. This function works faster with a frozen set and with a non-negative string length argument.
[in] | set | the set. Must not be NULL . |
[in] | str | start of the input string. |
[in] | length | length of the string; >= 0, can be -1 for NULL-terminated |
[in] | span_condition | specifies the containment condition |
The length of the initial substring according to the span_condition; 0 if the start of the string does not fit the span_condition
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_span_back | ( | const i18n_uset_h | set, |
const i18n_uchar * | str, | ||
int32_t | length, | ||
i18n_uset_span_condition_e | span_condition | ||
) |
Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED).
See i18n_uset_span_condition_e for details. Unpaired surrogates are treated according to contains() of their surrogate code points. This function works faster with a frozen set and with a non-negative string length argument.
[in] | set | the set. Must not be NULL . |
[in] | str | start of the input string |
[in] | length | length of the string; >= 0, can be -1 for NULL-terminated |
[in] | span_condition | specifies the containment condition |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_span_back_utf8 | ( | const i18n_uset_h | set, |
const char * | str, | ||
int32_t | length, | ||
i18n_uset_span_condition_e | span_condition | ||
) |
Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED).
See i18n_uset_span_condition_e for details. Malformed byte sequences are treated according to contains(0xfffd). This function works faster with a frozen set and with a non-negative string length argument.
[in] | set | the set. Must not be NULL . |
[in] | str | start of the string (UTF-8) |
[in] | length | length of the string; >= 0, can be -1 for NULL-terminated |
[in] | span_condition | specifies the containment condition |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_span_utf8 | ( | const i18n_uset_h | set, |
const char * | str, | ||
int32_t | length, | ||
i18n_uset_span_condition_e | span_condition | ||
) |
Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set (I18N_USET_SPAN_CONTAINED, I18N_USET_SPAN_SIMPLE), or only of characters and strings that are not contained in this set (I18N_USET_SPAN_NOT_CONTAINED).
See i18n_uset_span_condition_e for details. Similar to the strspn() C library function. Malformed byte sequences are treated according to contains(0xfffd). This function works faster with a frozen set and with a non-negative string length argument.
[in] | set | the set. Must not be NULL . |
[in] | str | start of the string (UTF-8) |
[in] | length | length of the string; >= 0, can be -1 for NULL-terminated |
[in] | span_condition | specifies the containment condition |
The length of the initial substring according to the span_condition; 0 if the start of the string does not fit the span_condition
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_uset_to_pattern | ( | const i18n_uset_h | set, |
i18n_uchar * | result, | ||
int32_t | result_capacity, | ||
i18n_ubool | escape_unprintable | ||
) |
Returns a string representation of the given set.
If the result of calling this function is passed to i18n_uset_create_pattern(), it will produce another set that is equal to this one.
[in] | set | The set. Must not be NULL . |
[in,out] | result | The string to receive the rules, may be NULL . |
[in] | result_capacity | The capacity of result, >= 0, may be 0 if result is NULL . |
[in] | escape_unprintable | If true then convert unprintable characters to their hex escape representations, \uxxxx or \Uxxxxxxxx. Unprintable characters are those other than U+000A, U+0020..U+007E. |
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |