ICU 50.1.2
50.1.2
|
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables. More...
#include <tblcoll.h>
Public Member Functions | |
RuleBasedCollator (const UnicodeString &rules, UErrorCode &status) | |
RuleBasedCollator constructor. More... | |
RuleBasedCollator (const UnicodeString &rules, ECollationStrength collationStrength, UErrorCode &status) | |
RuleBasedCollator constructor. More... | |
RuleBasedCollator (const UnicodeString &rules, UColAttributeValue decompositionMode, UErrorCode &status) | |
RuleBasedCollator constructor. More... | |
RuleBasedCollator (const UnicodeString &rules, ECollationStrength collationStrength, UColAttributeValue decompositionMode, UErrorCode &status) | |
RuleBasedCollator constructor. More... | |
RuleBasedCollator (const RuleBasedCollator &other) | |
Copy constructor. More... | |
RuleBasedCollator (const uint8_t *bin, int32_t length, const RuleBasedCollator *base, UErrorCode &status) | |
Opens a collator from a collator binary image created using cloneBinary. More... | |
virtual | ~RuleBasedCollator () |
Destructor. More... | |
RuleBasedCollator & | operator= (const RuleBasedCollator &other) |
Assignment operator. More... | |
virtual UBool | operator== (const Collator &other) const |
Returns true if argument is the same as this object. More... | |
virtual Collator * | clone (void) const |
Makes a copy of this object. More... | |
virtual CollationElementIterator * | createCollationElementIterator (const UnicodeString &source) const |
Creates a collation element iterator for the source string. More... | |
virtual CollationElementIterator * | createCollationElementIterator (const CharacterIterator &source) const |
Creates a collation element iterator for the source. More... | |
virtual UCollationResult | compare (const UnicodeString &source, const UnicodeString &target, UErrorCode &status) const |
The comparison function compares the character data stored in two different strings. More... | |
virtual UCollationResult | compare (const UnicodeString &source, const UnicodeString &target, int32_t length, UErrorCode &status) const |
Does the same thing as compare but limits the comparison to a specified length. More... | |
virtual UCollationResult | compare (const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength, UErrorCode &status) const |
The comparison function compares the character data stored in two different string arrays. More... | |
virtual UCollationResult | compare (UCharIterator &sIter, UCharIterator &tIter, UErrorCode &status) const |
Compares two strings using the Collator. More... | |
virtual CollationKey & | getCollationKey (const UnicodeString &source, CollationKey &key, UErrorCode &status) const |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare. More... | |
virtual CollationKey & | getCollationKey (const UChar *source, int32_t sourceLength, CollationKey &key, UErrorCode &status) const |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare. More... | |
virtual int32_t | hashCode (void) const |
Generates the hash code for the rule-based collation object. More... | |
virtual Locale | getLocale (ULocDataLocaleType type, UErrorCode &status) const |
Gets the locale of the Collator. More... | |
const UnicodeString & | getRules (void) const |
Gets the tailoring rules for this collator. More... | |
virtual void | getVersion (UVersionInfo info) const |
Gets the version information for a Collator. More... | |
int32_t | getMaxExpansion (int32_t order) const |
Return the maximum length of any expansion sequences that end with the specified comparison order. More... | |
virtual UClassID | getDynamicClassID (void) const |
Returns a unique class ID POLYMORPHICALLY. More... | |
uint8_t * | cloneRuleData (int32_t &length, UErrorCode &status) |
Returns the binary format of the class's rules. More... | |
int32_t | cloneBinary (uint8_t *buffer, int32_t capacity, UErrorCode &status) |
Creates a binary image of a collator. More... | |
void | getRules (UColRuleOption delta, UnicodeString &buffer) |
Returns current rules. More... | |
virtual void | setAttribute (UColAttribute attr, UColAttributeValue value, UErrorCode &status) |
Universal attribute setter. More... | |
virtual UColAttributeValue | getAttribute (UColAttribute attr, UErrorCode &status) const |
Universal attribute getter. More... | |
virtual uint32_t | setVariableTop (const UChar *varTop, int32_t len, UErrorCode &status) |
Sets the variable top to a collation element value of a string supplied. More... | |
virtual uint32_t | setVariableTop (const UnicodeString &varTop, UErrorCode &status) |
Sets the variable top to a collation element value of a string supplied. More... | |
virtual void | setVariableTop (uint32_t varTop, UErrorCode &status) |
Sets the variable top to a collation element value supplied. More... | |
virtual uint32_t | getVariableTop (UErrorCode &status) const |
Gets the variable top value of a Collator. More... | |
virtual UnicodeSet * | getTailoredSet (UErrorCode &status) const |
Get a UnicodeSet that contains all the characters and sequences tailored in this collator. More... | |
virtual int32_t | getSortKey (const UnicodeString &source, uint8_t *result, int32_t resultLength) const |
Get the sort key as an array of bytes from a UnicodeString. More... | |
virtual int32_t | getSortKey (const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const |
Get the sort key as an array of bytes from a UChar buffer. More... | |
virtual int32_t | getReorderCodes (int32_t *dest, int32_t destCapacity, UErrorCode &status) const |
Retrieves the reordering codes for this collator. More... | |
virtual void | setReorderCodes (const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status) |
Sets the ordering of scripts for this collator. More... | |
const UCollator * | getUCollator () |
Get UCollator data struct. More... | |
virtual int32_t | internalGetShortDefinitionString (const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const |
Get the short definition string for a collator. More... | |
![]() | |
virtual | ~Collator () |
Destructor. More... | |
virtual UBool | operator!= (const Collator &other) const |
Returns true if "other" is not the same as "this". More... | |
virtual EComparisonResult | compare (const UnicodeString &source, const UnicodeString &target) const |
The comparison function compares the character data stored in two different strings. More... | |
virtual EComparisonResult | compare (const UnicodeString &source, const UnicodeString &target, int32_t length) const |
Does the same thing as compare but limits the comparison to a specified length. More... | |
virtual EComparisonResult | compare (const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) const |
The comparison function compares the character data stored in two different string arrays. More... | |
virtual UCollationResult | compareUTF8 (const StringPiece &source, const StringPiece &target, UErrorCode &status) const |
Compares two UTF-8 strings using the Collator. More... | |
UBool | greater (const UnicodeString &source, const UnicodeString &target) const |
Convenience method for comparing two strings based on the collation rules. More... | |
UBool | greaterOrEqual (const UnicodeString &source, const UnicodeString &target) const |
Convenience method for comparing two strings based on the collation rules. More... | |
UBool | equals (const UnicodeString &source, const UnicodeString &target) const |
Convenience method for comparing two strings based on the collation rules. More... | |
virtual ECollationStrength | getStrength (void) const |
Determines the minimum strength that will be used in comparison or transformation. More... | |
virtual void | setStrength (ECollationStrength newStrength) |
Sets the minimum strength to be used in comparison or transformation. More... | |
virtual Collator * | safeClone (void) const |
Same as clone(). More... | |
![]() | |
virtual | ~UObject () |
Destructor. More... | |
Static Public Member Functions | |
static UClassID | getStaticClassID (void) |
Returns the class ID for this class. More... | |
static int32_t | getEquivalentReorderCodes (int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode &status) |
Retrieves the reorder codes that are grouped with the given reorder code. More... | |
![]() | |
static Collator * | createInstance (UErrorCode &err) |
Creates the Collator object for the current default locale. More... | |
static Collator * | createInstance (const Locale &loc, UErrorCode &err) |
Gets the table-based collation object for the desired locale. More... | |
static int32_t | getEquivalentReorderCodes (int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode &status) |
Retrieves the reorder codes that are grouped with the given reorder code. More... | |
static UnicodeString & | getDisplayName (const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name) |
Get name of the object for the desired Locale, in the desired language. More... | |
static UnicodeString & | getDisplayName (const Locale &objectLocale, UnicodeString &name) |
Get name of the object for the desired Locale, in the language of the default locale. More... | |
static const Locale * | getAvailableLocales (int32_t &count) |
Get the set of Locales for which Collations are installed. More... | |
static StringEnumeration * | getAvailableLocales (void) |
Return a StringEnumeration over the locales available at the time of the call, including registered locales. More... | |
static StringEnumeration * | getKeywords (UErrorCode &status) |
Create a string enumerator of all possible keywords that are relevant to collation. More... | |
static StringEnumeration * | getKeywordValues (const char *keyword, UErrorCode &status) |
Given a keyword, create a string enumeration of all values for that keyword that are currently in use. More... | |
static StringEnumeration * | getKeywordValuesForLocale (const char *keyword, const Locale &locale, UBool commonlyUsed, UErrorCode &status) |
Given a key and a locale, returns an array of string values in a preferred order that would make a difference. More... | |
static Locale | getFunctionalEquivalent (const char *keyword, const Locale &locale, UBool &isAvailable, UErrorCode &status) |
Return the functionally equivalent locale for the given requested locale, with respect to given keyword, for the collation service. More... | |
static URegistryKey | registerInstance (Collator *toAdopt, const Locale &locale, UErrorCode &status) |
Register a new Collator. More... | |
static URegistryKey | registerFactory (CollatorFactory *toAdopt, UErrorCode &status) |
Register a new CollatorFactory. More... | |
static UBool | unregister (URegistryKey key, UErrorCode &status) |
Unregister a previously-registered Collator or CollatorFactory using the key returned from the register call. More... | |
static int32_t | getBound (const uint8_t *source, int32_t sourceLength, UColBoundMode boundType, uint32_t noOfLevels, uint8_t *result, int32_t resultLength, UErrorCode &status) |
Produce a bound for a given sortkey and a number of levels. More... | |
static UCollator * | createUCollator (const char *loc, UErrorCode *status) |
used only by ucol_open, not for public use More... | |
Protected Member Functions | |
virtual void | setLocales (const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale) |
Used internally by registration to define the requested and valid locales. More... | |
![]() | |
Collator () | |
Default constructor. More... | |
Collator (UCollationStrength collationStrength, UNormalizationMode decompositionMode) | |
Constructor. More... | |
Collator (const Collator &other) | |
Copy constructor. More... | |
Friends | |
class | CollationElementIterator |
Used to iterate over collation elements in a character source. | |
class | Collator |
Collator ONLY needs access to RuleBasedCollator(const Locale&, UErrorCode&) | |
class | StringSearch |
Searching over collation elements in a character source. | |
Additional Inherited Members | |
![]() | |
enum | ECollationStrength { PRIMARY = UCOL_PRIMARY, SECONDARY = UCOL_SECONDARY, TERTIARY = UCOL_TERTIARY, QUATERNARY = UCOL_QUATERNARY, IDENTICAL = UCOL_IDENTICAL } |
Base letter represents a primary difference. More... | |
enum | EComparisonResult { LESS = UCOL_LESS, EQUAL = UCOL_EQUAL, GREATER = UCOL_GREATER } |
LESS is returned if source string is compared to be less than target string in the compare() method. More... | |
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables.
The user can create a customized table-based collation.
Important: The ICU collation service has been reimplemented in order to achieve better performance and UCA compliance. For details, see the collation design document.
RuleBasedCollator is a thin C++ wrapper over the C implementation.
For more information about the collation service see the users guide.
Collation service provides correct sorting orders for most locales supported in ICU. If specific data for a locale is not available, the order eventually falls back to the UCA sort order.
Sort ordering may be customized by providing your own set of rules. For more on this subject see the Collation customization section of the users guide.
Note, RuleBasedCollator is not to be subclassed.
icu::RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
status | reporting a success or an error. |
icu::RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
ECollationStrength | collationStrength, | ||
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
collationStrength | default strength for comparison |
status | reporting a success or an error. |
icu::RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
UColAttributeValue | decompositionMode, | ||
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
decompositionMode | the normalisation mode |
status | reporting a success or an error. |
icu::RuleBasedCollator::RuleBasedCollator | ( | const UnicodeString & | rules, |
ECollationStrength | collationStrength, | ||
UColAttributeValue | decompositionMode, | ||
UErrorCode & | status | ||
) |
RuleBasedCollator constructor.
This takes the table rules and builds a collation table out of them. Please see RuleBasedCollator class description for more details on the collation rule syntax.
rules | the collation rules to build the collation table from. |
collationStrength | default strength for comparison |
decompositionMode | the normalisation mode |
status | reporting a success or an error. |
icu::RuleBasedCollator::RuleBasedCollator | ( | const RuleBasedCollator & | other | ) |
Copy constructor.
other | the RuleBasedCollator object to be copied |
icu::RuleBasedCollator::RuleBasedCollator | ( | const uint8_t * | bin, |
int32_t | length, | ||
const RuleBasedCollator * | base, | ||
UErrorCode & | status | ||
) |
Opens a collator from a collator binary image created using cloneBinary.
Binary image used in instantiation of the collator remains owned by the user and should stay around for the lifetime of the collator. The API also takes a base collator which usually should be UCA.
bin | binary image owned by the user and required through the lifetime of the collator |
length | size of the image. If negative, the API will try to figure out the length of the image |
base | fallback collator, usually UCA. Base is required to be present through the lifetime of the collator. Currently it cannot be NULL. |
status | for catching errors |
|
virtual |
Destructor.
|
virtual |
Makes a copy of this object.
Implements icu::Collator.
int32_t icu::RuleBasedCollator::cloneBinary | ( | uint8_t * | buffer, |
int32_t | capacity, | ||
UErrorCode & | status | ||
) |
Creates a binary image of a collator.
This binary image can be stored and later used to instantiate a collator using ucol_openBinary. This API supports preflighting.
buffer | a fill-in buffer to receive the binary image |
capacity | capacity of the destination buffer |
status | for catching errors |
uint8_t* icu::RuleBasedCollator::cloneRuleData | ( | int32_t & | length, |
UErrorCode & | status | ||
) |
Returns the binary format of the class's rules.
The format is that of .col files.
length | Returns the length of the data, in bytes |
status | the error code status. |
|
virtual |
The comparison function compares the character data stored in two different strings.
Returns information about whether a string is less than, greater than or equal to another string.
source | the source string to be compared with. |
target | the string that is to be compared with the source string. |
status | possible error code |
Implements icu::Collator.
|
virtual |
Does the same thing as compare but limits the comparison to a specified length.
source | the source string to be compared with. |
target | the string that is to be compared with the source string. |
length | the length the comparison is limited to |
status | possible error code |
Implements icu::Collator.
|
virtual |
The comparison function compares the character data stored in two different string arrays.
Returns information about whether a string array is less than, greater than or equal to another string array.
source | the source string array to be compared with. |
sourceLength | the length of the source string array. If this value is equal to -1, the string array is null-terminated. |
target | the string that is to be compared with the source string. |
targetLength | the length of the target string array. If this value is equal to -1, the string array is null-terminated. |
status | possible error code |
Implements icu::Collator.
|
virtual |
Compares two strings using the Collator.
Returns whether the first one compares less than/equal to/greater than the second one. This version takes UCharIterator input.
sIter | the first ("source") string iterator |
tIter | the second ("target") string iterator |
status | ICU status |
Reimplemented from icu::Collator.
|
virtual |
Creates a collation element iterator for the source string.
The caller of this method is responsible for the memory management of the return pointer.
source | the string over which the CollationElementIterator will iterate. |
|
virtual |
Creates a collation element iterator for the source.
The caller of this method is responsible for the memory management of the returned pointer.
source | the CharacterIterator which produces the characters over which the CollationElementIterator will iterate. |
|
virtual |
Universal attribute getter.
attr | attribute type |
status | to indicate whether the operation went on smoothly or there were errors |
Implements icu::Collator.
|
virtual |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare.
Use a CollationKey when you need to do repeated comparisons on the same string. For a single comparison the compare method will be faster.
source | the source string. |
key | the transformed key of the source string. |
status | the error code status. |
Implements icu::Collator.
|
virtual |
Transforms a specified region of the string into a series of characters that can be compared with CollationKey.compare.
Use a CollationKey when you need to do repeated comparisons on the same string. For a single comparison the compare method will be faster.
source | the source string. |
sourceLength | the length of the source string. |
key | the transformed key of the source string. |
status | the error code status. |
Implements icu::Collator.
|
virtual |
Returns a unique class ID POLYMORPHICALLY.
Pure virtual override. This method is to implement a simple version of RTTI, since not all C++ compilers support genuine RTTI. Polymorphic operator==() and clone() methods call this method.
Implements icu::Collator.
|
static |
Retrieves the reorder codes that are grouped with the given reorder code.
Some reorder codes will be grouped and must reorder together.
reorderCode | The reorder code to determine equivalence for. |
dest | The array to fill with the script equivalence reordering codes. |
destCapacity | The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
status | A reference to an error code value, which must not indicate a failure before the function call. |
|
virtual |
Gets the locale of the Collator.
type | can be either requested, valid or actual locale. For more information see the definition of ULocDataLocaleType in uloc.h |
status | the error code status. |
Implements icu::Collator.
int32_t icu::RuleBasedCollator::getMaxExpansion | ( | int32_t | order | ) | const |
Return the maximum length of any expansion sequences that end with the specified comparison order.
order | a collation order returned by previous or next. |
|
virtual |
Retrieves the reordering codes for this collator.
dest | The array to fill with the script ordering. |
destCapacity | The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
status | A reference to an error code value, which must not indicate a failure before the function call. |
Reimplemented from icu::Collator.
const UnicodeString& icu::RuleBasedCollator::getRules | ( | void | ) | const |
Gets the tailoring rules for this collator.
void icu::RuleBasedCollator::getRules | ( | UColRuleOption | delta, |
UnicodeString & | buffer | ||
) |
Returns current rules.
Delta defines whether full rules are returned or just the tailoring.
getRules(void) should normally be used instead. See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
delta | one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. |
buffer | UnicodeString to store the result rules |
|
virtual |
Get the sort key as an array of bytes from a UnicodeString.
source | string to be processed. |
result | buffer to store result in. If NULL, number of bytes needed will be returned. |
resultLength | length of the result buffer. If it is not enough the buffer will be filled to capacity. |
Implements icu::Collator.
|
virtual |
Get the sort key as an array of bytes from a UChar buffer.
source | string to be processed. |
sourceLength | length of string to be processed. If -1, the string is 0 terminated and length will be decided by the function. |
result | buffer to store result in. If NULL, number of bytes needed will be returned. |
resultLength | length of the result buffer. If it is not enough the buffer will be filled to capacity. |
Implements icu::Collator.
|
static |
Returns the class ID for this class.
This is useful only for comparing to a return value from getDynamicClassID(). For example:
Base* polymorphic_pointer = createPolymorphicObject(); if (polymorphic_pointer->getDynamicClassID() == Derived::getStaticClassID()) ...
|
virtual |
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
status | error code of the operation |
Reimplemented from icu::Collator.
|
inline |
Get UCollator data struct.
Used only by StringSearch & intltest.
|
virtual |
Gets the variable top value of a Collator.
Lower 16 bits are undefined and should be ignored.
status | error code (not changed by function). If error code is set, the return value is undefined. |
Implements icu::Collator.
|
virtual |
Gets the version information for a Collator.
info | the version # information, the result will be filled in |
Implements icu::Collator.
|
virtual |
Generates the hash code for the rule-based collation object.
Implements icu::Collator.
|
virtual |
Get the short definition string for a collator.
This internal API harvests the collator's locale and the attribute set and produces a string that can be used for opening a collator with the same properties using the ucol_openFromShortString API. This string will be normalized. The structure and the syntax of the string is defined in the "Naming collators" section of the users guide: http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators This function supports preflighting.
This is internal, and intended to be used with delegate converters.
locale | a locale that will appear as a collator's locale in the resulting short string definition. If NULL, the locale will be harvested from the collator. |
buffer | space to hold the resulting string |
capacity | capacity of the buffer |
status | for returning errors. All the preflighting errors are featured |
Reimplemented from icu::Collator.
RuleBasedCollator& icu::RuleBasedCollator::operator= | ( | const RuleBasedCollator & | other | ) |
Assignment operator.
|
virtual |
Returns true if argument is the same as this object.
other | Collator object to be compared. |
Reimplemented from icu::Collator.
|
virtual |
Universal attribute setter.
attr | attribute type |
value | attribute value |
status | to indicate whether the operation went on smoothly or there were errors |
Implements icu::Collator.
|
protectedvirtual |
Used internally by registration to define the requested and valid locales.
requestedLocale | the requested locale |
validLocale | the valid locale |
actualLocale | the actual locale |
Reimplemented from icu::Collator.
|
virtual |
Sets the ordering of scripts for this collator.
reorderCodes | An array of script codes in the new order. This can be NULL if the length is also set to 0. An empty array will clear any reordering codes on the collator. |
reorderCodesLength | The length of reorderCodes. |
status | error code |
Reimplemented from icu::Collator.
|
virtual |
Sets the variable top to a collation element value of a string supplied.
varTop | one or more (if contraction) UChars to which the variable top should be set |
len | length of variable top string. If -1 it is considered to be zero terminated. |
status | error code. If error code is set, the return value is undefined. Errors set by this function are: U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction; U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes |
Implements icu::Collator.
|
virtual |
Sets the variable top to a collation element value of a string supplied.
varTop | an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set |
status | error code. If error code is set, the return value is undefined. Errors set by this function are: U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction; U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes |
Implements icu::Collator.
|
virtual |
Sets the variable top to a collation element value supplied.
Variable top is set to the upper 16 bits. Lower 16 bits are ignored.
varTop | CE value, as returned by setVariableTop or ucol_getVariableTop |
status | error code (not changed by function) |
Implements icu::Collator.