ICU 50.1.2  50.1.2
tblcoll.h
Go to the documentation of this file.
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7 
59 #ifndef TBLCOLL_H
60 #define TBLCOLL_H
61 
62 #include "unicode/utypes.h"
63 
64 
65 #if !UCONFIG_NO_COLLATION
66 
67 #include "unicode/coll.h"
68 #include "unicode/ucol.h"
69 #include "unicode/sortkey.h"
70 #include "unicode/normlzr.h"
71 
73 
77 class StringSearch;
81 class CollationElementIterator;
82 
112 {
113 public:
114 
115  // constructor -------------------------------------------------------------
116 
126  RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
127 
138  RuleBasedCollator(const UnicodeString& rules,
139  ECollationStrength collationStrength,
140  UErrorCode& status);
141 
152  RuleBasedCollator(const UnicodeString& rules,
153  UColAttributeValue decompositionMode,
154  UErrorCode& status);
155 
167  RuleBasedCollator(const UnicodeString& rules,
168  ECollationStrength collationStrength,
169  UColAttributeValue decompositionMode,
170  UErrorCode& status);
171 
178  RuleBasedCollator(const RuleBasedCollator& other);
179 
180 
198  RuleBasedCollator(const uint8_t *bin, int32_t length,
199  const RuleBasedCollator *base,
200  UErrorCode &status);
201  // destructor --------------------------------------------------------------
202 
207  virtual ~RuleBasedCollator();
208 
209  // public methods ----------------------------------------------------------
210 
216  RuleBasedCollator& operator=(const RuleBasedCollator& other);
217 
// Equality test against another Collator: returns TRUE if "other" is the
// same as "this".
224  virtual UBool operator==(const Collator& other) const;
225 
// Makes a copy of this object (Collator::clone() is declared pure virtual
// in the base class).
231  virtual Collator* clone(void) const;
232 
243  virtual CollationElementIterator* createCollationElementIterator(
244  const UnicodeString& source) const;
245 
255  virtual CollationElementIterator* createCollationElementIterator(
256  const CharacterIterator& source) const;
257 
258  // Make deprecated versions of Collator::compare() visible.
259  using Collator::compare;
260 
273  virtual UCollationResult compare(const UnicodeString& source,
274  const UnicodeString& target,
275  UErrorCode &status) const;
276 
290  virtual UCollationResult compare(const UnicodeString& source,
291  const UnicodeString& target,
292  int32_t length,
293  UErrorCode &status) const;
294 
311  virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
312  const UChar* target, int32_t targetLength,
313  UErrorCode &status) const;
314 
326  virtual UCollationResult compare(UCharIterator &sIter,
327  UCharIterator &tIter,
328  UErrorCode &status) const;
329 
// Transforms the source string into a series of characters that can be
// compared with CollationKey::compareTo. Takes the caller's CollationKey by
// reference and reports failures through `status`.
342  virtual CollationKey& getCollationKey(const UnicodeString& source,
343  CollationKey& key,
344  UErrorCode& status) const;
345 
359  virtual CollationKey& getCollationKey(const UChar *source,
360  int32_t sourceLength,
361  CollationKey& key,
362  UErrorCode& status) const;
363 
// Generates the hash code for the collation object (Collator::hashCode() is
// declared pure virtual in the base class).
369  virtual int32_t hashCode(void) const;
370 
// Gets the locale of the Collator. `type` is a ULocDataLocaleType constant
// selecting which kind of locale information the caller wants.
381  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
382 
388  const UnicodeString& getRules(void) const;
389 
// Gets the version information for a Collator. UVersionInfo is the binary
// version form used by ICU APIs: an array of U_MAX_VERSION_LENGTH uint8_t.
395  virtual void getVersion(UVersionInfo info) const;
396 
407  int32_t getMaxExpansion(int32_t order) const;
408 
// Returns a unique class ID polymorphically. UClassID identifies classes
// without relying on the compiler's RTTI.
419  virtual UClassID getDynamicClassID(void) const;
420 
432  static UClassID U_EXPORT2 getStaticClassID(void);
433 
442  uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
443 
444 
455  int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
456 
468  void getRules(UColRuleOption delta, UnicodeString &buffer);
469 
// Universal attribute setter: applies `value` to the collation attribute
// `attr`; failures are reported through `status`.
477  virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
478  UErrorCode &status);
479 
// Universal attribute getter for the collation attribute `attr`; the
// read-side counterpart of setAttribute(). Failures are reported through
// `status`.
487  virtual UColAttributeValue getAttribute(UColAttribute attr,
488  UErrorCode &status) const;
489 
// Sets the variable top to a collation element value of the string supplied
// (here as a UChar buffer).
500  virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
501 
511  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
512 
520  virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
521 
// Gets the variable top value of a Collator.
528  virtual uint32_t getVariableTop(UErrorCode &status) const;
529 
// Gets a UnicodeSet that contains all the characters and sequences tailored
// in this collator.
// NOTE(review): ownership of the returned pointer is not visible here --
// confirm whether the caller must delete it.
539  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
540 
// Gets the sort key as an array of bytes from a UnicodeString.
551  virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
552  int32_t resultLength) const;
553 
566  virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
567  uint8_t *result, int32_t resultLength) const;
568 
// Retrieves the reordering codes for this collator.
582  virtual int32_t getReorderCodes(int32_t *dest,
583  int32_t destCapacity,
584  UErrorCode& status) const;
585 
// Sets the ordering of scripts for this collator.
596  virtual void setReorderCodes(const int32_t* reorderCodes,
597  int32_t reorderCodesLength,
598  UErrorCode& status) ;
599 
// Retrieves the reorder codes that are grouped with the given reorder code.
// Static: does not operate on a particular collator instance.
616  static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
617  int32_t* dest,
618  int32_t destCapacity,
619  UErrorCode& status);
620 
621 private:
622 
623  // private static constants -----------------------------------------------
624 
// Private constants for working with 32-bit collation order values. Judging
// by the masks and shifts below, the upper 16 bits hold the primary weight,
// bits 8-15 the secondary weight and the low 8 bits the tertiary weight.
625  enum {
626  /* needs a lookup in .commit() */
627  CHARINDEX = 0x70000000,
628  /* expand index follows */
629  EXPANDCHARINDEX = 0x7E000000,
630  /* contract index follows */
631  CONTRACTCHARINDEX = 0x7F000000,
632  /* unmapped character values */
633  UNMAPPED = 0xFFFFFFFF,
634  /* primary strength increment */
635  PRIMARYORDERINCREMENT = 0x00010000,
636  /* secondary strength increment */
637  SECONDARYORDERINCREMENT = 0x00000100,
638  /* tertiary strength increment */
639  TERTIARYORDERINCREMENT = 0x00000001,
640  /* mask off anything but primary order */
641  PRIMARYORDERMASK = 0xffff0000,
642  /* mask off anything but secondary order */
643  SECONDARYORDERMASK = 0x0000ff00,
644  /* mask off anything but tertiary order */
645  TERTIARYORDERMASK = 0x000000ff,
646  /* mask off ignorable char order */
647  IGNORABLEMASK = 0x0000ffff,
648  /* use only the primary difference */
649  PRIMARYDIFFERENCEONLY = 0xffff0000,
650  /* use only the primary and secondary difference */
651  SECONDARYDIFFERENCEONLY = 0xffffff00,
652  /* primary order shift */
653  PRIMARYORDERSHIFT = 16,
654  /* secondary order shift */
655  SECONDARYORDERSHIFT = 8,
656  /* starting value for collation elements */
657  COLELEMENTSTART = 0x02020202,
658  /* testing mask for primary low element */
659  PRIMARYLOWZEROMASK = 0x00FF0000,
660  /* resetting value for secondaries and tertiaries */
661  RESETSECONDARYTERTIARY = 0x00000202,
662  /* resetting value for tertiaries */
663  RESETTERTIARY = 0x00000002,
664 
665  PRIMIGNORABLE = 0x0202 /* NOTE(review): undocumented here; same numeric value as RESETSECONDARYTERTIARY -- confirm intended meaning */
666  };
667 
668  // private data members ---------------------------------------------------
669 
670  UBool dataIsOwned;
671 
672  UBool isWriteThroughAlias;
673 
678  UCollator *ucollator;
679 
683  UnicodeString urulestring;
684 
685  // friend classes --------------------------------------------------------
686 
691 
696  friend class Collator;
697 
701  friend class StringSearch;
702 
703  // private constructors --------------------------------------------------
704 
709 
720  RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
721 
730  void
731  construct(const UnicodeString& rules,
732  UColAttributeValue collationStrength,
733  UColAttributeValue decompositionMode,
734  UErrorCode& status);
735 
736  // private methods -------------------------------------------------------
737 
743  void setUCollator(const Locale& locale, UErrorCode& status);
744 
750  void setUCollator(const char* locale, UErrorCode& status);
751 
758  void setUCollator(UCollator *collator);
759 
760 public:
761 #ifndef U_HIDE_INTERNAL_API
762 
767  const UCollator * getUCollator();
768 #endif /* U_HIDE_INTERNAL_API */
769 
770 protected:
// Used internally by registration to define the requested and valid locales.
778  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
779 
780 private:
781  // if not owned and not a write through alias, copy the ucollator
782  void checkOwned(void);
783 
784  // utility to init rule string used by checkOwned and construct
785  void setRuleStringFromCollator();
786 
787 public:
// Gets the short definition string for a collator.
811  virtual int32_t internalGetShortDefinitionString(const char *locale,
812  char *buffer,
813  int32_t capacity,
814  UErrorCode &status) const;
815 };
816 
817 // inline method implementation ---------------------------------------------
818 
// Convenience overload taking a Locale object: forwards the locale's
// programmatic name (Locale::getName(), a const char*) and `status` to the
// setUCollator(const char*, UErrorCode&) overload.
819 inline void RuleBasedCollator::setUCollator(const Locale &locale,
820  UErrorCode &status)
821 {
822  setUCollator(locale.getName(), status);
823 }
824 
825 
// Rebinds this object to an externally supplied UCollator.
//  - A previously held collator is closed with ucol_close() only when this
//    object owned it (dataIsOwned).
//  - The new pointer is stored without taking ownership: dataIsOwned is
//    cleared and isWriteThroughAlias is set.
//  - setRuleStringFromCollator() is then called to initialise the rule
//    string from the newly bound collator.
// NOTE(review): if `collator` is the same pointer as the currently owned
// ucollator, it is closed first and then stored again -- confirm that no
// caller passes in the collator this object already owns.
826 inline void RuleBasedCollator::setUCollator(UCollator *collator)
827 {
828 
829  if (ucollator && dataIsOwned) {
830  ucol_close(ucollator);
831  }
832  ucollator = collator;
833  dataIsOwned = FALSE;
834  isWriteThroughAlias = TRUE;
835  setRuleStringFromCollator();
836 }
837 
838 #ifndef U_HIDE_INTERNAL_API
// Internal API (only compiled when U_HIDE_INTERNAL_API is not defined):
// returns the UCollator pointer held by this object as a pointer-to-const.
839 inline const UCollator * RuleBasedCollator::getUCollator()
840 {
841  return ucollator;
842 }
843 #endif
844 
846 
847 #endif /* #if !UCONFIG_NO_COLLATION */
848 
849 #endif
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:57
virtual UBool operator==(const Collator &other) const
Returns TRUE if "other" is the same as "this".
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
virtual int32_t hashCode(void) const =0
Generates the hash code for the collation object.
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:177
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the u_strcoll() m...
Definition: ucol.h:77
C++ API: Unicode Normalization.
virtual uint32_t getVariableTop(UErrorCode &status) const =0
Gets the variable top value of a Collator.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API for code unit iteration.
Definition: uiter.h:339
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:207
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:240
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:358
virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status)=0
Sets the variable top to a collation element value of a string supplied.
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const =0
Universal attribute getter.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables...
Definition: tblcoll.h:111
C++ API: Collation Service.
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const =0
Gets the locale of the Collator.
const char * getName() const
Returns the programmatic name of the entire locale, with the language, country and variant separated ...
Definition: locid.h:777
static int32_t getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode &status)
Retrieves the reorder codes that are grouped with the given reorder code.
virtual Collator * clone(void) const =0
Makes a copy of this object.
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID POLYMORPHICALLY.
The CollationElementIterator class is used as an iterator to walk through each character of an intern...
Definition: coleitr.h:120
virtual void getVersion(UVersionInfo info) const =0
Gets the version information for a Collator.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
C API: Collator.
C++ API: Keys for comparing strings multiple times.
Collation keys are generated by the Collator class.
Definition: sortkey.h:97
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from an UnicodeString.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
void ucol_close(UCollator *coll)
Close a UCollator.
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
#define TRUE
The TRUE value of a UBool.
Definition: umachine.h:204
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:273
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:278
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get an UnicodeSet that contains all the characters and sequences tailored in this collator...
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested...
Definition: uloc.h:336
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:62
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)=0
Universal attribute setter.
StringSearch is a SearchIterator that provides language-sensitive text searching based on the compari...
Definition: stsearch.h:138
Basic definitions for ICU, for both C and C++ APIs.
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:208
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:246
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:93
UColRuleOption
Options for retrieving the rule string.
Definition: ucol.h:336
int8_t UBool
The ICU boolean type.
Definition: umachine.h:200
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:182