ICU 50.1.2  50.1.2
alphaindex.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2011-2012 International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 */
9 
10 #ifndef INDEXCHARS_H
11 #define INDEXCHARS_H
12 
13 #include "unicode/utypes.h"
14 #include "unicode/uobject.h"
15 #include "unicode/locid.h"
16 
17 
18 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
19 
26 
40 
47 
57 
65 
66 
67 struct UHashtable;
69 
71 
72 // Forward Declarations
73 
74 class Collator;
75 class RuleBasedCollator;
76 class StringEnumeration;
77 class UnicodeSet;
78 class UVector;
79 
80 
81 
166 
167  public:
168 
181  AlphabeticIndex(const Locale &locale, UErrorCode &status);
182 
183 
184 
195  virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status);
196 
210  virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status);
211 
216  virtual ~AlphabeticIndex();
217 
218 
231  virtual const RuleBasedCollator &getCollator() const;
232 
233 
242  virtual const UnicodeString &getInflowLabel() const;
243 
255  virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status);
256 
257 
258 
266  virtual const UnicodeString &getOverflowLabel() const;
267 
268 
278  virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status);
279 
287  virtual const UnicodeString &getUnderflowLabel() const;
288 
298  virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status);
299 
300 
308  virtual int32_t getMaxLabelCount() const;
309 
322  virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status);
323 
324 
337  virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit,
338  UErrorCode &status);
339 
340 
357  virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status);
358 
367  virtual AlphabeticIndex &clearRecords(UErrorCode &status);
368 
369 
378  virtual int32_t getBucketCount(UErrorCode &status);
379 
380 
389  virtual int32_t getRecordCount(UErrorCode &status);
390 
391 
392 
405  virtual int32_t getBucketIndex(const UnicodeString &itemName, UErrorCode &status);
406 
407 
414  virtual int32_t getBucketIndex() const;
415 
416 
428  virtual UBool nextBucket(UErrorCode &status);
429 
438  virtual const UnicodeString &getBucketLabel() const;
439 
447  virtual UAlphabeticIndexLabelType getBucketLabelType() const;
448 
457  virtual int32_t getBucketRecordCount() const;
458 
459 
468  virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status);
469 
481  virtual UBool nextRecord(UErrorCode &status);
482 
491  virtual const UnicodeString &getRecordName() const;
492 
493 
502  virtual const void *getRecordData() const;
503 
504 
511  virtual AlphabeticIndex &resetRecordIterator();
512 
513 private:
514  // No ICU "poor man's RTTI" for this class nor its subclasses.
515  virtual UClassID getDynamicClassID() const;
516 
521  AlphabeticIndex(const AlphabeticIndex &other);
522 
// Assignment operator, declared in the private section of the class.
// The inline body copies nothing from its (unnamed) argument and simply
// returns *this.
526  AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;};
527 
532  virtual UBool operator==(const AlphabeticIndex& other) const;
533 
538  virtual UBool operator!=(const AlphabeticIndex& other) const;
539 
540  // Common initialization, for use from all constructors.
541  void init(UErrorCode &status);
542 
543  // Initialize & destruct static constants used by this class.
544  static void staticInit(UErrorCode &status);
545 
546  // Pinyin stuff. If the input name is Chinese, add the Pinyin prefix to the dest string.
547  void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll);
548  void initPinyinBounds(const Collator *coll, UErrorCode &status);
549 
550  public:
551 #ifndef U_HIDE_INTERNAL_API
552 
557  static void staticCleanup();
558 #endif /* U_HIDE_INTERNAL_API */
559  private:
560 
561  // Add index characters from the specified locale to the dest set.
562  // Does not remove any previous contents from dest.
563  static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status);
564 
565  UVector *firstStringsInScript(UErrorCode &status);
566 
567  static UnicodeString separated(const UnicodeString &item);
568 
569  static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status);
570 
571  void buildIndex(UErrorCode &status);
572  void buildBucketList(UErrorCode &status);
573  void bucketRecords(UErrorCode &status);
574 
575 
576  public:
577 
578  // The following internal items are declared public only to allow access from
579  // implementation code written in plain C. They are not intended for
580  // public use.
581 
582 #ifndef U_HIDE_INTERNAL_API
583 
// A record, or item, in the index.
// Declared public only to allow access from implementation code; it is an
// internal type (guarded by U_HIDE_INTERNAL_API) and is not intended for
// public use.
587  struct Record: public UMemory {
// NOTE(review): presumably the index this record belongs to - confirm
// against the constructor implementation.
588  AlphabeticIndex *alphaIndex_;
// Name passed to the constructor; const for the lifetime of the record.
589  const UnicodeString name_;
590  UnicodeString sortingName_; // Usually the same as name_; different for Pinyin.
// Opaque caller-supplied pointer associated with the name.
591  const void *data_;
592  int32_t serialNumber_; // Defines sorting order for names that compare equal.
593  Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data);
594  ~Record();
595  };
596 #endif /* U_HIDE_INTERNAL_API */
597 
603  UVector *inputRecords_;
604 
// A Bucket holds an index label and references to everything belonging to
// that label.
610  struct Bucket: public UMemory {
// Label string for this bucket.
611  UnicodeString label_;
// NOTE(review): presumably the lowest string that sorts into this bucket -
// confirm against the bucketing implementation.
612  UnicodeString lowerBoundary_;
// Kind of label, as a UAlphabeticIndexLabelType value.
613  UAlphabeticIndexLabelType labelType_;
614  UVector *records_; // Records are owned by inputRecords_ vector.
615 
616  Bucket(const UnicodeString &label, // Parameter strings are copied.
617  const UnicodeString &lowerBoundary,
618  UAlphabeticIndexLabelType type, UErrorCode &status);
619  ~Bucket();
620  };
621 
622  public:
623 
// Language types. Used for the langType_ member, which distinguishes
// simplified Chinese, traditional Chinese, and not Chinese (Normal) as part
// of the Pinyin support.
// NOTE(review): only kTraditional is visible in this listing; the embedded
// source numbering jumps from 628 to 634, so the other enumerators appear to
// have been dropped by the extraction - restore them from the original header.
628  enum ELangType {
634  kTraditional
635  };
636 
641  static ELangType langTypeFromLocale(const Locale &loc);
642 
643 
644  private:
645 
646  // Holds the contents of this index, buckets of user items.
647  // UVector elements are of type (Bucket *)
648  UVector *bucketList_;
649 
650  int32_t labelsIterIndex_; // Index of next item to return.
651  int32_t itemsIterIndex_;
652  Bucket *currentBucket_; // While an iteration of the index is underway,
653  // points to the bucket for the current label.
654  // NULL when no iteration is underway.
655 
656  UBool indexBuildRequired_; // Caller has made changes to the index that
657  // require rebuilding & bucketing before the
658  // contents can be iterated.
659 
660  int32_t maxLabelCount_; // Limit on # of labels permitted in the index.
661 
662  UHashtable *alreadyIn_; // Key=UnicodeString, value=UnicodeSet
663 
664  UnicodeSet *initialLabels_; // Initial (unprocessed) set of Labels. Union
665  // of those explicitly set by the user plus
666  // those from locales. Raw values, before
667  // crunching into bucket labels.
668 
669  UVector *labels_; // List of Labels, after processing, sorting.
670  // Contents are (UnicodeString *)
671 
672  UnicodeSet *noDistinctSorting_; // As the set of labels is built, strings may
673  // be discarded from the exemplars. This contains
674  // some of the discards, and is
675  // intended for debugging.
676 
677  UnicodeSet *notAlphabetic_; // As the set of labels is built, strings may
678  // be discarded from the exemplars. This contains
679  // some of the discards, and is
680  // intended for debugging.
681 
682 
683  UVector *firstScriptCharacters_; // The first character from each script,
684  // in collation order.
685 
686  Locale locale_;
687  Collator *collator_;
688  Collator *collatorPrimaryOnly_;
689 
690  UnicodeString inflowLabel_;
691  UnicodeString overflowLabel_;
692  UnicodeString underflowLabel_;
693  UnicodeString overflowComparisonString_;
694 
695  ELangType langType_; // The language type, simplified Chinese, Traditional Chinese,
696  // or not Chinese (Normal). Part of the Pinyin support
697 
698  typedef const UChar PinyinLookup[24][3];
699  static PinyinLookup HACK_PINYIN_LOOKUP_SHORT;
700  static PinyinLookup HACK_PINYIN_LOOKUP_LONG;
701 
702  // These will be lazily set to the short or long tables based on which
703  // Chinese collation has been configured into the ICU library.
704  static PinyinLookup *HACK_PINYIN_LOOKUP;
705  static const UChar *PINYIN_LOWER_BOUNDS;
706 
707 
708 
709  int32_t recordCounter_; // Counts Records created. For minting record serial numbers.
710 
711 // Constants. Lazily initialized the first time an AlphabeticIndex object is created.
712 
713  static UnicodeSet *ALPHABETIC;
714  static UnicodeSet *CORE_LATIN;
715  static UnicodeSet *ETHIOPIC;
716  static UnicodeSet *HANGUL;
717  static UnicodeSet *IGNORE_SCRIPTS;
718  static UnicodeSet *TO_TRY;
719  static UnicodeSet *UNIHAN;
720  static const UnicodeString *EMPTY_STRING;
721 
722 };
723 
725 
726 #endif /* UCONFIG_NO_COLLATION / UCONFIG_NO_NORMALIZATION */
727 #endif
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:177
A Bucket holds an index label and references to everything belonging to that label.
Definition: alphaindex.h:610
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Inflow Label.
Definition: alphaindex.h:56
Normal Label, typically the starting letter of the names in the bucket with this label.
Definition: alphaindex.h:39
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:82
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:358
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables...
Definition: tblcoll.h:111
ELangType
Language Types.
Definition: alphaindex.h:628
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:273
A record, or item, in the index.
Definition: alphaindex.h:587
C++ API: Common ICU base class UObject.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:278
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:83
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
class AlphabeticIndex supports the creation of a UI index appropriate for a given language...
Definition: alphaindex.h:165
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
Overflow Label.
Definition: alphaindex.h:63
C++ API: Locale ID object.
Basic definitions for ICU, for both C and C++ APIs.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:246
UVector * inputRecords_
Holds all user records before they are distributed into buckets.
Definition: alphaindex.h:603
Underflow Label.
Definition: alphaindex.h:46
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:229
UMemory is the common ICU base class.
Definition: uobject.h:115
int8_t UBool
The ICU boolean type.
Definition: umachine.h:200
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:182
UAlphabeticIndexLabelType
Constants for Alphabetic Index Label Types.
Definition: alphaindex.h:33