ICU 50.1.2  50.1.2
normalizer2.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __NORMALIZER2_H__
18 #define __NORMALIZER2_H__
19 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/unorm2.h"
32 
33 U_NAMESPACE_BEGIN
34 
78 class U_COMMON_API Normalizer2 : public UObject {
79 public:
84  ~Normalizer2();
85 
86 #ifndef U_HIDE_DRAFT_API
87 
98  static const Normalizer2 *
99  getNFCInstance(UErrorCode &errorCode);
100 
112  static const Normalizer2 *
113  getNFDInstance(UErrorCode &errorCode);
114 
126  static const Normalizer2 *
127  getNFKCInstance(UErrorCode &errorCode);
128 
140  static const Normalizer2 *
141  getNFKDInstance(UErrorCode &errorCode);
142 
154  static const Normalizer2 *
155  getNFKCCasefoldInstance(UErrorCode &errorCode);
156 #endif /* U_HIDE_DRAFT_API */
157 
179  static const Normalizer2 *
180  getInstance(const char *packageName,
181  const char *name,
182  UNormalization2Mode mode,
183  UErrorCode &errorCode);
184 
195  UnicodeString
196  normalize(const UnicodeString &src, UErrorCode &errorCode) const {
197  UnicodeString result;
198  normalize(src, result, errorCode);
199  return result;
200  }
214  virtual UnicodeString &
215  normalize(const UnicodeString &src,
216  UnicodeString &dest,
217  UErrorCode &errorCode) const = 0;
232  virtual UnicodeString &
233  normalizeSecondAndAppend(UnicodeString &first,
234  const UnicodeString &second,
235  UErrorCode &errorCode) const = 0;
250  virtual UnicodeString &
251  append(UnicodeString &first,
252  const UnicodeString &second,
253  UErrorCode &errorCode) const = 0;
254 
268  virtual UBool
269  getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
270 
295  virtual UBool
296  getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
297 
313  virtual UChar32
314  composePair(UChar32 a, UChar32 b) const;
315 
324  virtual uint8_t
325  getCombiningClass(UChar32 c) const;
326 
341  virtual UBool
342  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
343 
359  virtual UNormalizationCheckResult
360  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
361 
384  virtual int32_t
385  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
386 
400  virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
401 
416  virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
417 
431  virtual UBool isInert(UChar32 c) const = 0;
432 
433 private:
434  // No ICU "poor man's RTTI" for this class nor its subclasses.
435  virtual UClassID getDynamicClassID() const;
436 };
437 
449 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
450 public:
461  FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
462  norm2(n2), set(filterSet) {}
463 
469 
483  virtual UnicodeString &
484  normalize(const UnicodeString &src,
485  UnicodeString &dest,
486  UErrorCode &errorCode) const;
501  virtual UnicodeString &
502  normalizeSecondAndAppend(UnicodeString &first,
503  const UnicodeString &second,
504  UErrorCode &errorCode) const;
519  virtual UnicodeString &
520  append(UnicodeString &first,
521  const UnicodeString &second,
522  UErrorCode &errorCode) const;
523 
535  virtual UBool
536  getDecomposition(UChar32 c, UnicodeString &decomposition) const;
537 
549  virtual UBool
550  getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
551 
562  virtual UChar32
563  composePair(UChar32 a, UChar32 b) const;
564 
573  virtual uint8_t
574  getCombiningClass(UChar32 c) const;
575 
587  virtual UBool
588  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
600  virtual UNormalizationCheckResult
601  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
613  virtual int32_t
614  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
615 
624  virtual UBool hasBoundaryBefore(UChar32 c) const;
625 
634  virtual UBool hasBoundaryAfter(UChar32 c) const;
635 
643  virtual UBool isInert(UChar32 c) const;
644 private:
645  UnicodeString &
646  normalize(const UnicodeString &src,
647  UnicodeString &dest,
648  USetSpanCondition spanCondition,
649  UErrorCode &errorCode) const;
650 
651  UnicodeString &
652  normalizeSecondAndAppend(UnicodeString &first,
653  const UnicodeString &second,
654  UBool doNormalize,
655  UErrorCode &errorCode) const;
656 
657  const Normalizer2 &norm2;
658  const UnicodeSet &set;
659 };
660 
661 U_NAMESPACE_END
662 
663 #endif // !UCONFIG_NO_NORMALIZATION
664 #endif // __NORMALIZER2_H__
virtual UBool getRawDecomposition(UChar32 c, UnicodeString &decomposition) const
Gets the raw decomposition mapping of c.
virtual UBool hasBoundaryBefore(UChar32 c) const =0
Tests if the character always has a normalization boundary before it, regardless of context.
UnicodeString normalize(const UnicodeString &src, UErrorCode &errorCode) const
Returns the normalized form of the source string.
Definition: normalizer2.h:196
virtual UChar32 composePair(UChar32 a, UChar32 b) const
Performs pairwise composition of a & b and returns the composite if there is one.
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
virtual int32_t spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const =0
Returns the end of the normalized substring of the input string.
C++ API: Unicode String.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the second string to the first string (merging them at the boundary) and returns the first string.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:78
C API: New API for Unicode Normalization.
virtual UBool hasBoundaryAfter(UChar32 c) const =0
Tests if the character always has a normalization boundary after it, regardless of context.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:298
FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet)
Constructs a filtered normalizer wrapping any Normalizer2 instance and a filter set.
Definition: normalizer2.h:461
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:273
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is contained or not contained in the set.
Definition: uset.h:150
virtual UNormalizationCheckResult quickCheck(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
UNormalization2Mode
Constants for normalization modes.
Definition: unorm2.h:42
virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the first string.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition: utypes.h:476
virtual UBool isNormalized(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const =0
Gets the decomposition mapping of c.
virtual uint8_t getCombiningClass(UChar32 c) const
Gets the combining class of c.
Basic definitions for ICU, for both C and C++ APIs.
virtual UBool isInert(UChar32 c) const =0
Tests if the character is normalization-inert.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer classes.
Definition: unistr.h:246
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:229
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:449
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:91
int8_t UBool
The ICU boolean type.
Definition: umachine.h:200
C++ API: Unicode Set.