24 #if !UCONFIG_NO_BREAK_ITERATION
38 struct RBBIDataHeader;
39 class RuleBasedBreakIteratorTables;
41 class RBBIDataWrapper;
43 class LanguageBreakEngine;
44 class UnhandledEngine;
45 struct RBBIStateTable;
173 #ifndef U_HIDE_INTERNAL_API
210 friend class RBBIRuleBuilder;
336 virtual int32_t hashCode(
void)
const;
430 virtual int32_t
first(
void);
437 virtual int32_t
last(
void);
449 virtual int32_t
next(int32_t n);
456 virtual int32_t
next(
void);
472 virtual int32_t
following(int32_t offset);
481 virtual int32_t
preceding(int32_t offset);
498 virtual int32_t
current(
void)
const;
533 virtual int32_t getRuleStatus()
const;
558 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity,
UErrorCode &status);
584 static UClassID U_EXPORT2 getStaticClassID(
void);
632 virtual const uint8_t *getBinaryRules(uint32_t &length);
671 virtual void reset(
void);
688 virtual int32_t getBreakType()
const;
695 virtual void setBreakType(int32_t type);
697 #ifndef U_HIDE_INTERNAL_API
717 int32_t handlePrevious(
const RBBIStateTable *statetable);
728 int32_t handleNext(
const RBBIStateTable *statetable);
732 #ifndef U_HIDE_INTERNAL_API
747 int32_t checkDictionary(int32_t startPos, int32_t endPos,
UBool reverse);
758 const LanguageBreakEngine *getLanguageBreakEngine(
UChar32 c);
763 void makeRuleStatusValid();
int32_t * fCachedBreakPositions
When a range of characters is divided up using the dictionary, the break positions that are discovere...
UStack * fLanguageBreakEngines
If present, UStack of LanguageBreakEngine objects that might handle dictionary characters.
int32_t fPositionInCache
if fCachedBreakPositions is not null, this indicates which item in the cache the current iteration po...
RBBIDataWrapper * fData
The rule data for this BreakIterator instance.
virtual int32_t next(void)=0
Advance the iterator to the boundary following the current boundary.
CharacterIterator * fCharIter
A character iterator that refers to the same text as the UText, above.
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
StringCharacterIterator * fSCharIter
When the input text is provided by a UnicodeString, this will point to a characterIterator that wraps...
int32_t fLastRuleStatusIndex
Index of the Rule {tag} values for the most recent match.
EDontAdopt
Constant to be used in the constructor RuleBasedBreakIterator(RBBIDataHeader*, EDontAdopt, UErrorCode &); which does not adopt the memory indicated by the RBBIDataHeader* parameter.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
virtual CharacterIterator & getText(void) const =0
Return a CharacterIterator over the text being analyzed.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
virtual int32_t first(void)=0
Set the iterator position to the index of the first character in the text being scanned.
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
int32_t fBreakType
The type of the break iterator, or -1 if it has not been set.
int32_t fNumCachedBreakPositions
The number of elements in fCachedBreakPositions.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Abstract class that defines an API for iteration on text objects.
C++ API: String Character Iterator.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
UText * fText
The UText through which this BreakIterator accesses the text.
The BreakIterator class implements methods for finding the location of boundaries in text...
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
virtual int32_t last(void)=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
UBool fLastStatusIndexValid
Rule tag value valid flag.
virtual int32_t current(void) const =0
Return the character index of the current iterator position within the text.
uint32_t fDictionaryCharCount
Counter for the number of characters encountered with the "dictionary" flag set.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
virtual UBool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
C API: Data loading interface.
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
virtual int32_t previous(void)=0
Set the iterator position to the boundary preceding the current boundary.
virtual UClassID getDynamicClassID(void) const =0
Return a polymorphic class ID for this object.
UnhandledEngine * fUnhandledBreakEngine
If present, the special LanguageBreakEngine used for handling characters that are in the dictionary s...
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
virtual BreakIterator * clone(void) const =0
Return a polymorphic copy of this object.
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
C++ API: UChar Character Iterator.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
C API: Parse Error Information.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Thread-safe client-buffer-based cloning operation. Do NOT call delete on a safeclone, since 'new' is not used to create it.
A subclass of BreakIterator whose behavior is specified using a list of rules.
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
A UParseError struct is used to return detailed information about parsing errors.
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
UCharCharacterIterator * fDCharIter
When the input text is provided by a UText, this dummy CharacterIterator over an empty string will be...
int8_t UBool
The ICU boolean type.