ICU 50.1.2  50.1.2
ubrk.h
Go to the documentation of this file.
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2012, International Business Machines Corporation and others.
4 * All Rights Reserved.
5 ******************************************************************************
6 */
7 
8 #ifndef UBRK_H
9 #define UBRK_H
10 
11 #include "unicode/utypes.h"
12 #include "unicode/uloc.h"
13 #include "unicode/utext.h"
14 #include "unicode/localpointer.h"
15 
20 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
21 # define UBRK_TYPEDEF_UBREAK_ITERATOR
22 
27 #endif
28 
29 #if !UCONFIG_NO_BREAK_ITERATION
30 
31 #include "unicode/parseerr.h"
32 
87 typedef enum UBreakIteratorType {
91  UBRK_WORD = 1,
93  UBRK_LINE = 2,
96 
97 #ifndef U_HIDE_DEPRECATED_API
98 
107 #endif /* U_HIDE_DEPRECATED_API */
108  UBRK_COUNT = 5
110 
114 #define UBRK_DONE ((int32_t) -1)
115 
116 
125 typedef enum UWordBreak {
148 } UWordBreak;
149 
158 typedef enum ULineBreakTag {
168 } ULineBreakTag;
169 
170 
171 
180 typedef enum USentenceBreakTag {
197 
198 
213 U_STABLE UBreakIterator* U_EXPORT2
215  const char *locale,
216  const UChar *text,
217  int32_t textLength,
218  UErrorCode *status);
219 
235 U_STABLE UBreakIterator* U_EXPORT2
236 ubrk_openRules(const UChar *rules,
237  int32_t rulesLength,
238  const UChar *text,
239  int32_t textLength,
240  UParseError *parseErr,
241  UErrorCode *status);
242 
259 U_STABLE UBreakIterator * U_EXPORT2
261  const UBreakIterator *bi,
262  void *stackBuffer,
263  int32_t *pBufferSize,
264  UErrorCode *status);
265 
270 #define U_BRK_SAFECLONE_BUFFERSIZE 528
271 
278 U_STABLE void U_EXPORT2
280 
281 #if U_SHOW_CPLUSPLUS_API
282 
284 
295 
297 
298 #endif
299 
308 U_STABLE void U_EXPORT2
310  const UChar* text,
311  int32_t textLength,
312  UErrorCode* status);
313 
314 
326 U_STABLE void U_EXPORT2
328  UText* text,
329  UErrorCode* status);
330 
331 
332 
341 U_STABLE int32_t U_EXPORT2
342 ubrk_current(const UBreakIterator *bi);
343 
353 U_STABLE int32_t U_EXPORT2
355 
365 U_STABLE int32_t U_EXPORT2
367 
376 U_STABLE int32_t U_EXPORT2
378 
388 U_STABLE int32_t U_EXPORT2
390 
400 U_STABLE int32_t U_EXPORT2
402  int32_t offset);
403 
413 U_STABLE int32_t U_EXPORT2
415  int32_t offset);
416 
426 U_STABLE const char* U_EXPORT2
427 ubrk_getAvailable(int32_t index);
428 
437 U_STABLE int32_t U_EXPORT2
438 ubrk_countAvailable(void);
439 
440 
450 U_STABLE UBool U_EXPORT2
451 ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
452 
462 U_STABLE int32_t U_EXPORT2
464 
482 U_STABLE int32_t U_EXPORT2
483 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
484 
494 U_STABLE const char* U_EXPORT2
496 
497 #ifndef U_HIDE_DRAFT_API
498 
523 U_DRAFT void U_EXPORT2
525  UText *text,
526  UErrorCode *status);
527 #endif /* U_HIDE_DRAFT_API */
528 
529 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
530 
531 #endif
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:26
Tag value for sentences that do not contain an ending sentence terminator ('. ...
Definition: ubrk.h:192
UBreakIterator * ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe cloning operation.
Tag value for words containing kana characters, upper limit.
Definition: ubrk.h:143
Tag value for words containing ideographic characters, lower limit.
Definition: ubrk.h:145
void ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
Tag value for words containing letters, upper limit.
Definition: ubrk.h:139
int32_t ubrk_next(UBreakIterator *bi)
Advance the iterator to the boundary following the current boundary.
void ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Set the subject text string upon which the break iterator is operating without changing any other asp...
Upper bound for hard line breaks.
Definition: ubrk.h:167
Tag value for words containing kana characters, lower limit.
Definition: ubrk.h:141
Character breaks.
Definition: ubrk.h:89
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
Definition: ubrk.h:158
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
Definition: ubrk.h:125
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
Definition: ubrk.h:180
Tag value for a hard, or mandatory line break.
Definition: ubrk.h:165
Upper bound for tags for sentences ended by sentence terminators.
Definition: ubrk.h:187
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:87
Tag value for sentences ending with a sentence terminator ('.', '?', '!', etc.) character, possibly followed by a hard separator (CR, LF, PS, etc.).
Definition: ubrk.h:185
UBreakIterator * ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
Word breaks.
Definition: ubrk.h:91
int32_t ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
Get the statuses from the break rules that determined the most recently returned break position...
int32_t ubrk_first(UBreakIterator *bi)
Set the iterator position to the index of the first character in the text being scanned.
int32_t ubrk_preceding(UBreakIterator *bi, int32_t offset)
Set the iterator position to the first boundary preceding the specified offset.
C API: Abstract Unicode Text API.
void ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
const char * ubrk_getAvailable(int32_t index)
Get a locale for which text breaking information is available.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
Upper bound for tags for uncategorized words.
Definition: ubrk.h:130
Tag value for words that appear to be numbers, upper limit.
Definition: ubrk.h:134
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:290
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
int32_t ubrk_previous(UBreakIterator *bi)
Set the iterator position to the boundary preceding the current boundary.
Tag value for soft line breaks, positions at which a line break is acceptable but not required...
Definition: ubrk.h:161
"Smart pointer" class, closes a UBreakIterator via ubrk_close().
Upper bound for tags for sentences ended by a separator.
Definition: ubrk.h:194
UBreakIterator * ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using specified breaking rules.
int32_t ubrk_countAvailable(void)
Determine how many locales have text breaking information available.
int32_t ubrk_last(UBreakIterator *bi)
Set the iterator position to the index immediately beyond the last character in the text being scanne...
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:278
int32_t ubrk_getRuleStatus(UBreakIterator *bi)
Return the status from the break rule that determined the most recently returned break position...
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
const char * ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status)
Return the locale of the break iterator.
C API: Parse Error Information.
int32_t ubrk_current(const UBreakIterator *bi)
Determine the most recently-returned text boundary.
Tag value for "words" that do not fit into any of the other categories.
Definition: ubrk.h:128
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
ULocDataLocaleType
Constants for *_getLocale(). Allow the user to select whether they want information on requested...
Definition: uloc.h:336
Line breaks.
Definition: ubrk.h:93
Sentence breaks.
Definition: ubrk.h:95
UText struct.
Definition: utext.h:1343
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
void ubrk_close(UBreakIterator *bi)
Close a UBreakIterator.
Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters...
Definition: ubrk.h:137
Tag value for words that appear to be numbers, lower limit.
Definition: ubrk.h:132
Upper bound for soft line breaks.
Definition: ubrk.h:163
#define U_DRAFT
This is used to declare a function as a draft public ICU C API.
Definition: umachine.h:111
Title Case breaks. The iterator created using this type locates title boundaries as described for Unic...
Definition: ubrk.h:106
UBool ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
Returns true if the specified position is a boundary position.
int32_t ubrk_following(UBreakIterator *bi, int32_t offset)
Advance the iterator to the first boundary following the specified offset.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
Tag value for words containing ideographic characters, upper limit.
Definition: ubrk.h:147
int8_t UBool
The ICU boolean type.
Definition: umachine.h:200
C API: Locale.