ICU 50.1.2  50.1.2
ustring.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File ustring.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 12/07/98 bertrand Creation.
13 ******************************************************************************
14 */
15 
16 #ifndef USTRING_H
17 #define USTRING_H
18 
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "unicode/uiter.h"
22 
28 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
29 # define UBRK_TYPEDEF_UBREAK_ITERATOR
30 
31  typedef struct UBreakIterator UBreakIterator;
32 #endif
33 
90 U_STABLE int32_t U_EXPORT2
91 u_strlen(const UChar *s);
107 U_STABLE int32_t U_EXPORT2
108 u_countChar32(const UChar *s, int32_t length);
109 
128 U_STABLE UBool U_EXPORT2
129 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
130 
141 U_STABLE UChar* U_EXPORT2
142 u_strcat(UChar *dst,
143  const UChar *src);
144 
159 U_STABLE UChar* U_EXPORT2
160 u_strncat(UChar *dst,
161  const UChar *src,
162  int32_t n);
163 
184 U_STABLE UChar * U_EXPORT2
185 u_strstr(const UChar *s, const UChar *substring);
186 
208 U_STABLE UChar * U_EXPORT2
209 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
210 
228 U_STABLE UChar * U_EXPORT2
229 u_strchr(const UChar *s, UChar c);
230 
248 U_STABLE UChar * U_EXPORT2
249 u_strchr32(const UChar *s, UChar32 c);
250 
271 U_STABLE UChar * U_EXPORT2
272 u_strrstr(const UChar *s, const UChar *substring);
273 
295 U_STABLE UChar * U_EXPORT2
296 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
297 
315 U_STABLE UChar * U_EXPORT2
316 u_strrchr(const UChar *s, UChar c);
317 
335 U_STABLE UChar * U_EXPORT2
336 u_strrchr32(const UChar *s, UChar32 c);
337 
350 U_STABLE UChar * U_EXPORT2
351 u_strpbrk(const UChar *string, const UChar *matchSet);
352 
366 U_STABLE int32_t U_EXPORT2
367 u_strcspn(const UChar *string, const UChar *matchSet);
368 
382 U_STABLE int32_t U_EXPORT2
383 u_strspn(const UChar *string, const UChar *matchSet);
384 
410 U_STABLE UChar * U_EXPORT2
411 u_strtok_r(UChar *src,
412  const UChar *delim,
413  UChar **saveState);
414 
425 U_STABLE int32_t U_EXPORT2
426 u_strcmp(const UChar *s1,
427  const UChar *s2);
428 
440 U_STABLE int32_t U_EXPORT2
441 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
442 
470 U_STABLE int32_t U_EXPORT2
471 u_strCompare(const UChar *s1, int32_t length1,
472  const UChar *s2, int32_t length2,
473  UBool codePointOrder);
474 
495 U_STABLE int32_t U_EXPORT2
496 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
497 
498 #ifndef U_COMPARE_CODE_POINT_ORDER
499 /* see also unistr.h and unorm.h */
505 #define U_COMPARE_CODE_POINT_ORDER 0x8000
506 #endif
507 
548 U_STABLE int32_t U_EXPORT2
549 u_strCaseCompare(const UChar *s1, int32_t length1,
550  const UChar *s2, int32_t length2,
551  uint32_t options,
552  UErrorCode *pErrorCode);
553 
566 U_STABLE int32_t U_EXPORT2
567 u_strncmp(const UChar *ucs1,
568  const UChar *ucs2,
569  int32_t n);
570 
584 U_STABLE int32_t U_EXPORT2
585 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
586 
606 U_STABLE int32_t U_EXPORT2
607 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
608 
630 U_STABLE int32_t U_EXPORT2
631 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
632 
654 U_STABLE int32_t U_EXPORT2
655 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
656 
665 U_STABLE UChar* U_EXPORT2
666 u_strcpy(UChar *dst,
667  const UChar *src);
668 
680 U_STABLE UChar* U_EXPORT2
681 u_strncpy(UChar *dst,
682  const UChar *src,
683  int32_t n);
684 
685 #if !UCONFIG_NO_CONVERSION
686 
697 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
698  const char *src );
699 
712 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
713  const char *src,
714  int32_t n);
715 
726 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
727  const UChar *src );
728 
741 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
742  const UChar *src,
743  int32_t n );
744 
745 #endif
746 
755 U_STABLE UChar* U_EXPORT2
756 u_memcpy(UChar *dest, const UChar *src, int32_t count);
757 
766 U_STABLE UChar* U_EXPORT2
767 u_memmove(UChar *dest, const UChar *src, int32_t count);
768 
778 U_STABLE UChar* U_EXPORT2
779 u_memset(UChar *dest, UChar c, int32_t count);
780 
792 U_STABLE int32_t U_EXPORT2
793 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
794 
808 U_STABLE int32_t U_EXPORT2
809 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
810 
828 U_STABLE UChar* U_EXPORT2
829 u_memchr(const UChar *s, UChar c, int32_t count);
830 
848 U_STABLE UChar* U_EXPORT2
849 u_memchr32(const UChar *s, UChar32 c, int32_t count);
850 
868 U_STABLE UChar* U_EXPORT2
869 u_memrchr(const UChar *s, UChar c, int32_t count);
870 
888 U_STABLE UChar* U_EXPORT2
889 u_memrchr32(const UChar *s, UChar32 c, int32_t count);
890 
941 #if defined(U_DECLARE_UTF16)
942 # define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
943 
944 # define U_STRING_INIT(var, cs, length)
945 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
946 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
947 
948 # define U_STRING_INIT(var, cs, length)
949 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
950 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
951 
952 # define U_STRING_INIT(var, cs, length)
953 #else
954 # define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
955 
956 # define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
957 #endif
958 
1006 U_STABLE int32_t U_EXPORT2
1007 u_unescape(const char *src,
1008  UChar *dest, int32_t destCapacity);
1009 
1010 U_CDECL_BEGIN
1023 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
1024 U_CDECL_END
1025 
1054 U_STABLE UChar32 U_EXPORT2
1055 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1056  int32_t *offset,
1057  int32_t length,
1058  void *context);
1059 
1080 U_STABLE int32_t U_EXPORT2
1081 u_strToUpper(UChar *dest, int32_t destCapacity,
1082  const UChar *src, int32_t srcLength,
1083  const char *locale,
1084  UErrorCode *pErrorCode);
1085 
1106 U_STABLE int32_t U_EXPORT2
1107 u_strToLower(UChar *dest, int32_t destCapacity,
1108  const UChar *src, int32_t srcLength,
1109  const char *locale,
1110  UErrorCode *pErrorCode);
1111 
1112 #if !UCONFIG_NO_BREAK_ITERATION
1113 
1152 U_STABLE int32_t U_EXPORT2
1153 u_strToTitle(UChar *dest, int32_t destCapacity,
1154  const UChar *src, int32_t srcLength,
1155  UBreakIterator *titleIter,
1156  const char *locale,
1157  UErrorCode *pErrorCode);
1158 
1159 #endif
1160 
1185 U_STABLE int32_t U_EXPORT2
1186 u_strFoldCase(UChar *dest, int32_t destCapacity,
1187  const UChar *src, int32_t srcLength,
1188  uint32_t options,
1189  UErrorCode *pErrorCode);
1190 
1191 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
1192 
1214 U_STABLE wchar_t* U_EXPORT2
1215 u_strToWCS(wchar_t *dest,
1216  int32_t destCapacity,
1217  int32_t *pDestLength,
1218  const UChar *src,
1219  int32_t srcLength,
1220  UErrorCode *pErrorCode);
1243 U_STABLE UChar* U_EXPORT2
1244 u_strFromWCS(UChar *dest,
1245  int32_t destCapacity,
1246  int32_t *pDestLength,
1247  const wchar_t *src,
1248  int32_t srcLength,
1249  UErrorCode *pErrorCode);
1250 #endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
1251 
1274 U_STABLE char* U_EXPORT2
1275 u_strToUTF8(char *dest,
1276  int32_t destCapacity,
1277  int32_t *pDestLength,
1278  const UChar *src,
1279  int32_t srcLength,
1280  UErrorCode *pErrorCode);
1281 
1304 U_STABLE UChar* U_EXPORT2
1305 u_strFromUTF8(UChar *dest,
1306  int32_t destCapacity,
1307  int32_t *pDestLength,
1308  const char *src,
1309  int32_t srcLength,
1310  UErrorCode *pErrorCode);
1311 
1348 U_STABLE char* U_EXPORT2
1349 u_strToUTF8WithSub(char *dest,
1350  int32_t destCapacity,
1351  int32_t *pDestLength,
1352  const UChar *src,
1353  int32_t srcLength,
1354  UChar32 subchar, int32_t *pNumSubstitutions,
1355  UErrorCode *pErrorCode);
1356 
1394 U_STABLE UChar* U_EXPORT2
1395 u_strFromUTF8WithSub(UChar *dest,
1396  int32_t destCapacity,
1397  int32_t *pDestLength,
1398  const char *src,
1399  int32_t srcLength,
1400  UChar32 subchar, int32_t *pNumSubstitutions,
1401  UErrorCode *pErrorCode);
1402 
1454 U_STABLE UChar * U_EXPORT2
1455 u_strFromUTF8Lenient(UChar *dest,
1456  int32_t destCapacity,
1457  int32_t *pDestLength,
1458  const char *src,
1459  int32_t srcLength,
1460  UErrorCode *pErrorCode);
1461 
1484 U_STABLE UChar32* U_EXPORT2
1485 u_strToUTF32(UChar32 *dest,
1486  int32_t destCapacity,
1487  int32_t *pDestLength,
1488  const UChar *src,
1489  int32_t srcLength,
1490  UErrorCode *pErrorCode);
1491 
1514 U_STABLE UChar* U_EXPORT2
1515 u_strFromUTF32(UChar *dest,
1516  int32_t destCapacity,
1517  int32_t *pDestLength,
1518  const UChar32 *src,
1519  int32_t srcLength,
1520  UErrorCode *pErrorCode);
1521 
1558 U_STABLE UChar32* U_EXPORT2
1559 u_strToUTF32WithSub(UChar32 *dest,
1560  int32_t destCapacity,
1561  int32_t *pDestLength,
1562  const UChar *src,
1563  int32_t srcLength,
1564  UChar32 subchar, int32_t *pNumSubstitutions,
1565  UErrorCode *pErrorCode);
1566 
1603 U_STABLE UChar* U_EXPORT2
1604 u_strFromUTF32WithSub(UChar *dest,
1605  int32_t destCapacity,
1606  int32_t *pDestLength,
1607  const UChar32 *src,
1608  int32_t srcLength,
1609  UChar32 subchar, int32_t *pNumSubstitutions,
1610  UErrorCode *pErrorCode);
1611 
1644 U_STABLE char* U_EXPORT2
1645 u_strToJavaModifiedUTF8(
1646  char *dest,
1647  int32_t destCapacity,
1648  int32_t *pDestLength,
1649  const UChar *src,
1650  int32_t srcLength,
1651  UErrorCode *pErrorCode);
1652 
1693 U_STABLE UChar* U_EXPORT2
1694 u_strFromJavaModifiedUTF8WithSub(
1695  UChar *dest,
1696  int32_t destCapacity,
1697  int32_t *pDestLength,
1698  const char *src,
1699  int32_t srcLength,
1700  UChar32 subchar, int32_t *pNumSubstitutions,
1701  UErrorCode *pErrorCode);
1702 
1703 #endif
UChar * u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
The string tokenizer API allows an application to break a string into tokens.
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:26
UChar * u_strFromJavaModifiedUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
UChar * u_memchr(const UChar *s, UChar c, int32_t count)
Find the first occurrence of a BMP code point in a string.
int32_t u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
Compare two Unicode strings in code point order.
UChar * u_strFromWCS(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const wchar_t *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a wchar_t string to UTF-16.
UChar * u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
UChar * u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
UChar * u_strrchr(const UChar *s, UChar c)
Find the last occurrence of a BMP code point in a string.
UChar32 * u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
int32_t u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
Titlecase a string.
UChar * u_strncpy(UChar *dst, const UChar *src, int32_t n)
Copy a ustring.
int32_t u_countChar32(const UChar *s, int32_t length)
Count Unicode code points in the length UChar code units of the string.
UChar * u_memcpy(UChar *dest, const UChar *src, int32_t count)
Synonym for memcpy(), but with UChars only.
char * u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:752
int32_t u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
Compare two Unicode strings in code point order.
int32_t u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
Compare two strings case-insensitively using full case folding.
UCharIterator
C API for code unit iteration.
Definition: uiter.h:339
int32_t u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
Compare two Unicode strings in code point order.
UChar * u_strchr32(const UChar *s, UChar32 c)
Find the first occurrence of a code point in a string.
int32_t u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
Case-folds the characters in a string.
UChar32 * u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
UBool u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
Check if the string contains more Unicode code points than a certain number.
int32_t u_strcspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that do not occur s...
char * u_austrncpy(char *dst, const UChar *src, int32_t n)
Copy ustring to a byte string encoded in the default codepage.
wchar_t * u_strToWCS(wchar_t *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to a wchar_t string.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:82
int32_t u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compare two strings case-insensitively using full case folding.
UChar * u_memset(UChar *dest, UChar c, int32_t count)
Initialize count characters of dest to c.
UChar * u_uastrcpy(UChar *dst, const char *src)
Copy a byte string encoded in the default codepage to a ustring.
int32_t u_strcmp(const UChar *s1, const UChar *s2)
Compare two Unicode strings for bitwise equality (code unit order).
int32_t u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
Compare two Unicode strings (binary order).
UChar * u_strncat(UChar *dst, const UChar *src, int32_t n)
Concatenate two ustrings.
UChar(* UNESCAPE_CHAR_AT)(int32_t offset, void *context)
Callback function for u_unescapeAt() that returns a character of the source text given an offset and ...
Definition: ustring.h:1023
char * u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:298
UChar * u_strchr(const UChar *s, UChar c)
Find the first occurrence of a BMP code point in a string.
UChar * u_memrchr(const UChar *s, UChar c, int32_t count)
Find the last occurrence of a BMP code point in a string.
putil.h
C API: Platform Utilities.
int32_t u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
Compare the first count UChars of each buffer.
UChar * u_strrchr32(const UChar *s, UChar32 c)
Find the last occurrence of a code point in a string.
UChar * u_strcpy(UChar *dst, const UChar *src)
Copy a ustring.
uiter.h
C API: Unicode Character Iteration.
UChar * u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the last occurrence of a substring in a string.
UChar * u_memmove(UChar *dest, const UChar *src, int32_t count)
Synonym for memmove(), but with UChars only.
int32_t u_unescape(const char *src, UChar *dest, int32_t destCapacity)
Unescape a string of characters and write the resulting Unicode characters to the destination buffer...
int32_t u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder)
Compare two Unicode strings (binary order) as presented by UCharIterator objects. ...
int32_t u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
Compare two strings case-insensitively using full case folding.
UChar * u_strpbrk(const UChar *string, const UChar *matchSet)
Locates the first occurrence in the string string of any of the characters in the string matchSet...
UChar * u_strcat(UChar *dst, const UChar *src)
Concatenate two ustrings.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:278
UChar * u_strstr(const UChar *s, const UChar *substring)
Find the first occurrence of a substring in a string.
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:83
int32_t u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Lowercase the characters in a string.
int32_t u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
Compare two strings case-insensitively using full case folding.
int32_t u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
Compare two ustrings for bitwise equality.
UChar * u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
char * u_strToJavaModifiedUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a 16-bit Unicode string to Java Modified UTF-8.
char * u_austrcpy(char *dst, const UChar *src)
Copy ustring to a byte string encoded in the default codepage.
UChar32 u_unescapeAt(UNESCAPE_CHAR_AT charAt, int32_t *offset, int32_t length, void *context)
Unescape a single sequence.
int32_t u_strspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that occur somewher...
UChar * u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
int32_t u_strlen(const UChar *s)
Determine the length of an array of UChar.
UChar * u_memchr32(const UChar *s, UChar32 c, int32_t count)
Find the first occurrence of a code point in a string.
UChar * u_memrchr32(const UChar *s, UChar32 c, int32_t count)
Find the last occurrence of a code point in a string.
UChar * u_strrstr(const UChar *s, const UChar *substring)
Find the last occurrence of a substring in a string.
UChar * u_uastrncpy(UChar *dst, const char *src, int32_t n)
Copy a byte string encoded in the default codepage to a ustring.
UChar * u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
int32_t u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Uppercase the characters in a string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
int8_t UBool
The ICU boolean type.
Definition: umachine.h:200
UChar * u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the first occurrence of a substring in a string.