ICU 62.1  62.1
ustring.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1998-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File ustring.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 12/07/98 bertrand Creation.
15 ******************************************************************************
16 */
17 
18 #ifndef USTRING_H
19 #define USTRING_H
20 
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
23 #include "unicode/uiter.h"
24 
30 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
31 # define UBRK_TYPEDEF_UBREAK_ITERATOR
32 
33  typedef struct UBreakIterator UBreakIterator;
34 #endif
35 
92 U_STABLE int32_t U_EXPORT2
93 u_strlen(const UChar *s);
109 U_STABLE int32_t U_EXPORT2
110 u_countChar32(const UChar *s, int32_t length);
111 
130 U_STABLE UBool U_EXPORT2
131 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
132 
143 U_STABLE UChar* U_EXPORT2
144 u_strcat(UChar *dst,
145  const UChar *src);
146 
161 U_STABLE UChar* U_EXPORT2
162 u_strncat(UChar *dst,
163  const UChar *src,
164  int32_t n);
165 
186 U_STABLE UChar * U_EXPORT2
187 u_strstr(const UChar *s, const UChar *substring);
188 
210 U_STABLE UChar * U_EXPORT2
211 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
212 
230 U_STABLE UChar * U_EXPORT2
231 u_strchr(const UChar *s, UChar c);
232 
250 U_STABLE UChar * U_EXPORT2
251 u_strchr32(const UChar *s, UChar32 c);
252 
273 U_STABLE UChar * U_EXPORT2
274 u_strrstr(const UChar *s, const UChar *substring);
275 
297 U_STABLE UChar * U_EXPORT2
298 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
299 
317 U_STABLE UChar * U_EXPORT2
318 u_strrchr(const UChar *s, UChar c);
319 
337 U_STABLE UChar * U_EXPORT2
338 u_strrchr32(const UChar *s, UChar32 c);
339 
352 U_STABLE UChar * U_EXPORT2
353 u_strpbrk(const UChar *string, const UChar *matchSet);
354 
368 U_STABLE int32_t U_EXPORT2
369 u_strcspn(const UChar *string, const UChar *matchSet);
370 
384 U_STABLE int32_t U_EXPORT2
385 u_strspn(const UChar *string, const UChar *matchSet);
386 
412 U_STABLE UChar * U_EXPORT2
413 u_strtok_r(UChar *src,
414  const UChar *delim,
415  UChar **saveState);
416 
427 U_STABLE int32_t U_EXPORT2
428 u_strcmp(const UChar *s1,
429  const UChar *s2);
430 
442 U_STABLE int32_t U_EXPORT2
443 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
444 
472 U_STABLE int32_t U_EXPORT2
473 u_strCompare(const UChar *s1, int32_t length1,
474  const UChar *s2, int32_t length2,
475  UBool codePointOrder);
476 
497 U_STABLE int32_t U_EXPORT2
498 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
499 
540 U_STABLE int32_t U_EXPORT2
541 u_strCaseCompare(const UChar *s1, int32_t length1,
542  const UChar *s2, int32_t length2,
543  uint32_t options,
544  UErrorCode *pErrorCode);
545 
558 U_STABLE int32_t U_EXPORT2
559 u_strncmp(const UChar *ucs1,
560  const UChar *ucs2,
561  int32_t n);
562 
576 U_STABLE int32_t U_EXPORT2
577 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
578 
598 U_STABLE int32_t U_EXPORT2
599 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
600 
622 U_STABLE int32_t U_EXPORT2
623 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
624 
646 U_STABLE int32_t U_EXPORT2
647 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
648 
657 U_STABLE UChar* U_EXPORT2
658 u_strcpy(UChar *dst,
659  const UChar *src);
660 
672 U_STABLE UChar* U_EXPORT2
673 u_strncpy(UChar *dst,
674  const UChar *src,
675  int32_t n);
676 
677 #if !UCONFIG_NO_CONVERSION
678 
689 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
690  const char *src );
691 
704 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
705  const char *src,
706  int32_t n);
707 
718 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
719  const UChar *src );
720 
733 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
734  const UChar *src,
735  int32_t n );
736 
737 #endif
738 
747 U_STABLE UChar* U_EXPORT2
748 u_memcpy(UChar *dest, const UChar *src, int32_t count);
749 
758 U_STABLE UChar* U_EXPORT2
759 u_memmove(UChar *dest, const UChar *src, int32_t count);
760 
770 U_STABLE UChar* U_EXPORT2
771 u_memset(UChar *dest, UChar c, int32_t count);
772 
784 U_STABLE int32_t U_EXPORT2
785 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
786 
800 U_STABLE int32_t U_EXPORT2
801 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
802 
820 U_STABLE UChar* U_EXPORT2
821 u_memchr(const UChar *s, UChar c, int32_t count);
822 
840 U_STABLE UChar* U_EXPORT2
841 u_memchr32(const UChar *s, UChar32 c, int32_t count);
842 
860 U_STABLE UChar* U_EXPORT2
861 u_memrchr(const UChar *s, UChar c, int32_t count);
862 
880 U_STABLE UChar* U_EXPORT2
881 u_memrchr32(const UChar *s, UChar32 c, int32_t count);
882 
933 #if defined(U_DECLARE_UTF16)
934 # define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
935 
936 # define U_STRING_INIT(var, cs, length)
937 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
938 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
939 
940 # define U_STRING_INIT(var, cs, length)
941 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
942 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
943 
944 # define U_STRING_INIT(var, cs, length)
945 #else
946 # define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
947 
948 # define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
949 #endif
950 
998 U_STABLE int32_t U_EXPORT2
999 u_unescape(const char *src,
1000  UChar *dest, int32_t destCapacity);
1001 
1002 U_CDECL_BEGIN
1015 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
1016 U_CDECL_END
1017 
1046 U_STABLE UChar32 U_EXPORT2
1047 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1048  int32_t *offset,
1049  int32_t length,
1050  void *context);
1051 
1072 U_STABLE int32_t U_EXPORT2
1073 u_strToUpper(UChar *dest, int32_t destCapacity,
1074  const UChar *src, int32_t srcLength,
1075  const char *locale,
1076  UErrorCode *pErrorCode);
1077 
1098 U_STABLE int32_t U_EXPORT2
1099 u_strToLower(UChar *dest, int32_t destCapacity,
1100  const UChar *src, int32_t srcLength,
1101  const char *locale,
1102  UErrorCode *pErrorCode);
1103 
1104 #if !UCONFIG_NO_BREAK_ITERATION
1105 
1144 U_STABLE int32_t U_EXPORT2
1145 u_strToTitle(UChar *dest, int32_t destCapacity,
1146  const UChar *src, int32_t srcLength,
1147  UBreakIterator *titleIter,
1148  const char *locale,
1149  UErrorCode *pErrorCode);
1150 
1151 #endif
1152 
1177 U_STABLE int32_t U_EXPORT2
1178 u_strFoldCase(UChar *dest, int32_t destCapacity,
1179  const UChar *src, int32_t srcLength,
1180  uint32_t options,
1181  UErrorCode *pErrorCode);
1182 
1183 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
1184 
1206 U_STABLE wchar_t* U_EXPORT2
1207 u_strToWCS(wchar_t *dest,
1208  int32_t destCapacity,
1209  int32_t *pDestLength,
1210  const UChar *src,
1211  int32_t srcLength,
1212  UErrorCode *pErrorCode);
1235 U_STABLE UChar* U_EXPORT2
1236 u_strFromWCS(UChar *dest,
1237  int32_t destCapacity,
1238  int32_t *pDestLength,
1239  const wchar_t *src,
1240  int32_t srcLength,
1241  UErrorCode *pErrorCode);
1242 #endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
1243 
1266 U_STABLE char* U_EXPORT2
1267 u_strToUTF8(char *dest,
1268  int32_t destCapacity,
1269  int32_t *pDestLength,
1270  const UChar *src,
1271  int32_t srcLength,
1272  UErrorCode *pErrorCode);
1273 
1296 U_STABLE UChar* U_EXPORT2
1297 u_strFromUTF8(UChar *dest,
1298  int32_t destCapacity,
1299  int32_t *pDestLength,
1300  const char *src,
1301  int32_t srcLength,
1302  UErrorCode *pErrorCode);
1303 
1339 U_STABLE char* U_EXPORT2
1340 u_strToUTF8WithSub(char *dest,
1341  int32_t destCapacity,
1342  int32_t *pDestLength,
1343  const UChar *src,
1344  int32_t srcLength,
1345  UChar32 subchar, int32_t *pNumSubstitutions,
1346  UErrorCode *pErrorCode);
1347 
1384 U_STABLE UChar* U_EXPORT2
1385 u_strFromUTF8WithSub(UChar *dest,
1386  int32_t destCapacity,
1387  int32_t *pDestLength,
1388  const char *src,
1389  int32_t srcLength,
1390  UChar32 subchar, int32_t *pNumSubstitutions,
1391  UErrorCode *pErrorCode);
1392 
1444 U_STABLE UChar * U_EXPORT2
1445 u_strFromUTF8Lenient(UChar *dest,
1446  int32_t destCapacity,
1447  int32_t *pDestLength,
1448  const char *src,
1449  int32_t srcLength,
1450  UErrorCode *pErrorCode);
1451 
1474 U_STABLE UChar32* U_EXPORT2
1475 u_strToUTF32(UChar32 *dest,
1476  int32_t destCapacity,
1477  int32_t *pDestLength,
1478  const UChar *src,
1479  int32_t srcLength,
1480  UErrorCode *pErrorCode);
1481 
1504 U_STABLE UChar* U_EXPORT2
1505 u_strFromUTF32(UChar *dest,
1506  int32_t destCapacity,
1507  int32_t *pDestLength,
1508  const UChar32 *src,
1509  int32_t srcLength,
1510  UErrorCode *pErrorCode);
1511 
1547 U_STABLE UChar32* U_EXPORT2
1548 u_strToUTF32WithSub(UChar32 *dest,
1549  int32_t destCapacity,
1550  int32_t *pDestLength,
1551  const UChar *src,
1552  int32_t srcLength,
1553  UChar32 subchar, int32_t *pNumSubstitutions,
1554  UErrorCode *pErrorCode);
1555 
1591 U_STABLE UChar* U_EXPORT2
1592 u_strFromUTF32WithSub(UChar *dest,
1593  int32_t destCapacity,
1594  int32_t *pDestLength,
1595  const UChar32 *src,
1596  int32_t srcLength,
1597  UChar32 subchar, int32_t *pNumSubstitutions,
1598  UErrorCode *pErrorCode);
1599 
1632 U_STABLE char* U_EXPORT2
1633 u_strToJavaModifiedUTF8(
1634  char *dest,
1635  int32_t destCapacity,
1636  int32_t *pDestLength,
1637  const UChar *src,
1638  int32_t srcLength,
1639  UErrorCode *pErrorCode);
1640 
1682 U_STABLE UChar* U_EXPORT2
1683 u_strFromJavaModifiedUTF8WithSub(
1684  UChar *dest,
1685  int32_t destCapacity,
1686  int32_t *pDestLength,
1687  const char *src,
1688  int32_t srcLength,
1689  UChar32 subchar, int32_t *pNumSubstitutions,
1690  UErrorCode *pErrorCode);
1691 
1692 #endif
UChar * u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
The string tokenizer API allows an application to break a string into tokens.
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:28
UChar * u_strFromJavaModifiedUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
UChar * u_memchr(const UChar *s, UChar c, int32_t count)
Find the first occurrence of a BMP code point in a string.
int32_t u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
Compare two Unicode strings in code point order.
UChar * u_strFromWCS(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const wchar_t *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a wchar_t string to UTF-16.
UChar * u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
UChar * u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
UChar * u_strrchr(const UChar *s, UChar c)
Find the last occurrence of a BMP code point in a string.
UChar32 * u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
int32_t u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
Titlecase a string.
UChar(* UNESCAPE_CHAR_AT)(int32_t offset, void *context)
Callback function for u_unescapeAt() that returns a character of the source text given an offset and ...
Definition: ustring.h:1015
UChar * u_strncpy(UChar *dst, const UChar *src, int32_t n)
Copy a ustring.
int32_t u_countChar32(const UChar *s, int32_t length)
Count Unicode code points in the length UChar code units of the string.
UChar * u_memcpy(UChar *dest, const UChar *src, int32_t count)
Synonym for memcpy(), but with UChars only.
char * u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:836
int32_t u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
Compare two Unicode strings in code point order.
int32_t u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
Compare two strings case-insensitively using full case folding.
UCharIterator
C API for code unit iteration.
Definition: uiter.h:341
int32_t u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
Compare two Unicode strings in code point order.
UChar * u_strchr32(const UChar *s, UChar32 c)
Find the first occurrence of a code point in a string.
int32_t u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
Case-folds the characters in a string.
UChar32 * u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
UBool u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
Check if the string contains more Unicode code points than a certain number.
int32_t u_strcspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that do not occur s...
char * u_austrncpy(char *dst, const UChar *src, int32_t n)
Copy ustring to a byte string encoded in the default codepage.
wchar_t * u_strToWCS(wchar_t *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to a wchar_t string.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
int32_t u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compare two strings case-insensitively using full case folding.
UChar * u_memset(UChar *dest, UChar c, int32_t count)
Initialize count characters of dest to c.
UChar * u_uastrcpy(UChar *dst, const char *src)
Copy a byte string encoded in the default codepage to a ustring.
int32_t u_strcmp(const UChar *s1, const UChar *s2)
Compare two Unicode strings for bitwise equality (code unit order).
int32_t u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
Compare two Unicode strings (binary order).
UChar * u_strncat(UChar *dst, const UChar *src, int32_t n)
Concatenate two ustrings.
char * u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:400
UChar * u_strchr(const UChar *s, UChar c)
Find the first occurrence of a BMP code point in a string.
UChar * u_memrchr(const UChar *s, UChar c, int32_t count)
Find the last occurrence of a BMP code point in a string.
putil.h
C API: Platform Utilities.
int32_t u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
Compare the first count UChars of each buffer.
UChar * u_strrchr32(const UChar *s, UChar32 c)
Find the last occurrence of a code point in a string.
UChar * u_strcpy(UChar *dst, const UChar *src)
Copy a ustring.
uiter.h
C API: Unicode Character Iteration.
UChar * u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the last occurrence of a substring in a string.
UChar * u_memmove(UChar *dest, const UChar *src, int32_t count)
Synonym for memmove(), but with UChars only.
int32_t u_unescape(const char *src, UChar *dest, int32_t destCapacity)
Unescape a string of characters and write the resulting Unicode characters to the destination buffer...
int32_t u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder)
Compare two Unicode strings (binary order) as presented by UCharIterator objects. ...
int32_t u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
Compare two strings case-insensitively using full case folding.
UChar * u_strpbrk(const UChar *string, const UChar *matchSet)
Locates the first occurrence in the string string of any of the characters in the string matchSet...
UChar * u_strcat(UChar *dst, const UChar *src)
Concatenate two ustrings.
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:353
UChar * u_strstr(const UChar *s, const UChar *substring)
Find the first occurrence of a substring in a string.
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
int32_t u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Lowercase the characters in a string.
int32_t u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
Compare two strings case-insensitively using full case folding.
int32_t u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
Compare two ustrings for bitwise equality.
UChar * u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:396
char * u_strToJavaModifiedUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a 16-bit Unicode string to Java Modified UTF-8.
char * u_austrcpy(char *dst, const UChar *src)
Copy ustring to a byte string encoded in the default codepage.
UChar32 u_unescapeAt(UNESCAPE_CHAR_AT charAt, int32_t *offset, int32_t length, void *context)
Unescape a single sequence.
int32_t u_strspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that occur somewher...
UChar * u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
int32_t u_strlen(const UChar *s)
Determine the length of an array of UChar.
UChar * u_memchr32(const UChar *s, UChar32 c, int32_t count)
Find the first occurrence of a code point in a string.
UChar * u_memrchr32(const UChar *s, UChar32 c, int32_t count)
Find the last occurrence of a code point in a string.
UChar * u_strrstr(const UChar *s, const UChar *substring)
Find the last occurrence of a substring in a string.
UChar * u_uastrncpy(UChar *dst, const char *src, int32_t n)
Copy a byte string encoded in the default codepage to a ustring.
UChar * u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
int32_t u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Uppercase the characters in a string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
UChar * u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the first occurrence of a substring in a string.