ICU 62.1 62.1
unistr.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1998-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File unistr.h
10*
11* Modification History:
12*
13* Date Name Description
14* 09/25/98 stephen Creation.
15* 11/11/98 stephen Changed per 11/9 code review.
16* 04/20/99 stephen Overhauled per 4/16 code review.
17* 11/18/99 aliu Made to inherit from Replaceable. Added method
18* handleReplaceBetween(); other methods unchanged.
19* 06/25/01 grhoten Remove dependency on iostream.
20******************************************************************************
21*/
22
23#ifndef UNISTR_H
24#define UNISTR_H
25
31#include <cstddef>
32#include "unicode/utypes.h"
33#include "unicode/char16ptr.h"
34#include "unicode/rep.h"
35#include "unicode/std_string.h"
36#include "unicode/stringpiece.h"
37#include "unicode/bytestream.h"
38
39struct UConverter; // unicode/ucnv.h
40
41#ifndef USTRING_H
45U_STABLE int32_t U_EXPORT2
46u_strlen(const UChar *s);
47#endif
48
50
51#if !UCONFIG_NO_BREAK_ITERATION
52class BreakIterator; // unicode/brkiter.h
53#endif
54class Edits;
55
57
58// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
65typedef int32_t U_CALLCONV
66UStringCaseMapper(int32_t caseLocale, uint32_t options,
69#endif
70 char16_t *dest, int32_t destCapacity,
71 const char16_t *src, int32_t srcLength,
72 icu::Edits *edits,
73 UErrorCode &errorCode);
74
76
77class Locale; // unicode/locid.h
78class StringCharacterIterator;
79class UnicodeStringAppendable; // unicode/appendable.h
80
81/* The <iostream> include has been moved to unicode/ustream.h */
82
93#define US_INV icu::UnicodeString::kInvariant
94
112#if !U_CHAR16_IS_TYPEDEF
113# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
114#else
115# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
116#endif
117
131#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
132
140#ifndef UNISTR_FROM_CHAR_EXPLICIT
141# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
142 // Auto-"explicit" in ICU library code.
143# define UNISTR_FROM_CHAR_EXPLICIT explicit
144# else
145 // Empty by default for source code compatibility.
146# define UNISTR_FROM_CHAR_EXPLICIT
147# endif
148#endif
149
160#ifndef UNISTR_FROM_STRING_EXPLICIT
161# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
162 // Auto-"explicit" in ICU library code.
163# define UNISTR_FROM_STRING_EXPLICIT explicit
164# else
165 // Empty by default for source code compatibility.
166# define UNISTR_FROM_STRING_EXPLICIT
167# endif
168#endif
169
203#ifndef UNISTR_OBJECT_SIZE
204# define UNISTR_OBJECT_SIZE 64
205#endif
206
287{
288public:
289
303 kInvariant
304 };
305
306 //========================================
307 // Read-only operations
308 //========================================
309
310 /* Comparison - bitwise only - for international comparison use collation */
311
319 inline UBool operator== (const UnicodeString& text) const;
320
328 inline UBool operator!= (const UnicodeString& text) const;
329
337 inline UBool operator> (const UnicodeString& text) const;
338
346 inline UBool operator< (const UnicodeString& text) const;
347
355 inline UBool operator>= (const UnicodeString& text) const;
356
364 inline UBool operator<= (const UnicodeString& text) const;
365
377 inline int8_t compare(const UnicodeString& text) const;
378
394 inline int8_t compare(int32_t start,
395 int32_t length,
396 const UnicodeString& text) const;
397
415 inline int8_t compare(int32_t start,
416 int32_t length,
417 const UnicodeString& srcText,
418 int32_t srcStart,
419 int32_t srcLength) const;
420
433 inline int8_t compare(ConstChar16Ptr srcChars,
434 int32_t srcLength) const;
435
450 inline int8_t compare(int32_t start,
451 int32_t length,
452 const char16_t *srcChars) const;
453
471 inline int8_t compare(int32_t start,
472 int32_t length,
473 const char16_t *srcChars,
474 int32_t srcStart,
475 int32_t srcLength) const;
476
494 inline int8_t compareBetween(int32_t start,
495 int32_t limit,
496 const UnicodeString& srcText,
497 int32_t srcStart,
498 int32_t srcLimit) const;
499
517 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
518
538 inline int8_t compareCodePointOrder(int32_t start,
539 int32_t length,
540 const UnicodeString& srcText) const;
541
563 inline int8_t compareCodePointOrder(int32_t start,
564 int32_t length,
565 const UnicodeString& srcText,
566 int32_t srcStart,
567 int32_t srcLength) const;
568
587 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
588 int32_t srcLength) const;
589
609 inline int8_t compareCodePointOrder(int32_t start,
610 int32_t length,
611 const char16_t *srcChars) const;
612
634 inline int8_t compareCodePointOrder(int32_t start,
635 int32_t length,
636 const char16_t *srcChars,
637 int32_t srcStart,
638 int32_t srcLength) const;
639
661 inline int8_t compareCodePointOrderBetween(int32_t start,
662 int32_t limit,
663 const UnicodeString& srcText,
664 int32_t srcStart,
665 int32_t srcLimit) const;
666
685 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
686
707 inline int8_t caseCompare(int32_t start,
708 int32_t length,
709 const UnicodeString& srcText,
710 uint32_t options) const;
711
734 inline int8_t caseCompare(int32_t start,
735 int32_t length,
736 const UnicodeString& srcText,
737 int32_t srcStart,
738 int32_t srcLength,
739 uint32_t options) const;
740
760 inline int8_t caseCompare(ConstChar16Ptr srcChars,
761 int32_t srcLength,
762 uint32_t options) const;
763
784 inline int8_t caseCompare(int32_t start,
785 int32_t length,
786 const char16_t *srcChars,
787 uint32_t options) const;
788
811 inline int8_t caseCompare(int32_t start,
812 int32_t length,
813 const char16_t *srcChars,
814 int32_t srcStart,
815 int32_t srcLength,
816 uint32_t options) const;
817
840 inline int8_t caseCompareBetween(int32_t start,
841 int32_t limit,
842 const UnicodeString& srcText,
843 int32_t srcStart,
844 int32_t srcLimit,
845 uint32_t options) const;
846
854 inline UBool startsWith(const UnicodeString& text) const;
855
866 inline UBool startsWith(const UnicodeString& srcText,
867 int32_t srcStart,
868 int32_t srcLength) const;
869
878 inline UBool startsWith(ConstChar16Ptr srcChars,
879 int32_t srcLength) const;
880
890 inline UBool startsWith(const char16_t *srcChars,
891 int32_t srcStart,
892 int32_t srcLength) const;
893
901 inline UBool endsWith(const UnicodeString& text) const;
902
913 inline UBool endsWith(const UnicodeString& srcText,
914 int32_t srcStart,
915 int32_t srcLength) const;
916
925 inline UBool endsWith(ConstChar16Ptr srcChars,
926 int32_t srcLength) const;
927
938 inline UBool endsWith(const char16_t *srcChars,
939 int32_t srcStart,
940 int32_t srcLength) const;
941
942
943 /* Searching - bitwise only */
944
953 inline int32_t indexOf(const UnicodeString& text) const;
954
964 inline int32_t indexOf(const UnicodeString& text,
965 int32_t start) const;
966
978 inline int32_t indexOf(const UnicodeString& text,
979 int32_t start,
980 int32_t length) const;
981
998 inline int32_t indexOf(const UnicodeString& srcText,
999 int32_t srcStart,
1000 int32_t srcLength,
1001 int32_t start,
1002 int32_t length) const;
1003
1015 inline int32_t indexOf(const char16_t *srcChars,
1016 int32_t srcLength,
1017 int32_t start) const;
1018
1031 inline int32_t indexOf(ConstChar16Ptr srcChars,
1032 int32_t srcLength,
1033 int32_t start,
1034 int32_t length) const;
1035
1052 int32_t indexOf(const char16_t *srcChars,
1053 int32_t srcStart,
1054 int32_t srcLength,
1055 int32_t start,
1056 int32_t length) const;
1057
1065 inline int32_t indexOf(char16_t c) const;
1066
1075 inline int32_t indexOf(UChar32 c) const;
1076
1085 inline int32_t indexOf(char16_t c,
1086 int32_t start) const;
1087
1097 inline int32_t indexOf(UChar32 c,
1098 int32_t start) const;
1099
1110 inline int32_t indexOf(char16_t c,
1111 int32_t start,
1112 int32_t length) const;
1113
1125 inline int32_t indexOf(UChar32 c,
1126 int32_t start,
1127 int32_t length) const;
1128
1137 inline int32_t lastIndexOf(const UnicodeString& text) const;
1138
1148 inline int32_t lastIndexOf(const UnicodeString& text,
1149 int32_t start) const;
1150
1162 inline int32_t lastIndexOf(const UnicodeString& text,
1163 int32_t start,
1164 int32_t length) const;
1165
1182 inline int32_t lastIndexOf(const UnicodeString& srcText,
1183 int32_t srcStart,
1184 int32_t srcLength,
1185 int32_t start,
1186 int32_t length) const;
1187
1198 inline int32_t lastIndexOf(const char16_t *srcChars,
1199 int32_t srcLength,
1200 int32_t start) const;
1201
1214 inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1215 int32_t srcLength,
1216 int32_t start,
1217 int32_t length) const;
1218
1235 int32_t lastIndexOf(const char16_t *srcChars,
1236 int32_t srcStart,
1237 int32_t srcLength,
1238 int32_t start,
1239 int32_t length) const;
1240
1248 inline int32_t lastIndexOf(char16_t c) const;
1249
1258 inline int32_t lastIndexOf(UChar32 c) const;
1259
1268 inline int32_t lastIndexOf(char16_t c,
1269 int32_t start) const;
1270
1280 inline int32_t lastIndexOf(UChar32 c,
1281 int32_t start) const;
1282
1293 inline int32_t lastIndexOf(char16_t c,
1294 int32_t start,
1295 int32_t length) const;
1296
1308 inline int32_t lastIndexOf(UChar32 c,
1309 int32_t start,
1310 int32_t length) const;
1311
1312
1313 /* Character access */
1314
1323 inline char16_t charAt(int32_t offset) const;
1324
1332 inline char16_t operator[] (int32_t offset) const;
1333
1345 UChar32 char32At(int32_t offset) const;
1346
1362 int32_t getChar32Start(int32_t offset) const;
1363
1380 int32_t getChar32Limit(int32_t offset) const;
1381
1432 int32_t moveIndex32(int32_t index, int32_t delta) const;
1433
1434 /* Substring extraction */
1435
1451 inline void extract(int32_t start,
1452 int32_t length,
1453 Char16Ptr dst,
1454 int32_t dstStart = 0) const;
1455
1477 int32_t
1479 UErrorCode &errorCode) const;
1480
1491 inline void extract(int32_t start,
1492 int32_t length,
1493 UnicodeString& target) const;
1494
1506 inline void extractBetween(int32_t start,
1507 int32_t limit,
1508 char16_t *dst,
1509 int32_t dstStart = 0) const;
1510
1520 virtual void extractBetween(int32_t start,
1521 int32_t limit,
1522 UnicodeString& target) const;
1523
1545 int32_t extract(int32_t start,
1546 int32_t startLength,
1547 char *target,
1548 int32_t targetCapacity,
1549 enum EInvariant inv) const;
1550
1551#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1552
1572 int32_t extract(int32_t start,
1573 int32_t startLength,
1574 char *target,
1575 uint32_t targetLength) const;
1576
1577#endif
1578
1579#if !UCONFIG_NO_CONVERSION
1580
1606 inline int32_t extract(int32_t start,
1607 int32_t startLength,
1608 char *target,
1609 const char *codepage = 0) const;
1610
1640 int32_t extract(int32_t start,
1641 int32_t startLength,
1642 char *target,
1643 uint32_t targetLength,
1644 const char *codepage) const;
1645
1663 int32_t extract(char *dest, int32_t destCapacity,
1664 UConverter *cnv,
1665 UErrorCode &errorCode) const;
1666
1667#endif
1668
1682 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1683
1694 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1695
1707 void toUTF8(ByteSink &sink) const;
1708
1721 template<typename StringClass>
1724 toUTF8(sbs);
1725 return result;
1726 }
1727
1743 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1744
1745 /* Length operations */
1746
1755 inline int32_t length(void) const;
1756
1770 int32_t
1771 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1772
1796 UBool
1797 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1798
1804 inline UBool isEmpty(void) const;
1805
1815 inline int32_t getCapacity(void) const;
1816
1817 /* Other operations */
1818
1824 inline int32_t hashCode(void) const;
1825
1838 inline UBool isBogus(void) const;
1839
1840
1841 //========================================
1842 // Write operations
1843 //========================================
1844
1845 /* Assignment operations */
1846
1866
1893
1903 return moveFrom(src);
1904 }
1905
1906 // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
1918
1925
1932 friend U_COMMON_API inline void U_EXPORT2
1936
1944 inline UnicodeString& operator= (char16_t ch);
1945
1954
1966 inline UnicodeString& setTo(const UnicodeString& srcText,
1967 int32_t srcStart);
1968
1982 inline UnicodeString& setTo(const UnicodeString& srcText,
1983 int32_t srcStart,
1984 int32_t srcLength);
1985
1994 inline UnicodeString& setTo(const UnicodeString& srcText);
1995
2004 inline UnicodeString& setTo(const char16_t *srcChars,
2005 int32_t srcLength);
2006
2015 UnicodeString& setTo(char16_t srcChar);
2016
2026
2051 ConstChar16Ptr text,
2052 int32_t textLength);
2053
2073 UnicodeString &setTo(char16_t *buffer,
2074 int32_t buffLength,
2075 int32_t buffCapacity);
2076
2118
2126 UnicodeString& setCharAt(int32_t offset,
2127 char16_t ch);
2128
2129
2130 /* Append operations */
2131
2139 inline UnicodeString& operator+= (char16_t ch);
2140
2149
2158
2173 inline UnicodeString& append(const UnicodeString& srcText,
2174 int32_t srcStart,
2175 int32_t srcLength);
2176
2184 inline UnicodeString& append(const UnicodeString& srcText);
2185
2199 inline UnicodeString& append(const char16_t *srcChars,
2200 int32_t srcStart,
2201 int32_t srcLength);
2202
2212 inline UnicodeString& append(ConstChar16Ptr srcChars,
2213 int32_t srcLength);
2214
2221 inline UnicodeString& append(char16_t srcChar);
2222
2230
2231
2232 /* Insert operations */
2233
2247 inline UnicodeString& insert(int32_t start,
2248 const UnicodeString& srcText,
2249 int32_t srcStart,
2250 int32_t srcLength);
2251
2260 inline UnicodeString& insert(int32_t start,
2261 const UnicodeString& srcText);
2262
2276 inline UnicodeString& insert(int32_t start,
2277 const char16_t *srcChars,
2278 int32_t srcStart,
2279 int32_t srcLength);
2280
2290 inline UnicodeString& insert(int32_t start,
2292 int32_t srcLength);
2293
2302 inline UnicodeString& insert(int32_t start,
2303 char16_t srcChar);
2304
2313 inline UnicodeString& insert(int32_t start,
2315
2316
2317 /* Replace operations */
2318
2336 UnicodeString& replace(int32_t start,
2337 int32_t length,
2338 const UnicodeString& srcText,
2339 int32_t srcStart,
2340 int32_t srcLength);
2341
2354 UnicodeString& replace(int32_t start,
2355 int32_t length,
2356 const UnicodeString& srcText);
2357
2375 UnicodeString& replace(int32_t start,
2376 int32_t length,
2377 const char16_t *srcChars,
2378 int32_t srcStart,
2379 int32_t srcLength);
2380
2393 inline UnicodeString& replace(int32_t start,
2394 int32_t length,
2396 int32_t srcLength);
2397
2409 inline UnicodeString& replace(int32_t start,
2410 int32_t length,
2411 char16_t srcChar);
2412
2424 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2425
2435 inline UnicodeString& replaceBetween(int32_t start,
2436 int32_t limit,
2437 const UnicodeString& srcText);
2438
2453 inline UnicodeString& replaceBetween(int32_t start,
2454 int32_t limit,
2455 const UnicodeString& srcText,
2456 int32_t srcStart,
2457 int32_t srcLimit);
2458
2469 virtual void handleReplaceBetween(int32_t start,
2470 int32_t limit,
2471 const UnicodeString& text);
2472
2478 virtual UBool hasMetaData() const;
2479
2495 virtual void copy(int32_t start, int32_t limit, int32_t dest);
2496
2497 /* Search and replace operations */
2498
2507 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2508 const UnicodeString& newText);
2509
2521 inline UnicodeString& findAndReplace(int32_t start,
2522 int32_t length,
2523 const UnicodeString& oldText,
2524 const UnicodeString& newText);
2525
2544 int32_t length,
2545 const UnicodeString& oldText,
2546 int32_t oldStart,
2547 int32_t oldLength,
2548 const UnicodeString& newText,
2549 int32_t newStart,
2550 int32_t newLength);
2551
2552
2553 /* Remove operations */
2554
2560 inline UnicodeString& remove(void);
2561
2570 inline UnicodeString& remove(int32_t start,
2571 int32_t length = (int32_t)INT32_MAX);
2572
2581 inline UnicodeString& removeBetween(int32_t start,
2582 int32_t limit = (int32_t)INT32_MAX);
2583
2593 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2594
2595 /* Length operations */
2596
2609 char16_t padChar = 0x0020);
2610
2623 char16_t padChar = 0x0020);
2624
2631 inline UBool truncate(int32_t targetLength);
2632
2639
2640
2641 /* Miscellaneous operations */
2642
2648 inline UnicodeString& reverse(void);
2649
2658 inline UnicodeString& reverse(int32_t start,
2659 int32_t length);
2660
2668
2677
2685
2694
2695#if !UCONFIG_NO_BREAK_ITERATION
2696
2724
2753
2786
2787#endif
2788
2802 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2803
2804 //========================================
2805 // Access to the internal buffer
2806 //========================================
2807
2851 char16_t *getBuffer(int32_t minCapacity);
2852
2873 void releaseBuffer(int32_t newLength=-1);
2874
2905 inline const char16_t *getBuffer() const;
2906
2940 const char16_t *getTerminatedBuffer();
2941
2942 //========================================
2943 // Constructors
2944 //========================================
2945
2949 inline UnicodeString();
2950
2962 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2963
2974
2985
2997
2998#if !U_CHAR16_IS_TYPEDEF
3011#endif
3012
3013#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3027#endif
3028
3039 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3040
3048 UnicodeString(const char16_t *text,
3049 int32_t textLength);
3050
3051#if !U_CHAR16_IS_TYPEDEF
3059 UnicodeString(const uint16_t *text, int32_t length) :
3060 UnicodeString(ConstChar16Ptr(text), length) {}
3061#endif
3062
3063#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3072 UnicodeString(const wchar_t *text, int32_t length) :
3073 UnicodeString(ConstChar16Ptr(text), length) {}
3074#endif
3075
3083 inline UnicodeString(const std::nullptr_t text, int32_t length);
3084
3108 ConstChar16Ptr text,
3109 int32_t textLength);
3110
3129 UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3130
3131#if !U_CHAR16_IS_TYPEDEF
3140 UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3142#endif
3143
3144#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3154 UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3156#endif
3157
3166 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3167
3168#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3169
3190
3200
3201#endif
3202
3203#if !UCONFIG_NO_CONVERSION
3204
3222 UnicodeString(const char *codepageData, const char *codepage);
3223
3241 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3242
3265 const char *src, int32_t srcLength,
3266 UConverter *cnv,
3267 UErrorCode &errorCode);
3268
3269#endif
3270
3295 UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3296
3297
3315
3323
3331
3340
3357 virtual Replaceable *clone() const;
3358
3363
3378
3390 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3391
3392 /* Miscellaneous operations */
3393
3429
3449 UChar32 unescapeAt(int32_t &offset) const;
3450
3457
3464
3465 //========================================
3466 // Implementation methods
3467 //========================================
3468
3469protected:
3474 virtual int32_t getLength() const;
3475
3481 virtual char16_t getCharAt(int32_t offset) const;
3482
3488 virtual UChar32 getChar32At(int32_t offset) const;
3489
3490private:
3491 // For char* constructors. Could be made public.
3492 UnicodeString &setToUTF8(StringPiece utf8);
3493 // For extract(char*).
3494 // We could make a toUTF8(target, capacity, errorCode) public but not
3495 // this version: New API will be cleaner if we make callers create substrings
3496 // rather than having start+length on every method,
3497 // and it should take a UErrorCode&.
3498 int32_t
3499 toUTF8(int32_t start, int32_t len,
3500 char *target, int32_t capacity) const;
3501
3506 UBool doEquals(const UnicodeString &text, int32_t len) const;
3507
3508 inline int8_t
3509 doCompare(int32_t start,
3510 int32_t length,
3511 const UnicodeString& srcText,
3512 int32_t srcStart,
3513 int32_t srcLength) const;
3514
3515 int8_t doCompare(int32_t start,
3516 int32_t length,
3517 const char16_t *srcChars,
3518 int32_t srcStart,
3519 int32_t srcLength) const;
3520
3521 inline int8_t
3522 doCompareCodePointOrder(int32_t start,
3523 int32_t length,
3524 const UnicodeString& srcText,
3525 int32_t srcStart,
3526 int32_t srcLength) const;
3527
3528 int8_t doCompareCodePointOrder(int32_t start,
3529 int32_t length,
3530 const char16_t *srcChars,
3531 int32_t srcStart,
3532 int32_t srcLength) const;
3533
3534 inline int8_t
3535 doCaseCompare(int32_t start,
3536 int32_t length,
3537 const UnicodeString &srcText,
3538 int32_t srcStart,
3539 int32_t srcLength,
3540 uint32_t options) const;
3541
3542 int8_t
3543 doCaseCompare(int32_t start,
3544 int32_t length,
3545 const char16_t *srcChars,
3546 int32_t srcStart,
3547 int32_t srcLength,
3548 uint32_t options) const;
3549
3550 int32_t doIndexOf(char16_t c,
3551 int32_t start,
3552 int32_t length) const;
3553
3554 int32_t doIndexOf(UChar32 c,
3555 int32_t start,
3556 int32_t length) const;
3557
3558 int32_t doLastIndexOf(char16_t c,
3559 int32_t start,
3560 int32_t length) const;
3561
3562 int32_t doLastIndexOf(UChar32 c,
3563 int32_t start,
3564 int32_t length) const;
3565
3566 void doExtract(int32_t start,
3567 int32_t length,
3568 char16_t *dst,
3569 int32_t dstStart) const;
3570
3571 inline void doExtract(int32_t start,
3572 int32_t length,
3573 UnicodeString& target) const;
3574
3575 inline char16_t doCharAt(int32_t offset) const;
3576
3577 UnicodeString& doReplace(int32_t start,
3578 int32_t length,
3579 const UnicodeString& srcText,
3580 int32_t srcStart,
3581 int32_t srcLength);
3582
3583 UnicodeString& doReplace(int32_t start,
3584 int32_t length,
3585 const char16_t *srcChars,
3586 int32_t srcStart,
3587 int32_t srcLength);
3588
3589 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3590 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3591
3592 UnicodeString& doReverse(int32_t start,
3593 int32_t length);
3594
3595 // calculate hash code
3596 int32_t doHashCode(void) const;
3597
3598 // get pointer to start of array
3599 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3600 inline char16_t* getArrayStart(void);
3601 inline const char16_t* getArrayStart(void) const;
3602
3603 inline UBool hasShortLength() const;
3604 inline int32_t getShortLength() const;
3605
3606 // A UnicodeString object (not necessarily its current buffer)
3607 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3608 inline UBool isWritable() const;
3609
3610 // Is the current buffer writable?
3611 inline UBool isBufferWritable() const;
3612
3613 // None of the following does releaseArray().
3614 inline void setZeroLength();
3615 inline void setShortLength(int32_t len);
3616 inline void setLength(int32_t len);
3617 inline void setToEmpty();
3618 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3619
3620 // allocate the array; result may be the stack buffer
3621 // sets refCount to 1 if appropriate
3622 // sets fArray, fCapacity, and flags
3623 // sets length to 0
3624 // returns boolean for success or failure
3625 UBool allocate(int32_t capacity);
3626
3627 // release the array if owned
3628 void releaseArray(void);
3629
3630 // turn a bogus string into an empty one
3631 void unBogus();
3632
3633 // implements assignment operator, copy constructor, and fastCopyFrom()
3635
3636 // Copies just the fields without memory management.
3637 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3638
3639 // Pin start and limit to acceptable values.
3640 inline void pinIndex(int32_t& start) const;
3641 inline void pinIndices(int32_t& start,
3642 int32_t& length) const;
3643
3644#if !UCONFIG_NO_CONVERSION
3645
3646 /* Internal extract() using UConverter. */
3647 int32_t doExtract(int32_t start, int32_t length,
3648 char *dest, int32_t destCapacity,
3649 UConverter *cnv,
3650 UErrorCode &errorCode) const;
3651
3652 /*
3653 * Real constructor for converting from codepage data.
3654 * It assumes that it is called with !fRefCounted.
3655 *
3656 * If <code>codepage==0</code>, then the default converter
3657 * is used for the platform encoding.
3658 * If <code>codepage</code> is an empty string (<code>""</code>),
3659 * then a simple conversion is performed on the codepage-invariant
3660 * subset ("invariant characters") of the platform encoding. See utypes.h.
3661 */
3662 void doCodepageCreate(const char *codepageData,
3663 int32_t dataLength,
3664 const char *codepage);
3665
3666 /*
3667 * Worker function for creating a UnicodeString from
3668 * a codepage string using a UConverter.
3669 */
3670 void
3671 doCodepageCreate(const char *codepageData,
3672 int32_t dataLength,
3673 UConverter *converter,
3675
3676#endif
3677
3678 /*
3679 * This function is called when write access to the array
3680 * is necessary.
3681 *
3682 * We need to make a copy of the array if
3683 * the buffer is read-only, or
3684 * the buffer is refCounted (shared), and refCount>1, or
3685 * the buffer is too small.
3686 *
3687 * Return FALSE if memory could not be allocated.
3688 */
3689 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3690 int32_t growCapacity = -1,
3692 int32_t **pBufferToDelete = 0,
3694
3701 caseMap(int32_t caseLocale, uint32_t options,
3704#endif
3706
3707 // ref counting
3708 void addRef(void);
3709 int32_t removeRef(void);
3710 int32_t refCount(void) const;
3711
3712 // constants
3713 enum {
3719 US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
3720 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
3721 kInvalidHashCode=0, // invalid hash code
3722 kEmptyHashCode=1, // hash code for empty string
3723
3724 // bit flag values for fLengthAndFlags
3725 kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3726 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
3727 kRefCounted=4, // there is a refCount field before the characters in fArray
3728 kBufferIsReadonly=8,// do not write to this buffer
3729 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3730 // and releaseBuffer(newLength) must be called
3731 kAllStorageFlags=0x1f,
3732
3733 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
3734 kLength1=1<<kLengthShift,
3735 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
3736 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
3737
3738 // combined values for convenience
3739 kShortString=kUsingStackBuffer,
3740 kLongString=kRefCounted,
3741 kReadonlyAlias=kBufferIsReadonly,
3742 kWritableAlias=0
3743 };
3744
3745 friend class UnicodeStringAppendable;
3746
3747 union StackBufferOrFields; // forward declaration necessary before friend declaration
3748 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3749
3750 /*
3751 * The following are all the class fields that are stored
3752 * in each UnicodeString object.
3753 * Note that UnicodeString has virtual functions,
3754 * therefore there is an implicit vtable pointer
3755 * as the first real field.
3756 * The fields should be aligned such that no padding is necessary.
3757 * On 32-bit machines, the size should be 32 bytes,
3758 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3759 *
3760 * We use a hack to achieve this.
3761 *
3762 * With at least some compilers, each of the following is forced to
3763 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3764 * rounded up with additional padding if the fields do not already fit that requirement:
3765 * - sizeof(class UnicodeString)
3766 * - offsetof(UnicodeString, fUnion)
3767 * - sizeof(fUnion)
3768 * - sizeof(fStackFields)
3769 *
3770 * We optimize for the longest possible internal buffer for short strings.
3771 * fUnion.fStackFields begins with 2 bytes for storage flags
3772 * and the length of relatively short strings,
3773 * followed by the buffer for short string contents.
3774 * There is no padding inside fStackFields.
3775 *
3776 * Heap-allocated and aliased strings use fUnion.fFields.
3777 * Both fStackFields and fFields must begin with the same fields for flags and short length,
3778 * that is, those must have the same memory offsets inside the object,
3779 * because the flags must be inspected in order to decide which half of fUnion is being used.
3780 * We assume that the compiler does not reorder the fields.
3781 *
3782 * (Padding at the end of fFields is ok:
3783 * As long as it is no larger than fStackFields, it is not wasted space.)
3784 *
3785 * For some of the history of the UnicodeString class fields layout, see
3786 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3787 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3788 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3789 */
3790 // (implicit) *vtable;
3791 union StackBufferOrFields {
3792 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3793 // Each struct of the union must begin with fLengthAndFlags.
3794 struct {
3795 int16_t fLengthAndFlags; // bit fields: storage flags in the low kLengthShift bits, short length above them
3796 char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3797 } fStackFields;
3798 struct {
3799 int16_t fLengthAndFlags; // bit fields: same layout and offset as fStackFields.fLengthAndFlags
3800 int32_t fLength; // number of characters in fArray; read by length() only when fLengthAndFlags is negative (see kLengthIsLarge), else undefined
3801 int32_t fCapacity; // capacity of fArray (in char16_ts)
3802 // array pointer last to minimize padding for machines with P128 data model
3803 // or pointer sizes that are not a power of 2
3804 char16_t *fArray; // the Unicode data
3805 } fFields;
3806 } fUnion;
3807};
3808
3817U_COMMON_API UnicodeString U_EXPORT2
3819
3820//========================================
3821// Inline members
3822//========================================
3823
3824//========================================
3825// Privates
3826//========================================
3827
3828inline void
3829UnicodeString::pinIndex(int32_t& start) const
3830{
3831 // pin index
3832 if(start < 0) {
3833 start = 0;
3834 } else if(start > length()) {
3835 start = length();
3836 }
3837}
3838
3839inline void
3840UnicodeString::pinIndices(int32_t& start,
3841 int32_t& _length) const
3842{
3843 // pin indices
3844 int32_t len = length();
3845 if(start < 0) {
3846 start = 0;
3847 } else if(start > len) {
3848 start = len;
3849 }
3850 if(_length < 0) {
3851 _length = 0;
3852 } else if(_length > (len - start)) {
3853 _length = (len - start);
3854 }
3855}
3856
3857inline char16_t*
3858UnicodeString::getArrayStart() {
3859 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3860 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3861}
3862
3863inline const char16_t*
3864UnicodeString::getArrayStart() const {
3865 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3866 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3867}
3868
3869//========================================
3870// Default constructor
3871//========================================
3872
3873inline
3874UnicodeString::UnicodeString() {
3875 fUnion.fStackFields.fLengthAndFlags=kShortString;
3876}
3877
3878inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
3879 fUnion.fStackFields.fLengthAndFlags=kShortString;
3880}
3881
3882inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
3883 fUnion.fStackFields.fLengthAndFlags=kShortString;
3884}
3885
3886inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
3887 fUnion.fStackFields.fLengthAndFlags=kShortString;
3888}
3889
3890//========================================
3891// Read-only implementation methods
3892//========================================
3893inline UBool
3894UnicodeString::hasShortLength() const {
3895 return fUnion.fFields.fLengthAndFlags>=0;
3896}
3897
3898inline int32_t
3899UnicodeString::getShortLength() const {
3900 // fLengthAndFlags must be non-negative -> short length >= 0
3901 // and arithmetic or logical shift does not matter.
3902 return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3903}
3904
3905inline int32_t
3906UnicodeString::length() const {
3907 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3908}
3909
3910inline int32_t
3911UnicodeString::getCapacity() const {
3912 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3913 US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3914}
3915
3916inline int32_t
3917UnicodeString::hashCode() const
3918{ return doHashCode(); }
3919
3920inline UBool
3921UnicodeString::isBogus() const
3922{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3923
3924inline UBool
3925UnicodeString::isWritable() const
3926{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
3927
3928inline UBool
3929UnicodeString::isBufferWritable() const
3930{
3931 return (UBool)(
3932 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3933 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
3934}
3935
3936inline const char16_t *
3937UnicodeString::getBuffer() const {
3938 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
3939 return nullptr;
3940 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3941 return fUnion.fStackFields.fBuffer;
3942 } else {
3943 return fUnion.fFields.fArray;
3944 }
3945}
3946
3947//========================================
3948// Read-only alias methods
3949//========================================
3950inline int8_t
3951UnicodeString::doCompare(int32_t start,
3952 int32_t thisLength,
3953 const UnicodeString& srcText,
3954 int32_t srcStart,
3955 int32_t srcLength) const
3956{
3957 if(srcText.isBogus()) {
3958 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3959 } else {
3960 srcText.pinIndices(srcStart, srcLength);
3961 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3962 }
3963}
3964
3965inline UBool
3966UnicodeString::operator== (const UnicodeString& text) const
3967{
3968 if(isBogus()) {
3969 return text.isBogus();
3970 } else {
3971 int32_t len = length(), textLength = text.length();
3972 return !text.isBogus() && len == textLength && doEquals(text, len);
3973 }
3974}
3975
3976inline UBool
3977UnicodeString::operator!= (const UnicodeString& text) const
3978{ return (! operator==(text)); }
3979
3980inline UBool
3981UnicodeString::operator> (const UnicodeString& text) const
3982{ return doCompare(0, length(), text, 0, text.length()) == 1; }
3983
3984inline UBool
3985UnicodeString::operator< (const UnicodeString& text) const
3986{ return doCompare(0, length(), text, 0, text.length()) == -1; }
3987
3988inline UBool
3989UnicodeString::operator>= (const UnicodeString& text) const
3990{ return doCompare(0, length(), text, 0, text.length()) != -1; }
3991
3992inline UBool
3993UnicodeString::operator<= (const UnicodeString& text) const
3994{ return doCompare(0, length(), text, 0, text.length()) != 1; }
3995
3996inline int8_t
3997UnicodeString::compare(const UnicodeString& text) const
3998{ return doCompare(0, length(), text, 0, text.length()); }
3999
4000inline int8_t
4001UnicodeString::compare(int32_t start,
4002 int32_t _length,
4003 const UnicodeString& srcText) const
4004{ return doCompare(start, _length, srcText, 0, srcText.length()); }
4005
4006inline int8_t
4007UnicodeString::compare(ConstChar16Ptr srcChars,
4008 int32_t srcLength) const
4009{ return doCompare(0, length(), srcChars, 0, srcLength); }
4010
4011inline int8_t
4012UnicodeString::compare(int32_t start,
4013 int32_t _length,
4014 const UnicodeString& srcText,
4015 int32_t srcStart,
4016 int32_t srcLength) const
4017{ return doCompare(start, _length, srcText, srcStart, srcLength); }
4018
4019inline int8_t
4020UnicodeString::compare(int32_t start,
4021 int32_t _length,
4022 const char16_t *srcChars) const
4023{ return doCompare(start, _length, srcChars, 0, _length); }
4024
4025inline int8_t
4026UnicodeString::compare(int32_t start,
4027 int32_t _length,
4028 const char16_t *srcChars,
4029 int32_t srcStart,
4030 int32_t srcLength) const
4031{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4032
4033inline int8_t
4034UnicodeString::compareBetween(int32_t start,
4035 int32_t limit,
4036 const UnicodeString& srcText,
4037 int32_t srcStart,
4038 int32_t srcLimit) const
4039{ return doCompare(start, limit - start,
4041
4042inline int8_t
4043UnicodeString::doCompareCodePointOrder(int32_t start,
4044 int32_t thisLength,
4045 const UnicodeString& srcText,
4046 int32_t srcStart,
4047 int32_t srcLength) const
4048{
4049 if(srcText.isBogus()) {
4050 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4051 } else {
4052 srcText.pinIndices(srcStart, srcLength);
4053 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4054 }
4055}
4056
4057inline int8_t
4058UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4059{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4060
4061inline int8_t
4062UnicodeString::compareCodePointOrder(int32_t start,
4063 int32_t _length,
4064 const UnicodeString& srcText) const
4065{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4066
4067inline int8_t
4068UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4069 int32_t srcLength) const
4070{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4071
4072inline int8_t
4073UnicodeString::compareCodePointOrder(int32_t start,
4074 int32_t _length,
4075 const UnicodeString& srcText,
4076 int32_t srcStart,
4077 int32_t srcLength) const
4078{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4079
4080inline int8_t
4081UnicodeString::compareCodePointOrder(int32_t start,
4082 int32_t _length,
4083 const char16_t *srcChars) const
4084{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4085
4086inline int8_t
4087UnicodeString::compareCodePointOrder(int32_t start,
4088 int32_t _length,
4089 const char16_t *srcChars,
4090 int32_t srcStart,
4091 int32_t srcLength) const
4092{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4093
4094inline int8_t
4095UnicodeString::compareCodePointOrderBetween(int32_t start,
4096 int32_t limit,
4097 const UnicodeString& srcText,
4098 int32_t srcStart,
4099 int32_t srcLimit) const
4100{ return doCompareCodePointOrder(start, limit - start,
4102
4103inline int8_t
4104UnicodeString::doCaseCompare(int32_t start,
4105 int32_t thisLength,
4106 const UnicodeString &srcText,
4107 int32_t srcStart,
4108 int32_t srcLength,
4109 uint32_t options) const
4110{
4111 if(srcText.isBogus()) {
4112 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4113 } else {
4114 srcText.pinIndices(srcStart, srcLength);
4115 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4116 }
4117}
4118
4119inline int8_t
4120UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4121 return doCaseCompare(0, length(), text, 0, text.length(), options);
4122}
4123
4124inline int8_t
4125UnicodeString::caseCompare(int32_t start,
4126 int32_t _length,
4127 const UnicodeString &srcText,
4128 uint32_t options) const {
4129 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4130}
4131
4132inline int8_t
4133UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4134 int32_t srcLength,
4135 uint32_t options) const {
4136 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4137}
4138
4139inline int8_t
4140UnicodeString::caseCompare(int32_t start,
4141 int32_t _length,
4142 const UnicodeString &srcText,
4143 int32_t srcStart,
4144 int32_t srcLength,
4145 uint32_t options) const {
4146 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4147}
4148
4149inline int8_t
4150UnicodeString::caseCompare(int32_t start,
4151 int32_t _length,
4152 const char16_t *srcChars,
4153 uint32_t options) const {
4154 return doCaseCompare(start, _length, srcChars, 0, _length, options);
4155}
4156
4157inline int8_t
4158UnicodeString::caseCompare(int32_t start,
4159 int32_t _length,
4160 const char16_t *srcChars,
4161 int32_t srcStart,
4162 int32_t srcLength,
4163 uint32_t options) const {
4164 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4165}
4166
4167inline int8_t
4168UnicodeString::caseCompareBetween(int32_t start,
4169 int32_t limit,
4170 const UnicodeString &srcText,
4171 int32_t srcStart,
4172 int32_t srcLimit,
4173 uint32_t options) const {
4174 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4175}
4176
4177inline int32_t
4178UnicodeString::indexOf(const UnicodeString& srcText,
4179 int32_t srcStart,
4180 int32_t srcLength,
4181 int32_t start,
4182 int32_t _length) const
4183{
4184 if(!srcText.isBogus()) {
4185 srcText.pinIndices(srcStart, srcLength);
4186 if(srcLength > 0) {
4187 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4188 }
4189 }
4190 return -1;
4191}
4192
4193inline int32_t
4194UnicodeString::indexOf(const UnicodeString& text) const
4195{ return indexOf(text, 0, text.length(), 0, length()); }
4196
4197inline int32_t
4198UnicodeString::indexOf(const UnicodeString& text,
4199 int32_t start) const {
4200 pinIndex(start);
4201 return indexOf(text, 0, text.length(), start, length() - start);
4202}
4203
4204inline int32_t
4205UnicodeString::indexOf(const UnicodeString& text,
4206 int32_t start,
4207 int32_t _length) const
4208{ return indexOf(text, 0, text.length(), start, _length); }
4209
4210inline int32_t
4211UnicodeString::indexOf(const char16_t *srcChars,
4212 int32_t srcLength,
4213 int32_t start) const {
4214 pinIndex(start);
4215 return indexOf(srcChars, 0, srcLength, start, length() - start);
4216}
4217
4218inline int32_t
4219UnicodeString::indexOf(ConstChar16Ptr srcChars,
4220 int32_t srcLength,
4221 int32_t start,
4222 int32_t _length) const
4223{ return indexOf(srcChars, 0, srcLength, start, _length); }
4224
4225inline int32_t
4226UnicodeString::indexOf(char16_t c,
4227 int32_t start,
4228 int32_t _length) const
4229{ return doIndexOf(c, start, _length); }
4230
4231inline int32_t
4232UnicodeString::indexOf(UChar32 c,
4233 int32_t start,
4234 int32_t _length) const
4235{ return doIndexOf(c, start, _length); }
4236
4237inline int32_t
4238UnicodeString::indexOf(char16_t c) const
4239{ return doIndexOf(c, 0, length()); }
4240
4241inline int32_t
4242UnicodeString::indexOf(UChar32 c) const
4243{ return indexOf(c, 0, length()); }
4244
4245inline int32_t
4246UnicodeString::indexOf(char16_t c,
4247 int32_t start) const {
4248 pinIndex(start);
4249 return doIndexOf(c, start, length() - start);
4250}
4251
4252inline int32_t
4253UnicodeString::indexOf(UChar32 c,
4254 int32_t start) const {
4255 pinIndex(start);
4256 return indexOf(c, start, length() - start);
4257}
4258
4259inline int32_t
4260UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4261 int32_t srcLength,
4262 int32_t start,
4263 int32_t _length) const
4264{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4265
4266inline int32_t
4267UnicodeString::lastIndexOf(const char16_t *srcChars,
4268 int32_t srcLength,
4269 int32_t start) const {
4270 pinIndex(start);
4271 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4272}
4273
4274inline int32_t
4275UnicodeString::lastIndexOf(const UnicodeString& srcText,
4276 int32_t srcStart,
4277 int32_t srcLength,
4278 int32_t start,
4279 int32_t _length) const
4280{
4281 if(!srcText.isBogus()) {
4282 srcText.pinIndices(srcStart, srcLength);
4283 if(srcLength > 0) {
4284 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4285 }
4286 }
4287 return -1;
4288}
4289
4290inline int32_t
4291UnicodeString::lastIndexOf(const UnicodeString& text,
4292 int32_t start,
4293 int32_t _length) const
4294{ return lastIndexOf(text, 0, text.length(), start, _length); }
4295
4296inline int32_t
4297UnicodeString::lastIndexOf(const UnicodeString& text,
4298 int32_t start) const {
4299 pinIndex(start);
4300 return lastIndexOf(text, 0, text.length(), start, length() - start);
4301}
4302
4303inline int32_t
4304UnicodeString::lastIndexOf(const UnicodeString& text) const
4305{ return lastIndexOf(text, 0, text.length(), 0, length()); }
4306
4307inline int32_t
4308UnicodeString::lastIndexOf(char16_t c,
4309 int32_t start,
4310 int32_t _length) const
4311{ return doLastIndexOf(c, start, _length); }
4312
4313inline int32_t
4314UnicodeString::lastIndexOf(UChar32 c,
4315 int32_t start,
4316 int32_t _length) const {
4317 return doLastIndexOf(c, start, _length);
4318}
4319
4320inline int32_t
4321UnicodeString::lastIndexOf(char16_t c) const
4322{ return doLastIndexOf(c, 0, length()); }
4323
4324inline int32_t
4325UnicodeString::lastIndexOf(UChar32 c) const {
4326 return lastIndexOf(c, 0, length());
4327}
4328
4329inline int32_t
4330UnicodeString::lastIndexOf(char16_t c,
4331 int32_t start) const {
4332 pinIndex(start);
4333 return doLastIndexOf(c, start, length() - start);
4334}
4335
4336inline int32_t
4337UnicodeString::lastIndexOf(UChar32 c,
4338 int32_t start) const {
4339 pinIndex(start);
4340 return lastIndexOf(c, start, length() - start);
4341}
4342
4343inline UBool
4344UnicodeString::startsWith(const UnicodeString& text) const
4345{ return compare(0, text.length(), text, 0, text.length()) == 0; }
4346
4347inline UBool
4348UnicodeString::startsWith(const UnicodeString& srcText,
4349 int32_t srcStart,
4350 int32_t srcLength) const
4351{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4352
4353inline UBool
4354UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4355 if(srcLength < 0) {
4356 srcLength = u_strlen(toUCharPtr(srcChars));
4357 }
4358 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4359}
4360
4361inline UBool
4362UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4363 if(srcLength < 0) {
4364 srcLength = u_strlen(toUCharPtr(srcChars));
4365 }
4366 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4367}
4368
4369inline UBool
4370UnicodeString::endsWith(const UnicodeString& text) const
4371{ return doCompare(length() - text.length(), text.length(),
4372 text, 0, text.length()) == 0; }
4373
4374inline UBool
4375UnicodeString::endsWith(const UnicodeString& srcText,
4376 int32_t srcStart,
4377 int32_t srcLength) const {
4378 srcText.pinIndices(srcStart, srcLength);
4379 return doCompare(length() - srcLength, srcLength,
4380 srcText, srcStart, srcLength) == 0;
4381}
4382
4383inline UBool
4384UnicodeString::endsWith(ConstChar16Ptr srcChars,
4385 int32_t srcLength) const {
4386 if(srcLength < 0) {
4387 srcLength = u_strlen(toUCharPtr(srcChars));
4388 }
4389 return doCompare(length() - srcLength, srcLength,
4390 srcChars, 0, srcLength) == 0;
4391}
4392
4393inline UBool
4394UnicodeString::endsWith(const char16_t *srcChars,
4395 int32_t srcStart,
4396 int32_t srcLength) const {
4397 if(srcLength < 0) {
4398 srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4399 }
4400 return doCompare(length() - srcLength, srcLength,
4401 srcChars, srcStart, srcLength) == 0;
4402}
4403
4404//========================================
4405// replace
4406//========================================
4407inline UnicodeString&
4408UnicodeString::replace(int32_t start,
4409 int32_t _length,
4410 const UnicodeString& srcText)
4411{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4412
4413inline UnicodeString&
4414UnicodeString::replace(int32_t start,
4415 int32_t _length,
4416 const UnicodeString& srcText,
4417 int32_t srcStart,
4418 int32_t srcLength)
4419{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4420
4421inline UnicodeString&
4422UnicodeString::replace(int32_t start,
4423 int32_t _length,
4425 int32_t srcLength)
4426{ return doReplace(start, _length, srcChars, 0, srcLength); }
4427
4428inline UnicodeString&
4429UnicodeString::replace(int32_t start,
4430 int32_t _length,
4431 const char16_t *srcChars,
4432 int32_t srcStart,
4433 int32_t srcLength)
4434{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4435
4436inline UnicodeString&
4437UnicodeString::replace(int32_t start,
4438 int32_t _length,
4439 char16_t srcChar)
4440{ return doReplace(start, _length, &srcChar, 0, 1); }
4441
4442inline UnicodeString&
4443UnicodeString::replaceBetween(int32_t start,
4444 int32_t limit,
4445 const UnicodeString& srcText)
4446{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4447
4448inline UnicodeString&
4449UnicodeString::replaceBetween(int32_t start,
4450 int32_t limit,
4451 const UnicodeString& srcText,
4452 int32_t srcStart,
4453 int32_t srcLimit)
4454{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4455
4456inline UnicodeString&
4457UnicodeString::findAndReplace(const UnicodeString& oldText,
4458 const UnicodeString& newText)
4459{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4460 newText, 0, newText.length()); }
4461
4462inline UnicodeString&
4463UnicodeString::findAndReplace(int32_t start,
4464 int32_t _length,
4465 const UnicodeString& oldText,
4466 const UnicodeString& newText)
4467{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4468 newText, 0, newText.length()); }
4469
4470// ============================
4471// extract
4472// ============================
4473inline void
4474UnicodeString::doExtract(int32_t start,
4475 int32_t _length,
4476 UnicodeString& target) const
4477{ target.replace(0, target.length(), *this, start, _length); }
4478
4479inline void
4480UnicodeString::extract(int32_t start,
4481 int32_t _length,
4482 Char16Ptr target,
4483 int32_t targetStart) const
4484{ doExtract(start, _length, target, targetStart); }
4485
4486inline void
4487UnicodeString::extract(int32_t start,
4488 int32_t _length,
4489 UnicodeString& target) const
4490{ doExtract(start, _length, target); }
4491
4492#if !UCONFIG_NO_CONVERSION
4493
4494inline int32_t
4495UnicodeString::extract(int32_t start,
4496 int32_t _length,
4497 char *dst,
4498 const char *codepage) const
4499
4500{
4501 // This dstSize value will be checked explicitly
4502 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4503}
4504
4505#endif
4506
4507inline void
4508UnicodeString::extractBetween(int32_t start,
4509 int32_t limit,
4510 char16_t *dst,
4511 int32_t dstStart) const {
4512 pinIndex(start);
4513 pinIndex(limit);
4514 doExtract(start, limit - start, dst, dstStart);
4515}
4516
4517inline UnicodeString
4518UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4519 return tempSubString(start, limit - start);
4520}
4521
4522inline char16_t
4523UnicodeString::doCharAt(int32_t offset) const
4524{
4525 if((uint32_t)offset < (uint32_t)length()) {
4526 return getArrayStart()[offset];
4527 } else {
4528 return kInvalidUChar;
4529 }
4530}
4531
4532inline char16_t
4533UnicodeString::charAt(int32_t offset) const
4534{ return doCharAt(offset); }
4535
4536inline char16_t
4537UnicodeString::operator[] (int32_t offset) const
4538{ return doCharAt(offset); }
4539
4540inline UBool
4541UnicodeString::isEmpty() const {
4542 // Arithmetic or logical right shift does not matter: only testing for 0.
4543 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4544}
4545
4546//========================================
4547// Write implementation methods
4548//========================================
4549inline void
4550UnicodeString::setZeroLength() {
4551 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4552}
4553
4554inline void
4555UnicodeString::setShortLength(int32_t len) {
4556 // requires 0 <= len <= kMaxShortLength
4557 fUnion.fFields.fLengthAndFlags =
4558 (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4559}
4560
4561inline void
4562UnicodeString::setLength(int32_t len) {
4563 if(len <= kMaxShortLength) {
4564 setShortLength(len);
4565 } else {
4566 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4567 fUnion.fFields.fLength = len;
4568 }
4569}
4570
4571inline void
4572UnicodeString::setToEmpty() {
4573 fUnion.fFields.fLengthAndFlags = kShortString;
4574}
4575
4576inline void
4577UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4578 setLength(len);
4579 fUnion.fFields.fArray = array;
4580 fUnion.fFields.fCapacity = capacity;
4581}
4582
4583inline UnicodeString&
4584UnicodeString::operator= (char16_t ch)
4585{ return doReplace(0, length(), &ch, 0, 1); }
4586
4587inline UnicodeString&
4588UnicodeString::operator= (UChar32 ch)
4589{ return replace(0, length(), ch); }
4590
4591inline UnicodeString&
4592UnicodeString::setTo(const UnicodeString& srcText,
4593 int32_t srcStart,
4594 int32_t srcLength)
4595{
4596 unBogus();
4597 return doReplace(0, length(), srcText, srcStart, srcLength);
4598}
4599
4600inline UnicodeString&
4601UnicodeString::setTo(const UnicodeString& srcText,
4602 int32_t srcStart)
4603{
4604 unBogus();
4605 srcText.pinIndex(srcStart);
4606 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4607}
4608
4609inline UnicodeString&
4610UnicodeString::setTo(const UnicodeString& srcText)
4611{
4612 return copyFrom(srcText);
4613}
4614
4615inline UnicodeString&
4616UnicodeString::setTo(const char16_t *srcChars,
4617 int32_t srcLength)
4618{
4619 unBogus();
4620 return doReplace(0, length(), srcChars, 0, srcLength);
4621}
4622
4623inline UnicodeString&
4624UnicodeString::setTo(char16_t srcChar)
4625{
4626 unBogus();
4627 return doReplace(0, length(), &srcChar, 0, 1);
4628}
4629
4630inline UnicodeString&
4631UnicodeString::setTo(UChar32 srcChar)
4632{
4633 unBogus();
4634 return replace(0, length(), srcChar);
4635}
4636
4637inline UnicodeString&
4638UnicodeString::append(const UnicodeString& srcText,
4639 int32_t srcStart,
4640 int32_t srcLength)
4641{ return doAppend(srcText, srcStart, srcLength); }
4642
4643inline UnicodeString&
4644UnicodeString::append(const UnicodeString& srcText)
4645{ return doAppend(srcText, 0, srcText.length()); }
4646
4647inline UnicodeString&
4648UnicodeString::append(const char16_t *srcChars,
4649 int32_t srcStart,
4650 int32_t srcLength)
4651{ return doAppend(srcChars, srcStart, srcLength); }
4652
4653inline UnicodeString&
4654UnicodeString::append(ConstChar16Ptr srcChars,
4655 int32_t srcLength)
4656{ return doAppend(srcChars, 0, srcLength); }
4657
4658inline UnicodeString&
4659UnicodeString::append(char16_t srcChar)
4660{ return doAppend(&srcChar, 0, 1); }
4661
4662inline UnicodeString&
4663UnicodeString::operator+= (char16_t ch)
4664{ return doAppend(&ch, 0, 1); }
4665
4666inline UnicodeString&
4667UnicodeString::operator+= (UChar32 ch) {
4668 return append(ch);
4669}
4670
4671inline UnicodeString&
4672UnicodeString::operator+= (const UnicodeString& srcText)
4673{ return doAppend(srcText, 0, srcText.length()); }
4674
4675inline UnicodeString&
4676UnicodeString::insert(int32_t start,
4677 const UnicodeString& srcText,
4678 int32_t srcStart,
4679 int32_t srcLength)
4680{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4681
4682inline UnicodeString&
4683UnicodeString::insert(int32_t start,
4684 const UnicodeString& srcText)
4685{ return doReplace(start, 0, srcText, 0, srcText.length()); }
4686
4687inline UnicodeString&
4688UnicodeString::insert(int32_t start,
4689 const char16_t *srcChars,
4690 int32_t srcStart,
4691 int32_t srcLength)
4692{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4693
4694inline UnicodeString&
4695UnicodeString::insert(int32_t start,
4697 int32_t srcLength)
4698{ return doReplace(start, 0, srcChars, 0, srcLength); }
4699
4700inline UnicodeString&
4701UnicodeString::insert(int32_t start,
4702 char16_t srcChar)
4703{ return doReplace(start, 0, &srcChar, 0, 1); }
4704
4705inline UnicodeString&
4706UnicodeString::insert(int32_t start,
4708{ return replace(start, 0, srcChar); }
4709
4710
4711inline UnicodeString&
4712UnicodeString::remove()
4713{
4714 // remove() of a bogus string makes the string empty and non-bogus
4715 if(isBogus()) {
4716 setToEmpty();
4717 } else {
4718 setZeroLength();
4719 }
4720 return *this;
4721}
4722
4723inline UnicodeString&
4724UnicodeString::remove(int32_t start,
4725 int32_t _length)
4726{
4727 if(start <= 0 && _length == INT32_MAX) {
4728 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4729 return remove();
4730 }
4731 return doReplace(start, _length, NULL, 0, 0);
4732}
4733
4734inline UnicodeString&
4735UnicodeString::removeBetween(int32_t start,
4736 int32_t limit)
4737{ return doReplace(start, limit - start, NULL, 0, 0); }
4738
4739inline UnicodeString &
4740UnicodeString::retainBetween(int32_t start, int32_t limit) {
4741 truncate(limit);
4742 return doReplace(0, start, NULL, 0, 0);
4743}
4744
4745inline UBool
4746UnicodeString::truncate(int32_t targetLength)
4747{
4748 if(isBogus() && targetLength == 0) {
4749 // truncate(0) of a bogus string makes the string empty and non-bogus
4750 unBogus();
4751 return FALSE;
4752 } else if((uint32_t)targetLength < (uint32_t)length()) {
4753 setLength(targetLength);
4754 return TRUE;
4755 } else {
4756 return FALSE;
4757 }
4758}
4759
4760inline UnicodeString&
4761UnicodeString::reverse()
4762{ return doReverse(0, length()); }
4763
4764inline UnicodeString&
4765UnicodeString::reverse(int32_t start,
4766 int32_t _length)
4767{ return doReverse(start, _length); }
4768
4770
4771#endif
C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition brkiter.h:102
A ByteSink can be filled with bytes.
Definition bytestream.h:50
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:37
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:142
Records lengths of string edits but not replacement text.
Definition edits.h:77
"Smart pointer" base class; do not use directly: use LocalPointer etc.
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:188
Replaceable is an abstract base class representing a string of characters that supports the replacement of a range of itself with a new string of characters.
Definition rep.h:73
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition rep.h:250
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition rep.h:245
A string-like object that points to a sized piece of memory.
Definition stringpiece.h:54
An Appendable implementation which writes to a UnicodeString.
Definition appendable.h:153
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition unistr.h:287
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChars in the range [srcStart, srcStart + srcLength), using bitwise comparison.
UnicodeString(const UnicodeString &that)
Copy constructor.
virtual UChar32 getChar32At(int32_t offset) const
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline ...
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain number.
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const
Copy the characters in the range [start, limit) into the UnicodeString target.
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a Unicode string from an invariant-character char * string.
Definition unistr.h:298
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
UnicodeString(const wchar_t *text, int32_t length)
wchar_t * constructor.
Definition unistr.h:3072
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
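Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with the characters in newText in the range [newStart, newStart + newLength) in the range [start, start + length).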
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text)
uint16_t * constructor.
Definition unistr.h:3009
void swap(UnicodeString &other)
Swap strings.
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + startLength) into an array of characters in a specified c...
virtual int32_t getLength() const
Implement Replaceable::getLength() (see jitterbug 1027).
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the ran...
Definition unistr.h:4414
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at the given offset.
UnicodeString & trim(void)
Trims leading and trailing whitespace from this UnicodeString.
int32_t length(void) const
Return the length of the UnicodeString object.
Definition unistr.h:3906
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + startLength) into an array of characters in the platform'...
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this string the last occurrence in the range [start, start + length) of the characters in srcChars...
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
Definition unistr.h:3154
void setToBogus()
Make this UnicodeString object invalid.
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text)
Replace a substring of this object with the given text.
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
virtual UBool hasMetaData() const
Replaceable API.
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count the Unicode code points in the range of 'length' char16_t code units of the string beginning at 'start'.
UnicodeString & toUpper(void)
Convert the characters in this string to upper case following the conventions of the default locale.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
virtual void copy(int32_t start, int32_t limit, int32_t dest)
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual Replaceable * clone() const
Clone this object, an instance of a subclass of Replaceable.
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
UnicodeString & moveFrom(UnicodeString &src)
Move assignment; might leave src in bogus state.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
Definition unistr.h:1722
UnicodeString(const char *src, int32_t length, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
Definition unistr.h:3140
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UnicodeString & toLower(const Locale &locale)
Convert the characters in this string to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
UnicodeString(UnicodeString &&src)
Move constructor; might leave src in bogus state.
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text)
char16_t* constructor.
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this string to upper case following the conventions of a specific locale.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text)
wchar_t * constructor.
Definition unistr.h:3025
virtual char16_t getCharAt(int32_t offset) const
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline agai...
UBool isBogus(void) const
Determine if this object contains a valid string.
Definition unistr.h:3921
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
UnicodeString & operator=(UnicodeString &&src)
Move assignment operator; might leave src in bogus state.
Definition unistr.h:1902
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
friend U_COMMON_API void swap(UnicodeString &s1, UnicodeString &s2)
Non-member UnicodeString swap function.
Definition unistr.h:1933
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
UnicodeString(const uint16_t *text, int32_t length)
uint16_t * constructor.
Definition unistr.h:3059
UnicodeString & toLower(void)
Convert the characters in this string to lower case following the conventions of the default locale.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
const UChar * toUCharPtr(const char16_t *p)
Converts from const char16_t * to const UChar *.
Definition char16ptr.h:251
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition platform.h:835
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
Definition ucnv_err.h:96
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
Definition uconfig.h:348
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:400
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition umachine.h:182
int8_t UBool
The ICU boolean type.
Definition umachine.h:236
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:353
#define U_SIZEOF_UCHAR
Number of bytes in a UChar.
Definition umachine.h:291
#define TRUE
The TRUE value of a UBool.
Definition umachine.h:240
#define FALSE
The FALSE value of a UBool.
Definition umachine.h:244
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:111
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
Definition unistr.h:146
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
Definition unistr.h:66
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
Definition unistr.h:166
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
Definition unistr.h:204
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:93
Basic definitions for ICU, for both C and C++ APIs.
#define NULL
Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
Definition utypes.h:188
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:396
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:359
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:138
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:137