ICU 62.1
caniter.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 *******************************************************************************
5 * Copyright (C) 1996-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9
10#ifndef CANITER_H
11#define CANITER_H
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_NORMALIZATION
16
17#include "unicode/uobject.h"
18#include "unicode/unistr.h"
19
// Compile-time default: CANITER_SKIP_ZEROES is defined as TRUE here unless the
// build has already defined it before this header is included.
// NOTE(review): presumably the value passed as the skipZeros argument of
// CanonicalIterator::permute() -- confirm against the implementation file.
29#ifndef CANITER_SKIP_ZEROES
30#define CANITER_SKIP_ZEROES TRUE
31#endif
32
34
// Forward declarations: this header only uses these types through pointers
// (Hashtable *) and references (const Normalizer2 &, const Normalizer2Impl &),
// so their full definitions are not needed here.
35class Hashtable;
36class Normalizer2;
37class Normalizer2Impl;
38
// CanonicalIterator: iterates through all the strings that are canonically
// equivalent to a given source string. Final class derived from UObject.
74class U_COMMON_API CanonicalIterator U_FINAL : public UObject {
75public:
// NOTE(review): listing lines 83, 89 and 96 carry no declaration text in this
// extract. The member index that follows the listing names a constructor
// CanonicalIterator(const UnicodeString &source, UErrorCode &status), a
// virtual destructor ~CanonicalIterator() and UnicodeString getSource()
// ("Gets the NFD form of the current source we are iterating over").
// Presumably those declarations belong on these lines -- confirm against the
// original header.
83
89
96
// Resets the iterator so that one can start again from the beginning.
101 void reset();
102
// NOTE(review): listing line 111 carries no declaration text in this extract;
// presumably UnicodeString next() ("Get the next canonically equivalent
// string" in the member index) -- confirm against the original header.
111
// Sets a new source string for this iterator.
// newSource: the string to iterate over from now on.
// status:    ICU error code, passed by reference (ICU's replacement for
//            exception handling).
119 void setSource(const UnicodeString &newSource, UErrorCode &status);
120
// Internal API: the declaration below is compiled only when
// U_HIDE_INTERNAL_API is not defined.
121#ifndef U_HIDE_INTERNAL_API
// Dumb recursive implementation of permutation.
// source:    the string to permute.
// skipZeros: boolean switch; NOTE(review): exact meaning is not visible in
//            this header -- see CANITER_SKIP_ZEROES and the implementation.
// result:    Hashtable pointer; presumably receives the permutations -- confirm.
// status:    ICU error code, passed by reference.
131 static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
132#endif /* U_HIDE_INTERNAL_API */
133
// ICU "poor man's RTTI": returns a UClassID for this class.
139 static UClassID U_EXPORT2 getStaticClassID();
140
// NOTE(review): listing line 147 carries no declaration text in this extract;
// presumably virtual UClassID getDynamicClassID() const (listed in the member
// index) -- confirm against the original header.
147
148private:
149 // ===================== PRIVATES ==============================
150 // private default constructor
// NOTE(review): the declarations for listing lines 152 and 159 are missing
// from this extract (the comment above suggests a default constructor on 152).
152
153
159
// Assignment operator, declared in the private section, so it is not
// accessible to code outside the class.
164 CanonicalIterator& operator=(const CanonicalIterator& other);
165
166 // fields
// The source string being iterated over, and a flag for the iteration state.
// NOTE(review): presumably done is set once all equivalents have been
// returned -- confirm in the implementation.
167 UnicodeString source;
168 UBool done;
169
170 // 2 dimensional array holds the pieces of the string with
171 // their different canonically equivalent representations
// NOTE(review): presumably pieces_length is the number of rows in pieces and
// pieces_lengths[i] the number of entries in row i -- confirm.
172 UnicodeString **pieces;
173 int32_t pieces_length;
174 int32_t *pieces_lengths;
175
176 // current is used in iterating to combine pieces
177 int32_t *current;
178 int32_t current_length;
179
180 // transient fields
181 UnicodeString buffer;
182
// References to the normalization objects used by this iterator.
// NOTE(review): the names suggest an NFD normalizer and the NFC
// implementation data; where they are bound is not visible in this header.
183 const Normalizer2 &nfd;
184 const Normalizer2Impl &nfcImpl;
185
186 // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
// Returns a pointer to UnicodeString; result_len is passed by non-const
// reference. NOTE(review): presumably it receives the number of strings
// returned, and ownership of the returned array is not stated here -- confirm.
187 UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
188
189 //Set getEquivalents2(String segment);
// Variant taking the segment as a char16_t pointer plus length, together with
// a Hashtable pointer named fillinResult. NOTE(review): presumably the table
// is filled and returned -- confirm in the implementation.
190 Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
191 //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
192
198 //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
// Takes a code point (comp), a segment as pointer plus length, and a position
// within that segment. NOTE(review): the original doc comment (listing lines
// 193-197) is missing from this extract; semantics must be confirmed in the
// implementation.
199 Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
200 //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
201
// NOTE(review): presumably releases the pieces-related arrays; the member
// index describes the destructor as "Cleans pieces" -- confirm.
202 void cleanPieces();
203
204};
205
207
208#endif /* #if !UCONFIG_NO_NORMALIZATION */
209
210#endif
This class allows one to iterate through all the strings that are canonically equivalent to a given string.
Definition caniter.h:74
void reset()
Resets the iterator so that one can start again from the beginning.
UnicodeString next()
Get the next canonically equivalent string.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
static void permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status)
Dumb recursive implementation of permutation.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
void setSource(const UnicodeString &newSource, UErrorCode &status)
Set a new source for this iterator.
CanonicalIterator(const UnicodeString &source, UErrorCode &status)
Construct a CanonicalIterator object.
UnicodeString getSource()
Gets the NFD form of the current source we are iterating over.
virtual ~CanonicalIterator()
Destructor Cleans pieces.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition normalizer2.h:83
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition unistr.h:287
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:400
int8_t UBool
The ICU boolean type.
Definition umachine.h:236
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:93
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition utypes.h:396
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:359
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:138
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:137