ICU 69.1
caniter.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 *******************************************************************************
5 * Copyright (C) 1996-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9
10#ifndef CANITER_H
11#define CANITER_H
12
13#include "unicode/utypes.h"
14
15#if U_SHOW_CPLUSPLUS_API
16
17#if !UCONFIG_NO_NORMALIZATION
18
19#include "unicode/uobject.h"
20#include "unicode/unistr.h"
21
31#ifndef CANITER_SKIP_ZEROES
32#define CANITER_SKIP_ZEROES true
33#endif
34
35U_NAMESPACE_BEGIN
36
37class Hashtable;
38class Normalizer2;
39class Normalizer2Impl;
40
77public:
85
91
98
103 void reset();
104
113
121 void setSource(const UnicodeString &newSource, UErrorCode &status);
122
123#ifndef U_HIDE_INTERNAL_API
133 static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
134#endif /* U_HIDE_INTERNAL_API */
135
141 static UClassID U_EXPORT2 getStaticClassID();
142
149
150private:
151 // ===================== PRIVATES ==============================
152 // private default constructor
154
155
161
166 CanonicalIterator& operator=(const CanonicalIterator& other);
167
168 // fields
169 UnicodeString source;
170 UBool done;
171
172 // 2 dimensional array holds the pieces of the string with
173 // their different canonically equivalent representations
174 UnicodeString **pieces;
175 int32_t pieces_length;
176 int32_t *pieces_lengths;
177
178 // current is used in iterating to combine pieces
179 int32_t *current;
180 int32_t current_length;
181
182 // transient fields
183 UnicodeString buffer;
184
185 const Normalizer2 &nfd;
186 const Normalizer2Impl &nfcImpl;
187
188 // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
189 UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
190
191 //Set getEquivalents2(String segment);
192 Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
193 //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
194
200 //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
201 Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
202 //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
203
204 void cleanPieces();
205
206};
207
208U_NAMESPACE_END
209
210#endif /* #if !UCONFIG_NO_NORMALIZATION */
211
212#endif /* U_SHOW_CPLUSPLUS_API */
213
214#endif
This class allows one to iterate through all the strings that are canonically equivalent to a given string.
Definition caniter.h:76
void reset()
Resets the iterator so that one can start again from the beginning.
UnicodeString next()
Get the next canonically equivalent string.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
static void permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status)
Dumb recursive implementation of permutation.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
void setSource(const UnicodeString &newSource, UErrorCode &status)
Set a new source for this iterator.
CanonicalIterator(const UnicodeString &source, UErrorCode &status)
Construct a CanonicalIterator object.
UnicodeString getSource()
Gets the NFD form of the current source we are iterating over.
virtual ~CanonicalIterator()
Destructor. Cleans pieces.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition normalizer2.h:85
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition unistr.h:296
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:467
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:269
#define U_FINAL
Defined to the C++11 "final" keyword if available.
Definition umachine.h:141
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:415
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:300