ICU 62.1 62.1
normlzr.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 ********************************************************************
5 * COPYRIGHT:
6 * Copyright (c) 1996-2015, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 ********************************************************************
9 */
10
11#ifndef NORMLZR_H
12#define NORMLZR_H
13
14#include "unicode/utypes.h"
15
21#if !UCONFIG_NO_NORMALIZATION
22
23#include "unicode/chariter.h"
24#include "unicode/normalizer2.h"
25#include "unicode/unistr.h"
26#include "unicode/unorm.h"
27#include "unicode/uobject.h"
28
135public:
136#ifndef U_HIDE_DEPRECATED_API
142 enum {
143 DONE=0xffff
144 };
145
146 // Constructors
147
159
172
184#endif /* U_HIDE_DEPRECATED_API */
185
191 Normalizer(const Normalizer& copy);
192
197 virtual ~Normalizer();
198
199
200 //-------------------------------------------------------------------------
201 // Static utility methods
202 //-------------------------------------------------------------------------
203
204#ifndef U_HIDE_DEPRECATED_API
219 static void U_EXPORT2 normalize(const UnicodeString& source,
223
241 static void U_EXPORT2 compose(const UnicodeString& source,
242 UBool compat, int32_t options,
245
263 static void U_EXPORT2 decompose(const UnicodeString& source,
264 UBool compat, int32_t options,
267
288 static inline UNormalizationCheckResult
289 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
290
306
327 static inline UBool
328 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
329
345 static UBool
347
377 static UnicodeString &
381 UErrorCode &errorCode);
382#endif /* U_HIDE_DEPRECATED_API */
383
448 static inline int32_t
449 compare(const UnicodeString &s1, const UnicodeString &s2,
450 uint32_t options,
451 UErrorCode &errorCode);
452
453#ifndef U_HIDE_DEPRECATED_API
454 //-------------------------------------------------------------------------
455 // Iteration API
456 //-------------------------------------------------------------------------
457
467
477
487
503
519
529 void setIndexOnly(int32_t index);
530
536 void reset(void);
537
552 int32_t getIndex(void) const;
553
562 int32_t startIndex(void) const;
563
574 int32_t endIndex(void) const;
575
585
594 inline UBool operator!=(const Normalizer& that) const;
595
602 Normalizer* clone(void) const;
603
610 int32_t hashCode(void) const;
611
612 //-------------------------------------------------------------------------
613 // Property access methods
614 //-------------------------------------------------------------------------
615
632
644
661 void setOption(int32_t option,
662 UBool value);
663
674 UBool getOption(int32_t option) const;
675
686
697
708 int32_t length,
717
724#endif /* U_HIDE_DEPRECATED_API */
725
732
733private:
734 //-------------------------------------------------------------------------
735 // Private functions
736 //-------------------------------------------------------------------------
737
738 Normalizer(); // default constructor not implemented
739 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
740
741 // Private utility methods for iteration
742 // For documentation, see the source code
743 UBool nextNormalize();
744 UBool previousNormalize();
745
746 void init();
747 void clearBuffer(void);
748
749 //-------------------------------------------------------------------------
750 // Private data
751 //-------------------------------------------------------------------------
752
753 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
754 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
755 UNormalizationMode fUMode; // deprecated
756 int32_t fOptions;
757
758 // The input text and our position in it
759 CharacterIterator *text;
760
761 // The normalization buffer is the result of normalization
762 // of the source in [currentIndex..nextIndex[ .
763 int32_t currentIndex, nextIndex;
764
765 // A buffer for holding intermediate results
766 UnicodeString buffer;
767 int32_t bufferPos;
768};
769
770//-------------------------------------------------------------------------
771// Inline implementations
772//-------------------------------------------------------------------------
773
774#ifndef U_HIDE_DEPRECATED_API
// Inequality operator: defined as the logical negation of operator== applied to the same argument.
775inline UBool
776Normalizer::operator!= (const Normalizer& other) const
777{ return ! operator==(other); }
778
780Normalizer::quickCheck(const UnicodeString& source,
783 return quickCheck(source, mode, 0, status);
784}
785
786inline UBool
787Normalizer::isNormalized(const UnicodeString& source,
790 return isNormalized(source, mode, 0, status);
791}
792#endif /* U_HIDE_DEPRECATED_API */
793
// Inline wrapper that forwards both strings to the C function unorm_compare().
// The buffers and lengths of s1 and s2, the options value, and the address of
// errorCode are passed straight through; the result is whatever unorm_compare() returns.
794inline int32_t
795Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
796 uint32_t options,
797 UErrorCode &errorCode) {
798 // No validation is performed here: all argument checking is done in unorm_compare.
799 return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), // toUCharPtr converts const char16_t * to const UChar *
800 toUCharPtr(s2.getBuffer()), s2.length(),
801 options,
802 &errorCode); // the C function takes the error code by pointer
803}
804
806
807#endif /* #if !UCONFIG_NO_NORMALIZATION */
808
809#endif // NORMLZR_H
C++ API: Character Iterator.
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:358
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:142
Normalization filtered by a UnicodeSet.
"Smart pointer" base class; do not use directly: use LocalPointer etc.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition normalizer2.h:83
Old Unicode normalization API.
Definition normlzr.h:134
int32_t startIndex(void) const
Retrieve the index of the start of the input text.
void reset(void)
Reset the index to the beginning of the text.
int32_t endIndex(void) const
Retrieve the index of the end of the input text.
static void decompose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Static method to decompose a UnicodeString.
int32_t hashCode(void) const
Generates a hash code for this iterator.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
int32_t getIndex(void) const
Retrieve the current iteration position in the input text that is being normalized.
void setMode(UNormalizationMode newMode)
Set the normalization mode for this object.
UChar32 previous(void)
Return the previous character in the normalized text and decrement.
UBool operator==(const Normalizer &that) const
Returns TRUE when both iterators refer to the same character in the same input text.
UChar32 last(void)
Return the last character in the normalized text.
virtual ~Normalizer()
Destructor.
void setText(const UnicodeString &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
UChar32 current(void)
Return the current character in the normalized text.
UBool getOption(int32_t option) const
Determine whether an option is turned on or off.
UNormalizationMode getUMode(void) const
Return the normalization mode for this object.
void setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Test if a string is in a given normalization form; same as the other version of isNormalized but takes an extra options parameter like most normalization functions.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
Normalizer(const Normalizer &copy)
Copy constructor.
Normalizer(const UnicodeString &str, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
void setText(const CharacterIterator &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status)
Performing quick check on a string; same as the other version of quickCheck but takes an extra options parameter like most normalization functions.
Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
void setOption(int32_t option, UBool value)
Set options that affect this Normalizer's operation.
static void normalize(const UnicodeString &source, UNormalizationMode mode, int32_t options, UnicodeString &result, UErrorCode &status)
Normalizes a UnicodeString according to the specified normalization mode.
static UnicodeString & concatenate(const UnicodeString &left, const UnicodeString &right, UnicodeString &result, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Concatenate normalized strings, making sure that the result is normalized as well.
Normalizer(const CharacterIterator &iter, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of the given text.
UChar32 next(void)
Return the next character in the normalized text.
void getText(UnicodeString &result)
Copies the input text into the UnicodeString argument.
void setIndexOnly(int32_t index)
Set the iteration position in the input text that is being normalized, without any immediate normalization.
static void compose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Compose a UnicodeString.
Normalizer * clone(void) const
Returns a pointer to a new Normalizer that is a clone of this one.
UChar32 first(void)
Return the first character in the normalized text.
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition unistr.h:287
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
const UChar * toUCharPtr(const char16_t *p)
Converts from const char16_t * to const UChar *.
Definition char16ptr.h:251
C++ API: New API for Unicode Normalization.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:400
int8_t UBool
The ICU boolean type.
Definition umachine.h:236
C++ API: Unicode String.
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition unorm2.h:94
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
C API: Unicode Normalization.
UNormalizationMode
Constants for normalization modes.
Definition unorm.h:138
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:93
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition utypes.h:396
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:359
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:138
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:137