ICU 62.1 62.1
brkiter.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4********************************************************************************
5* Copyright (C) 1997-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7********************************************************************************
8*
9* File brkiter.h
10*
11* Modification History:
12*
13* Date Name Description
14* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
15* 05/07/97 aliu Fixed DLL declaration.
16* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
17* 08/11/98 helena Sync-up JDK1.2.
18* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
19********************************************************************************
20*/
21
22#ifndef BRKITER_H
23#define BRKITER_H
24
25#include "unicode/utypes.h"
26
32#if UCONFIG_NO_BREAK_ITERATION
33
34U_NAMESPACE_BEGIN
35
36/*
37 * Allow the declaration of APIs with pointers to BreakIterator
38 * even when break iteration is removed from the build.
39 * The forward declaration is wrapped in the ICU namespace macros so that it
   * names the same class as the full declaration in the #else branch.
   */
40class BreakIterator;
41
42U_NAMESPACE_END
43
44#else
45
46#include "unicode/uobject.h"
47#include "unicode/unistr.h"
48#include "unicode/chariter.h"
49#include "unicode/locid.h"
50#include "unicode/ubrk.h"
51#include "unicode/strenum.h"
52#include "unicode/utext.h"
53#include "unicode/umisc.h"
54
55U_NAMESPACE_BEGIN
56
/**
 * The BreakIterator class implements methods for finding the location of
 * boundaries in text.
 *
 * Abstract base class: most member functions are pure virtual. Instances are
 * obtained through the static create*Instance() factory functions.
 */
102class U_COMMON_API BreakIterator : public UObject {
103public:
    /** Destructor. */
108 virtual ~BreakIterator();
109
    /** Return true if another object is semantically equal to this one. */
123 virtual UBool operator==(const BreakIterator&) const = 0;
124
    /** Returns the complement of the result of operator==. */
131 UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
132
    /** Return a polymorphic copy of this object. */
138 virtual BreakIterator* clone(void) const = 0;
139
    /** Return a polymorphic class ID for this object. */
145 virtual UClassID getDynamicClassID(void) const = 0;
146
    /** Return a CharacterIterator over the text being analyzed. */
151 virtual CharacterIterator& getText(void) const = 0;
152
153
    /**
     * Get a UText for the text being analyzed.
     * @param fillIn  UText passed in by the caller.
     * @param status  receives any error code.
     */
168 virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
169
    /** Change the text over which this operates. */
181 virtual void setText(const UnicodeString &text) = 0;
182
    /** Reset the break iterator to operate over the text represented by the UText. */
201 virtual void setText(UText *text, UErrorCode &status) = 0;
202
    /** Change the text over which this operates. */
211 virtual void adoptText(CharacterIterator* it) = 0;
212
213 enum {
    /*
     * Sentinel value -1. Presumably what the iteration functions return when
     * there is no further boundary -- confirm against the full API docs.
     */
219 DONE = (int32_t)-1
220 };
221
    /** Sets the current iteration position to the beginning of the text, position zero. */
227 virtual int32_t first(void) = 0;
228
    /**
     * Set the iterator position to the index immediately BEYOND the last
     * character in the text being scanned.
     */
234 virtual int32_t last(void) = 0;
235
    /** Set the iterator position to the boundary preceding the current boundary. */
242 virtual int32_t previous(void) = 0;
243
    /** Advance the iterator to the boundary following the current boundary. */
250 virtual int32_t next(void) = 0;
251
    /** Return character index of the current iterator position within the text. */
257 virtual int32_t current(void) const = 0;
258
    /** Advance the iterator to the first boundary following the specified offset. */
267 virtual int32_t following(int32_t offset) = 0;
268
    /** Set the iterator position to the first boundary preceding the specified offset. */
277 virtual int32_t preceding(int32_t offset) = 0;
278
    /** Return true if the specified position is a boundary position. */
287 virtual UBool isBoundary(int32_t offset) = 0;
288
    /** Set the iterator position to the nth boundary from the current boundary. */
298 virtual int32_t next(int32_t n) = 0;
299
    /**
     * For RuleBasedBreakIterators, return the status tag from the break rule
     * that determined the boundary. Not pure virtual, unlike most members here.
     */
313 virtual int32_t getRuleStatus() const;
314
    /**
     * For RuleBasedBreakIterators, get the status (tag) values from the break
     * rule(s) that determined the boundary.
     * @param fillInVec  caller-supplied array that receives the values.
     * @param capacity   capacity of fillInVec.
     * @param status     receives any error code.
     */
343 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
344
    /** Create BreakIterator for word-breaks using the given locale. */
364 static BreakIterator* U_EXPORT2
365 createWordInstance(const Locale& where, UErrorCode& status);
366
    /** Create BreakIterator for line-breaks using specified locale. */
388 static BreakIterator* U_EXPORT2
389 createLineInstance(const Locale& where, UErrorCode& status);
390
    /** Create BreakIterator for character-breaks using specified locale. */
410 static BreakIterator* U_EXPORT2
411 createCharacterInstance(const Locale& where, UErrorCode& status);
412
    /** Create BreakIterator for sentence-breaks using specified locale. */
431 static BreakIterator* U_EXPORT2
432 createSentenceInstance(const Locale& where, UErrorCode& status);
433
    /** Create BreakIterator for title-casing breaks using the specified locale. */
456 static BreakIterator* U_EXPORT2
457 createTitleInstance(const Locale& where, UErrorCode& status);
458
    /**
     * Get the set of Locales for which TextBoundaries are installed.
     * @param count  output parameter, passed by non-const reference.
     */
468 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
469
    /** Get name of the object for the desired Locale, in the desired language. */
479 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
480 const Locale& displayLocale,
481 UnicodeString& name);
482
    /** Get name of the object for the desired Locale, in the language of the default locale. */
491 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
492 UnicodeString& name);
493
    /** Deprecated functionality. */
513 virtual BreakIterator * createBufferClone(void *stackBuffer,
514 int32_t &BufferSize,
515 UErrorCode &status) = 0;
516
517#ifndef U_HIDE_DEPRECATED_API
518
    /** Deprecated. The inline definition after the class always returns FALSE. */
525 inline UBool isBufferClone(void);
526
527#endif /* U_HIDE_DEPRECATED_API */
528
529#if !UCONFIG_NO_SERVICE
    /**
     * Register a new break iterator of the indicated kind, to use in the given locale.
     * @return a registry key, which unregister() accepts.
     */
545 static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
546 const Locale& locale,
547 UBreakIteratorType kind,
548 UErrorCode& status);
549
    /**
     * Unregister a previously-registered BreakIterator using the key returned
     * from the register call.
     */
562 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
563
    /**
     * Return a StringEnumeration over the locales available at the time of
     * the call, including registered locales.
     */
570 static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
571#endif
572
    /** Returns the locale for this break iterator. */
578 Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
579
580#ifndef U_HIDE_INTERNAL_API
    /** Get the locale for this break iterator object. Internal API. */
587 const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
588#endif /* U_HIDE_INTERNAL_API */
589
    /**
     * Set the subject text string upon which the break iterator is operating
     * without changing any other aspect of its state.
     */
615 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
616
617 private:
    /* Private static factory helpers; their definitions are not in this header. */
618 static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
619 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
620 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
621
    /* These classes are granted access to the private members above. */
622 friend class ICUBreakIteratorFactory;
623 friend class ICUBreakIteratorService;
624
625protected:
626 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
627 // or else the compiler will create public ones.
    /** Default constructor. Internal API. */
629 BreakIterator();
    /** Copy constructor. Internal API. */
631 BreakIterator (const BreakIterator &other) : UObject(other) {}
632#ifndef U_HIDE_INTERNAL_API
    /** Construct with the given valid and actual locales. Internal API. */
634 BreakIterator (const Locale& valid, const Locale &actual);
    /** Assignment operator. Internal API. */
636 BreakIterator &operator = (const BreakIterator &other);
637#endif /* U_HIDE_INTERNAL_API */
638
639private:
640
    /* Locale ID buffers, each sized ULOC_FULLNAME_CAPACITY. */
642 char actualLocale[ULOC_FULLNAME_CAPACITY];
643 char validLocale[ULOC_FULLNAME_CAPACITY];
644};
645
646#ifndef U_HIDE_DEPRECATED_API
647
/* Deprecated: unconditionally reports FALSE. */
648inline UBool BreakIterator::isBufferClone()
649{
650 return FALSE;
651}
652
653#endif /* U_HIDE_DEPRECATED_API */
654
655U_NAMESPACE_END
656
657#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
658
659#endif // BRKITER_H
660//eof
C++ API: Character Iterator.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition brkiter.h:102
virtual ~BreakIterator()
destructor
BreakIterator(const Locale &valid, const Locale &actual)
BreakIterator(const BreakIterator &other)
virtual CharacterIterator & getText(void) const =0
Return a CharacterIterator over the text being analyzed.
virtual int32_t next(void)=0
Advance the iterator to the boundary following the current boundary.
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the boundary a...
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition brkiter.h:131
static BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale. Returns an instance of a BreakIterat...
virtual int32_t last(void)=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
virtual void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.
virtual BreakIterator * clone(void) const =0
Return a polymorphic copy of this object.
static BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
virtual UClassID getDynamicClassID(void) const =0
Return a polymorphic class ID for this object.
virtual UBool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
static BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale. Returns an instance of a Brea...
virtual int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
static BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale. Returns an instance of a BreakIterato...
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
virtual int32_t first(void)=0
Sets the current iteration position to the beginning of the text, position zero.
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
static StringEnumeration * getAvailableLocales(void)
Return a StringEnumeration over the locales available at the time of the call, including registered l...
static BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
virtual int32_t previous(void)=0
Set the iterator position to the boundary preceding the current boundary.
static URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the b...
virtual int32_t current(void) const =0
Return character index of the current iterator position within the text.
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:358
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:188
Base class for 'pure' C++ implementations of uenum api.
Definition strenum.h:57
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:287
C++ API: Locale ID object.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C++ API: String Enumeration.
UText struct.
Definition utext.h:1345
C API: BreakIterator.
UBreakIteratorType
The possible types of text boundaries.
Definition ubrk.h:99
#define ULOC_FULLNAME_CAPACITY
Useful constant for the maximum size of the whole locale ID (including the terminating NULL and all k...
Definition uloc.h:264
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition uloc.h:338
int8_t UBool
The ICU boolean type.
Definition umachine.h:236
#define FALSE
The FALSE value of a UBool.
Definition umachine.h:244
C API: misc definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition umisc.h:57
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:93
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:396
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:359
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:138
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:137