ICU 62.1 62.1
rbbi.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4***************************************************************************
5* Copyright (C) 1999-2016 International Business Machines Corporation *
6* and others. All rights reserved. *
7***************************************************************************
8
9**********************************************************************
10* Date Name Description
11* 10/22/99 alan Creation.
12* 11/11/99 rgillam Complete port from Java.
13**********************************************************************
14*/
15
16#ifndef RBBI_H
17#define RBBI_H
18
19#include "unicode/utypes.h"
20
26#if !UCONFIG_NO_BREAK_ITERATION
27
28#include "unicode/brkiter.h"
29#include "unicode/udata.h"
30#include "unicode/parseerr.h"
31#include "unicode/schriter.h"
32
34
36class LanguageBreakEngine;
37struct RBBIDataHeader;
38class RBBIDataWrapper;
39class UnhandledEngine;
40class UStack;
41
54
55private:
60 UText fText;
61
62#ifndef U_HIDE_INTERNAL_API
63public:
64#endif /* U_HIDE_INTERNAL_API */
71private:
72
77 int32_t fPosition;
78
82 int32_t fRuleStatusIndex;
83
87 class BreakCache;
88 BreakCache *fBreakCache;
89
94 class DictionaryCache;
95 DictionaryCache *fDictionaryCache;
96
104 UStack *fLanguageBreakEngines;
105
113 UnhandledEngine *fUnhandledBreakEngine;
114
120 uint32_t fDictionaryCharCount;
121
127 CharacterIterator *fCharIter;
128
134 StringCharacterIterator fSCharIter;
135
139 UBool fDone;
140
141 //=======================================================================
142 // constructors
143 //=======================================================================
144
156
158 friend class RBBIRuleBuilder;
160 friend class BreakIterator;
161
162public:
163
169
177
189
214 uint32_t ruleLength,
216
230
236
245
254 virtual UBool operator==(const BreakIterator& that) const;
255
263 UBool operator!=(const BreakIterator& that) const;
264
275 virtual BreakIterator* clone() const;
276
282 virtual int32_t hashCode(void) const;
283
289 virtual const UnicodeString& getRules(void) const;
290
291 //=======================================================================
292 // BreakIterator overrides
293 //=======================================================================
294
320 virtual CharacterIterator& getText(void) const;
321
322
338
347
359 virtual void setText(const UnicodeString& newText);
360
374 virtual void setText(UText *text, UErrorCode &status);
375
381 virtual int32_t first(void);
382
388 virtual int32_t last(void);
389
400 virtual int32_t next(int32_t n);
401
407 virtual int32_t next(void);
408
414 virtual int32_t previous(void);
415
423 virtual int32_t following(int32_t offset);
424
432 virtual int32_t preceding(int32_t offset);
433
442 virtual UBool isBoundary(int32_t offset);
443
452 virtual int32_t current(void) const;
453
454
486 virtual int32_t getRuleStatus() const;
487
511 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
512
524 virtual UClassID getDynamicClassID(void) const;
525
538
566 int32_t &BufferSize,
568
569
587 virtual const uint8_t *getBinaryRules(uint32_t &length);
588
615
616
617private:
618 //=======================================================================
619 // implementation
620 //=======================================================================
626 void reset(void);
627
632 void init(UErrorCode &status);
633
643 int32_t handleSafePrevious(int32_t fromPosition);
644
657 int32_t handleNext();
658
659
666 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
667
668 public:
669#ifndef U_HIDE_INTERNAL_API
674 void dumpCache();
675
681
682#endif /* U_HIDE_INTERNAL_API */
683};
684
685//------------------------------------------------------------------------------
686//
687// Inline Functions Definitions ...
688//
689//------------------------------------------------------------------------------
690
// Inequality operator for RuleBasedBreakIterator.
// Returns the logical negation of operator==(that), so equality and inequality
// always give opposite answers for the same argument.
691inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
692 return !operator==(that);
693}
694
696
697#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
698
699#endif
C++ API: Break Iterator.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition brkiter.h:102
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition brkiter.h:131
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:358
"Smart pointer" base class; do not use directly: use LocalPointer etc.
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition rbbi.h:53
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the break iterator is operating without changing any other aspect of the state.
virtual void setText(UText *text, UErrorCode &status)
Reset the break iterator to operate over the text represented by the UText.
RBBIDataWrapper * fData
The rule data for this BreakIterator instance.
Definition rbbi.h:70
virtual CharacterIterator & getText(void) const
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
Get the status (tag) values from the break rule(s) that determined the boundary at the current iteration position.
virtual UClassID getDynamicClassID(void) const
Returns a unique class ID POLYMORPHICALLY.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const
Get a UText for the text being analyzed.
void dumpTables()
Debugging function only.
virtual BreakIterator * clone() const
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the same text, as this one.
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)
Deprecated functionality.
virtual int32_t previous(void)
Moves the iterator backwards, to the last boundary preceding this one.
virtual int32_t current(void) const
Returns the current iteration position.
virtual int32_t next(void)
Advances the iterator to the next boundary position.
void dumpCache()
Debugging function only.
virtual int32_t getRuleStatus() const
Return the status tag from the break rule that determined the boundary at the current iteration position.
RuleBasedBreakIterator()
Default constructor.
virtual void setText(const UnicodeString &newText)
Set the iterator to analyze a new piece of text.
virtual UBool operator==(const BreakIterator &that) const
Equality operator.
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
static UClassID getStaticClassID(void)
Returns the class ID for this class.
virtual UBool isBoundary(int32_t offset)
Returns true if the specified position is a boundary position.
virtual int32_t preceding(int32_t offset)
Sets the iterator to refer to the last boundary position before the specified position.
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
virtual int32_t next(int32_t n)
Advances the iterator either forward or backward the specified number of steps.
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a memory-mapped file.
virtual int32_t following(int32_t offset)
Sets the iterator to refer to the first boundary position following the specified position.
virtual int32_t first(void)
Sets the current iteration position to the beginning of the text, position zero.
virtual const UnicodeString & getRules(void) const
Returns the description used to create this iterator.
virtual int32_t last(void)
Sets the current iteration position to the end of the text.
virtual ~RuleBasedBreakIterator()
Destructor.
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator at some time in the future.
virtual void adoptText(CharacterIterator *newText)
Set the iterator to analyze a new piece of text.
virtual int32_t hashCode(void) const
Compute a hash code for this BreakIterator.
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points) in a UnicodeString.
Definition schriter.h:45
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition unistr.h:287
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C API: Parse Error Information.
C++ API: String Character Iterator.
A UParseError struct is used to return detailed information about parsing errors.
Definition parseerr.h:58
UText struct.
Definition utext.h:1345
C API: Data loading interface.
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
Definition udata.h:158
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:400
int8_t UBool
The ICU boolean type.
Definition umachine.h:236
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:93
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition utypes.h:396
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:359
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:138
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:137