ICU 62.1
uset.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uset.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002mar07
16* created by: Markus W. Scherer
17*
18* C version of UnicodeSet.
19*/
20
21
29#ifndef __USET_H__
30#define __USET_H__
31
32#include "unicode/utypes.h"
33#include "unicode/uchar.h"
35
36#ifndef UCNV_H
37struct USet;
43typedef struct USet USet;
44#endif
45
51enum {
57
85
95};
96
152typedef enum USetSpanCondition {
201#ifndef U_HIDE_DEPRECATED_API
207#endif // U_HIDE_DEPRECATED_API
209
210enum {
219
247
248/*********************************************************************
249 * USet API
250 *********************************************************************/
251
259U_STABLE USet* U_EXPORT2
261
272U_STABLE USet* U_EXPORT2
274
284U_STABLE USet* U_EXPORT2
285uset_openPattern(const UChar* pattern, int32_t patternLength,
286 UErrorCode* ec);
287
299U_STABLE USet* U_EXPORT2
300uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
301 uint32_t options,
302 UErrorCode* ec);
303
310U_STABLE void U_EXPORT2
312
313#if U_SHOW_CPLUSPLUS_API
314
316
327
329
330#endif
331
341U_STABLE USet * U_EXPORT2
342uset_clone(const USet *set);
343
353U_STABLE UBool U_EXPORT2
354uset_isFrozen(const USet *set);
355
370U_STABLE void U_EXPORT2
372
383U_STABLE USet * U_EXPORT2
385
395U_STABLE void U_EXPORT2
397 UChar32 start, UChar32 end);
398
420U_STABLE int32_t U_EXPORT2
422 const UChar *pattern, int32_t patternLength,
423 uint32_t options,
424 UErrorCode *status);
425
448U_STABLE void U_EXPORT2
450 UProperty prop, int32_t value, UErrorCode* ec);
451
487U_STABLE void U_EXPORT2
489 const UChar *prop, int32_t propLength,
490 const UChar *value, int32_t valueLength,
491 UErrorCode* ec);
492
502U_STABLE UBool U_EXPORT2
503uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
504 int32_t pos);
505
521U_STABLE int32_t U_EXPORT2
523 UChar* result, int32_t resultCapacity,
524 UBool escapeUnprintable,
525 UErrorCode* ec);
526
535U_STABLE void U_EXPORT2
537
550U_STABLE void U_EXPORT2
551uset_addAll(USet* set, const USet *additionalSet);
552
562U_STABLE void U_EXPORT2
564
574U_STABLE void U_EXPORT2
575uset_addString(USet* set, const UChar* str, int32_t strLen);
576
586U_STABLE void U_EXPORT2
587uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
588
597U_STABLE void U_EXPORT2
599
609U_STABLE void U_EXPORT2
611
621U_STABLE void U_EXPORT2
622uset_removeString(USet* set, const UChar* str, int32_t strLen);
623
635U_STABLE void U_EXPORT2
636uset_removeAll(USet* set, const USet* removeSet);
637
652U_STABLE void U_EXPORT2
653uset_retain(USet* set, UChar32 start, UChar32 end);
654
667U_STABLE void U_EXPORT2
668uset_retainAll(USet* set, const USet* retain);
669
678U_STABLE void U_EXPORT2
680
689U_STABLE void U_EXPORT2
691
703U_STABLE void U_EXPORT2
704uset_complementAll(USet* set, const USet* complement);
705
713U_STABLE void U_EXPORT2
715
742U_STABLE void U_EXPORT2
743uset_closeOver(USet* set, int32_t attributes);
744
751U_STABLE void U_EXPORT2
753
761U_STABLE UBool U_EXPORT2
762uset_isEmpty(const USet* set);
763
772U_STABLE UBool U_EXPORT2
774
784U_STABLE UBool U_EXPORT2
785uset_containsRange(const USet* set, UChar32 start, UChar32 end);
786
795U_STABLE UBool U_EXPORT2
796uset_containsString(const USet* set, const UChar* str, int32_t strLen);
797
808U_STABLE int32_t U_EXPORT2
809uset_indexOf(const USet* set, UChar32 c);
810
821U_STABLE UChar32 U_EXPORT2
822uset_charAt(const USet* set, int32_t charIndex);
823
832U_STABLE int32_t U_EXPORT2
833uset_size(const USet* set);
834
843U_STABLE int32_t U_EXPORT2
845
864U_STABLE int32_t U_EXPORT2
865uset_getItem(const USet* set, int32_t itemIndex,
866 UChar32* start, UChar32* end,
867 UChar* str, int32_t strCapacity,
868 UErrorCode* ec);
869
878U_STABLE UBool U_EXPORT2
879uset_containsAll(const USet* set1, const USet* set2);
880
891U_STABLE UBool U_EXPORT2
892uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
893
902U_STABLE UBool U_EXPORT2
903uset_containsNone(const USet* set1, const USet* set2);
904
913U_STABLE UBool U_EXPORT2
914uset_containsSome(const USet* set1, const USet* set2);
915
935U_STABLE int32_t U_EXPORT2
936uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
937
956U_STABLE int32_t U_EXPORT2
957uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
958
978U_STABLE int32_t U_EXPORT2
979uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
980
999U_STABLE int32_t U_EXPORT2
1000uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1001
1010U_STABLE UBool U_EXPORT2
1011uset_equals(const USet* set1, const USet* set2);
1012
1013/*********************************************************************
1014 * Serialized set API
1015 *********************************************************************/
1016
1066U_STABLE int32_t U_EXPORT2
1067uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1068
1077U_STABLE UBool U_EXPORT2
1078uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1079
1087U_STABLE void U_EXPORT2
1089
1098U_STABLE UBool U_EXPORT2
1100
1110U_STABLE int32_t U_EXPORT2
1112
1126U_STABLE UBool U_EXPORT2
1127uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1128 UChar32* pStart, UChar32* pEnd);
1129
1130#endif
"Smart pointer" class, closes a USet via uset_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:225
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:245
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:235
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:230
int32_t length
The total length of the array.
Definition uset.h:240
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:165
struct USet USet
Definition ucnv.h:69
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:400
int8_t UBool
The ICU boolean type.
Definition umachine.h:236
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:353
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:111
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
void uset_clear(USet *set)
Removes all of the elements from this set.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition uset.h:152
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:165
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:180
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:206
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:200
void uset_complement(USet *set)
Inverts this set.
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:217
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
USet * uset_clone(const USet *set)
Returns a copy of this object.
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:56
@ USET_ADD_CASE_MAPPINGS
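Enable case insensitive matching by adding the lower-, title-, and uppercase mappings as well as the case folding of each existing element in the set.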
Definition uset.h:94
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:84
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
void uset_freeze(USet *set)
Freeze the set (make it immutable).
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
void uset_close(USet *set)
Disposes of the storage used by a USet object.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
USet * uset_openEmpty(void)
Create an empty USet object.
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:396
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:138
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:137