ICU 62.1 62.1
uspoof.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4***************************************************************************
5* Copyright (C) 2008-2016, International Business Machines Corporation
6* and others. All Rights Reserved.
7***************************************************************************
8* file name: uspoof.h
9* encoding: UTF-8
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2008Feb13
14* created by: Andy Heninger
15*
16* Unicode Spoof Detection
17*/
18
19#ifndef USPOOF_H
20#define USPOOF_H
21
22#include "unicode/utypes.h"
23#include "unicode/uset.h"
24#include "unicode/parseerr.h"
26
27#if !UCONFIG_NO_NORMALIZATION
28
29
30#if U_SHOW_CPLUSPLUS_API
31#include "unicode/unistr.h"
32#include "unicode/uniset.h"
33#endif
34
35
370struct USpoofChecker;
376struct USpoofCheckResult;
382
390typedef enum USpoofChecks {
400
410
420
431
432#ifndef U_HIDE_DEPRECATED_API
439#endif /* U_HIDE_DEPRECATED_API */
440
455
456#ifndef U_HIDE_DEPRECATED_API
463#endif /* U_HIDE_DEPRECATED_API */
464
472
479
487
488#ifndef U_HIDE_DRAFT_API
509#endif /* U_HIDE_DRAFT_API */
510
517
530 USPOOF_AUX_INFO = 0x40000000
531
533
534
544 typedef enum URestrictionLevel {
551 USPOOF_ASCII = 0x10000000,
599#ifndef U_HIDE_INTERNAL_API
605#endif /* U_HIDE_INTERNAL_API */
607
618U_STABLE USpoofChecker * U_EXPORT2
620
621
643U_STABLE USpoofChecker * U_EXPORT2
644uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
645 UErrorCode *pErrorCode);
646
677U_STABLE USpoofChecker * U_EXPORT2
678uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
679 const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
680 int32_t *errType, UParseError *pe, UErrorCode *status);
681
682
688U_STABLE void U_EXPORT2
690
691#if U_SHOW_CPLUSPLUS_API
692
694
705
707
708#endif
709
719U_STABLE USpoofChecker * U_EXPORT2
721
722
760U_STABLE void U_EXPORT2
761uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
762
774U_STABLE int32_t U_EXPORT2
776
788U_STABLE void U_EXPORT2
790
791
801
844U_STABLE void U_EXPORT2
845uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
846
868U_STABLE const char * U_EXPORT2
870
871
890U_STABLE void U_EXPORT2
892
893
914U_STABLE const USet * U_EXPORT2
916
917
918#if U_SHOW_CPLUSPLUS_API
937U_STABLE void U_EXPORT2
939
940
961U_STABLE const icu::UnicodeSet * U_EXPORT2
963#endif
964
965
998U_STABLE int32_t U_EXPORT2
1000 const UChar *id, int32_t length,
1001 int32_t *position,
1002 UErrorCode *status);
1003
1004
1037U_STABLE int32_t U_EXPORT2
1039 const char *id, int32_t length,
1040 int32_t *position,
1041 UErrorCode *status);
1042
1043
1044#if U_SHOW_CPLUSPLUS_API
1073U_STABLE int32_t U_EXPORT2
1075 const icu::UnicodeString &id,
1076 int32_t *position,
1077 UErrorCode *status);
1078#endif
1079
1080
1109U_STABLE int32_t U_EXPORT2
1111 const UChar* id, int32_t length,
1112 USpoofCheckResult* checkResult,
1113 UErrorCode *status);
1114
1146U_STABLE int32_t U_EXPORT2
1148 const char *id, int32_t length,
1149 USpoofCheckResult* checkResult,
1150 UErrorCode *status);
1151
1152#if U_SHOW_CPLUSPLUS_API
1178U_STABLE int32_t U_EXPORT2
1180 const icu::UnicodeString &id,
1181 USpoofCheckResult* checkResult,
1182 UErrorCode *status);
1183#endif
1184
1203U_STABLE USpoofCheckResult* U_EXPORT2
1205
1213U_STABLE void U_EXPORT2
1215
1216#if U_SHOW_CPLUSPLUS_API
1217
1219
1230
1232
1233#endif
1234
1249U_STABLE int32_t U_EXPORT2
1251
1264
1276U_STABLE const USet* U_EXPORT2
1278
1279
1323U_STABLE int32_t U_EXPORT2
1325 const UChar *id1, int32_t length1,
1326 const UChar *id2, int32_t length2,
1327 UErrorCode *status);
1328
1329
1330
1356U_STABLE int32_t U_EXPORT2
1358 const char *id1, int32_t length1,
1359 const char *id2, int32_t length2,
1360 UErrorCode *status);
1361
1362
1363
1364
1365#if U_SHOW_CPLUSPLUS_API
1387U_STABLE int32_t U_EXPORT2
1389 const icu::UnicodeString &s1,
1390 const icu::UnicodeString &s2,
1391 UErrorCode *status);
1392#endif
1393
1394
1426U_STABLE int32_t U_EXPORT2
1428 uint32_t type,
1429 const UChar *id, int32_t length,
1430 UChar *dest, int32_t destCapacity,
1431 UErrorCode *status);
1432
1466U_STABLE int32_t U_EXPORT2
1468 uint32_t type,
1469 const char *id, int32_t length,
1470 char *dest, int32_t destCapacity,
1471 UErrorCode *status);
1472
1473#if U_SHOW_CPLUSPLUS_API
1499 uint32_t type,
1500 const icu::UnicodeString &id,
1501 icu::UnicodeString &dest,
1502 UErrorCode *status);
1503#endif /* U_SHOW_CPLUSPLUS_API */
1504
1517U_STABLE const USet * U_EXPORT2
1519
1532U_STABLE const USet * U_EXPORT2
1534
1535#if U_SHOW_CPLUSPLUS_API
1536
1549U_STABLE const icu::UnicodeSet * U_EXPORT2
1551
1564U_STABLE const icu::UnicodeSet * U_EXPORT2
1566
1567#endif /* U_SHOW_CPLUSPLUS_API */
1568
1591U_STABLE int32_t U_EXPORT2
1593 void *data, int32_t capacity,
1594 UErrorCode *status);
1595
1596
1597#endif
1598
1599#endif /* USPOOF_H */
"Smart pointer" class, closes a USpoofCheckResult via uspoof_closeCheckResult.
"Smart pointer" class, closes a USpoofChecker via uspoof_close().
A mutable set of Unicode characters and multicharacter strings.
Definition uniset.h:278
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:287
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition parseerr.h:58
struct USet USet
Definition ucnv.h:69
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:353
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition umachine.h:111
C++ API: Unicode Set.
C++ API: Unicode String.
C API: Unicode Set.
int32_t uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, int32_t length, char *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
USpoofCheckResult * uspoof_openCheckResult(UErrorCode *status)
Create a USpoofCheckResult, used by the uspoof_check2 class of functions to return information about ...
USpoofChecks
Enum for the kinds of checks that USpoofChecker can perform.
Definition uspoof.h:390
@ USPOOF_MIXED_NUMBERS
Check that an identifier does not mix numbers from different numbering systems.
Definition uspoof.h:486
@ USPOOF_RESTRICTION_LEVEL
Check that an identifier is no looser than the specified RestrictionLevel.
Definition uspoof.h:454
@ USPOOF_CHAR_LIMIT
Check that an identifier contains only characters from a specified set of acceptable characters.
Definition uspoof.h:478
@ USPOOF_CONFUSABLE
Enable this flag in uspoof_setChecks to turn on all types of confusables.
Definition uspoof.h:430
@ USPOOF_ALL_CHECKS
Enable all spoof checks.
Definition uspoof.h:516
@ USPOOF_SINGLE_SCRIPT
Check that an identifier contains only characters from a single script (plus chars from the common an...
Definition uspoof.h:462
@ USPOOF_HIDDEN_OVERLAY
Check that an identifier does not have a combining character following a character in which that comb...
Definition uspoof.h:508
@ USPOOF_INVISIBLE
Check an identifier for the presence of invisible characters, such as zero-width spaces,...
Definition uspoof.h:471
@ USPOOF_MIXED_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition uspoof.h:409
@ USPOOF_AUX_INFO
Enable the return of auxiliary (non-error) information in the upper bits of the check results value.
Definition uspoof.h:530
@ USPOOF_WHOLE_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition uspoof.h:419
@ USPOOF_SINGLE_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition uspoof.h:399
@ USPOOF_ANY_CASE
This flag is deprecated and no longer affects the behavior of SpoofChecker.
Definition uspoof.h:438
int32_t uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status)
Get the set of checks that this Spoof Checker has been configured to perform.
void uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status)
Specify the bitmask of checks that will be performed by uspoof_check.
const USet * uspoof_getInclusionSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode....
void uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel)
Set the loosest restriction level allowed for strings.
void uspoof_closeCheckResult(USpoofCheckResult *checkResult)
Close a USpoofCheckResult, freeing any memory that was being held by its implementation.
U_I18N_API icu::UnicodeString & uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, uint32_t type, const icu::UnicodeString &id, icu::UnicodeString &dest, UErrorCode *status)
Get the "skeleton" for an identifier.
URestrictionLevel uspoof_getRestrictionLevel(const USpoofChecker *sc)
Get the Restriction Level that will be tested if the checks include USPOOF_RESTRICTION_LEVEL.
URestrictionLevel
Constants from UAX #39 for use in uspoof_setRestrictionLevel, and for returned identifier restriction...
Definition uspoof.h:544
@ USPOOF_SINGLE_SCRIPT_RESTRICTIVE
The string classifies as ASCII-Only, or all characters in the string are in the identifier profile an...
Definition uspoof.h:558
@ USPOOF_MODERATELY_RESTRICTIVE
The string classifies as Highly Restrictive, or all characters in the string are in the identifier pr...
Definition uspoof.h:580
@ USPOOF_ASCII
All characters in the string are in the identifier profile and all characters in the string are in th...
Definition uspoof.h:551
@ USPOOF_UNDEFINED_RESTRICTIVE
An undefined restriction level.
Definition uspoof.h:604
@ USPOOF_RESTRICTION_LEVEL_MASK
Mask for selecting the Restriction Level bits from the return value of uspoof_check.
Definition uspoof.h:598
@ USPOOF_MINIMALLY_RESTRICTIVE
All characters in the string are in the identifier profile.
Definition uspoof.h:586
@ USPOOF_HIGHLY_RESTRICTIVE
The string classifies as Single Script, or all characters in the string are in the identifier profile...
Definition uspoof.h:572
@ USPOOF_UNRESTRICTIVE
Any valid identifiers, including characters outside of the Identifier Profile.
Definition uspoof.h:592
void uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
USpoofChecker * uspoof_clone(const USpoofChecker *sc, UErrorCode *status)
Clone a Spoof Checker.
int32_t uspoof_check2(const USpoofChecker *sc, const UChar *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
int32_t uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &s1, const icu::UnicodeString &s2, UErrorCode *status)
A version of uspoof_areConfusable accepting UnicodeStrings.
USpoofChecker * uspoof_openFromSource(const char *confusables, int32_t confusablesLen, const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, int32_t *errType, UParseError *pe, UErrorCode *status)
Open a Spoof Checker from the source form of the spoof data.
int32_t uspoof_check2UnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
int32_t uspoof_serialize(USpoofChecker *sc, void *data, int32_t capacity, UErrorCode *status)
Serialize the data for a spoof detector into a chunk of memory.
int32_t uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status)
Indicates which of the spoof check(s) have failed.
const icu::UnicodeSet * uspoof_getRecommendedUnicodeSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
const USet * uspoof_getRecommendedSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
URestrictionLevel uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check was enabled; ot...
const USet * uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status)
Get a USet for the characters permitted in an identifier.
int32_t uspoof_check(const USpoofChecker *sc, const UChar *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
int32_t uspoof_check2UTF8(const USpoofChecker *sc, const char *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
USpoofChecker * uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode)
Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
const USet * uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled; otherwis...
struct USpoofCheckResult USpoofCheckResult
Definition uspoof.h:381
void uspoof_close(USpoofChecker *sc)
Close a Spoof Checker, freeing any memory that was being held by its implementation.
int32_t uspoof_checkUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
void uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
int32_t uspoof_checkUTF8(const USpoofChecker *sc, const char *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
int32_t uspoof_getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *id, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
int32_t uspoof_areConfusable(const USpoofChecker *sc, const UChar *id1, int32_t length1, const UChar *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
void uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status)
Limit characters that are acceptable in identifiers being checked to those normally used with the lan...
USpoofChecker * uspoof_open(UErrorCode *status)
Create a Unicode Spoof Checker, configured to perform all checks except for USPOOF_LOCALE_LIMIT and U...
const icu::UnicodeSet * uspoof_getInclusionUnicodeSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode....
struct USpoofChecker USpoofChecker
typedef for C of USpoofChecker
Definition uspoof.h:374
const icu::UnicodeSet * uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status)
Get a UnicodeSet for the characters permitted in an identifier.
const char * uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status)
Get a list of locales for the scripts that are acceptable in strings to be checked.
int32_t uspoof_areConfusableUTF8(const USpoofChecker *sc, const char *id1, int32_t length1, const char *id2, int32_t length2, UErrorCode *status)
A version of uspoof_areConfusable accepting strings in UTF-8 format.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition utypes.h:396
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition utypes.h:360
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition uversion.h:138
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition uversion.h:137