ICU 76.1
uscript.h File Reference

C API: Unicode Script Information. More...

#include "unicode/utypes.h"

Go to the source code of this file.

Namespaces

namespace  icu
 File coll.h.
 

Typedefs

typedef enum UScriptCode UScriptCode
 Constants for ISO 15924 script codes.
 
typedef enum UScriptUsage UScriptUsage
 Script usage constants.
 

Enumerations

enum  UScriptCode {
  USCRIPT_INVALID_CODE = -1 , USCRIPT_COMMON = 0 , USCRIPT_INHERITED = 1 , USCRIPT_ARABIC = 2 ,
  USCRIPT_ARMENIAN = 3 , USCRIPT_BENGALI = 4 , USCRIPT_BOPOMOFO = 5 , USCRIPT_CHEROKEE = 6 ,
  USCRIPT_COPTIC = 7 , USCRIPT_CYRILLIC = 8 , USCRIPT_DESERET = 9 , USCRIPT_DEVANAGARI = 10 ,
  USCRIPT_ETHIOPIC = 11 , USCRIPT_GEORGIAN = 12 , USCRIPT_GOTHIC = 13 , USCRIPT_GREEK = 14 ,
  USCRIPT_GUJARATI = 15 , USCRIPT_GURMUKHI = 16 , USCRIPT_HAN = 17 , USCRIPT_HANGUL = 18 ,
  USCRIPT_HEBREW = 19 , USCRIPT_HIRAGANA = 20 , USCRIPT_KANNADA = 21 , USCRIPT_KATAKANA = 22 ,
  USCRIPT_KHMER = 23 , USCRIPT_LAO = 24 , USCRIPT_LATIN = 25 , USCRIPT_MALAYALAM = 26 ,
  USCRIPT_MONGOLIAN = 27 , USCRIPT_MYANMAR = 28 , USCRIPT_OGHAM = 29 , USCRIPT_OLD_ITALIC = 30 ,
  USCRIPT_ORIYA = 31 , USCRIPT_RUNIC = 32 , USCRIPT_SINHALA = 33 , USCRIPT_SYRIAC = 34 ,
  USCRIPT_TAMIL = 35 , USCRIPT_TELUGU = 36 , USCRIPT_THAANA = 37 , USCRIPT_THAI = 38 ,
  USCRIPT_TIBETAN = 39 , USCRIPT_CANADIAN_ABORIGINAL = 40 , USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL , USCRIPT_YI = 41 ,
  USCRIPT_TAGALOG = 42 , USCRIPT_HANUNOO = 43 , USCRIPT_BUHID = 44 , USCRIPT_TAGBANWA = 45 ,
  USCRIPT_BRAILLE = 46 , USCRIPT_CYPRIOT = 47 , USCRIPT_LIMBU = 48 , USCRIPT_LINEAR_B = 49 ,
  USCRIPT_OSMANYA = 50 , USCRIPT_SHAVIAN = 51 , USCRIPT_TAI_LE = 52 , USCRIPT_UGARITIC = 53 ,
  USCRIPT_KATAKANA_OR_HIRAGANA = 54 , USCRIPT_BUGINESE = 55 , USCRIPT_GLAGOLITIC = 56 , USCRIPT_KHAROSHTHI = 57 ,
  USCRIPT_SYLOTI_NAGRI = 58 , USCRIPT_NEW_TAI_LUE = 59 , USCRIPT_TIFINAGH = 60 , USCRIPT_OLD_PERSIAN = 61 ,
  USCRIPT_BALINESE = 62 , USCRIPT_BATAK = 63 , USCRIPT_BLISSYMBOLS = 64 , USCRIPT_BRAHMI = 65 ,
  USCRIPT_CHAM = 66 , USCRIPT_CIRTH = 67 , USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68 , USCRIPT_DEMOTIC_EGYPTIAN = 69 ,
  USCRIPT_HIERATIC_EGYPTIAN = 70 , USCRIPT_EGYPTIAN_HIEROGLYPHS = 71 , USCRIPT_KHUTSURI = 72 , USCRIPT_SIMPLIFIED_HAN = 73 ,
  USCRIPT_TRADITIONAL_HAN = 74 , USCRIPT_PAHAWH_HMONG = 75 , USCRIPT_OLD_HUNGARIAN = 76 , USCRIPT_HARAPPAN_INDUS = 77 ,
  USCRIPT_JAVANESE = 78 , USCRIPT_KAYAH_LI = 79 , USCRIPT_LATIN_FRAKTUR = 80 , USCRIPT_LATIN_GAELIC = 81 ,
  USCRIPT_LEPCHA = 82 , USCRIPT_LINEAR_A = 83 , USCRIPT_MANDAIC = 84 , USCRIPT_MANDAEAN = USCRIPT_MANDAIC ,
  USCRIPT_MAYAN_HIEROGLYPHS = 85 , USCRIPT_MEROITIC_HIEROGLYPHS = 86 , USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS , USCRIPT_NKO = 87 ,
  USCRIPT_ORKHON = 88 , USCRIPT_OLD_PERMIC = 89 , USCRIPT_PHAGS_PA = 90 , USCRIPT_PHOENICIAN = 91 ,
  USCRIPT_MIAO = 92 , USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO , USCRIPT_RONGORONGO = 93 , USCRIPT_SARATI = 94 ,
  USCRIPT_ESTRANGELO_SYRIAC = 95 , USCRIPT_WESTERN_SYRIAC = 96 , USCRIPT_EASTERN_SYRIAC = 97 , USCRIPT_TENGWAR = 98 ,
  USCRIPT_VAI = 99 , USCRIPT_VISIBLE_SPEECH = 100 , USCRIPT_CUNEIFORM = 101 , USCRIPT_UNWRITTEN_LANGUAGES = 102 ,
  USCRIPT_UNKNOWN = 103 , USCRIPT_CARIAN = 104 , USCRIPT_JAPANESE = 105 , USCRIPT_LANNA = 106 ,
  USCRIPT_LYCIAN = 107 , USCRIPT_LYDIAN = 108 , USCRIPT_OL_CHIKI = 109 , USCRIPT_REJANG = 110 ,
  USCRIPT_SAURASHTRA = 111 , USCRIPT_SIGN_WRITING = 112 , USCRIPT_SUNDANESE = 113 , USCRIPT_MOON = 114 ,
  USCRIPT_MEITEI_MAYEK = 115 , USCRIPT_IMPERIAL_ARAMAIC = 116 , USCRIPT_AVESTAN = 117 , USCRIPT_CHAKMA = 118 ,
  USCRIPT_KOREAN = 119 , USCRIPT_KAITHI = 120 , USCRIPT_MANICHAEAN = 121 , USCRIPT_INSCRIPTIONAL_PAHLAVI = 122 ,
  USCRIPT_PSALTER_PAHLAVI = 123 , USCRIPT_BOOK_PAHLAVI = 124 , USCRIPT_INSCRIPTIONAL_PARTHIAN = 125 , USCRIPT_SAMARITAN = 126 ,
  USCRIPT_TAI_VIET = 127 , USCRIPT_MATHEMATICAL_NOTATION = 128 , USCRIPT_SYMBOLS = 129 , USCRIPT_BAMUM = 130 ,
  USCRIPT_LISU = 131 , USCRIPT_NAKHI_GEBA = 132 , USCRIPT_OLD_SOUTH_ARABIAN = 133 , USCRIPT_BASSA_VAH = 134 ,
  USCRIPT_DUPLOYAN = 135 , USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN , USCRIPT_ELBASAN = 136 , USCRIPT_GRANTHA = 137 ,
  USCRIPT_KPELLE = 138 , USCRIPT_LOMA = 139 , USCRIPT_MENDE = 140 , USCRIPT_MEROITIC_CURSIVE = 141 ,
  USCRIPT_OLD_NORTH_ARABIAN = 142 , USCRIPT_NABATAEAN = 143 , USCRIPT_PALMYRENE = 144 , USCRIPT_KHUDAWADI = 145 ,
  USCRIPT_SINDHI = USCRIPT_KHUDAWADI , USCRIPT_WARANG_CITI = 146 , USCRIPT_AFAKA = 147 , USCRIPT_JURCHEN = 148 ,
  USCRIPT_MRO = 149 , USCRIPT_NUSHU = 150 , USCRIPT_SHARADA = 151 , USCRIPT_SORA_SOMPENG = 152 ,
  USCRIPT_TAKRI = 153 , USCRIPT_TANGUT = 154 , USCRIPT_WOLEAI = 155 , USCRIPT_ANATOLIAN_HIEROGLYPHS = 156 ,
  USCRIPT_KHOJKI = 157 , USCRIPT_TIRHUTA = 158 , USCRIPT_CAUCASIAN_ALBANIAN = 159 , USCRIPT_MAHAJANI = 160 ,
  USCRIPT_AHOM = 161 , USCRIPT_HATRAN = 162 , USCRIPT_MODI = 163 , USCRIPT_MULTANI = 164 ,
  USCRIPT_PAU_CIN_HAU = 165 , USCRIPT_SIDDHAM = 166 , USCRIPT_ADLAM = 167 , USCRIPT_BHAIKSUKI = 168 ,
  USCRIPT_MARCHEN = 169 , USCRIPT_NEWA = 170 , USCRIPT_OSAGE = 171 , USCRIPT_HAN_WITH_BOPOMOFO = 172 ,
  USCRIPT_JAMO = 173 , USCRIPT_SYMBOLS_EMOJI = 174 , USCRIPT_MASARAM_GONDI = 175 , USCRIPT_SOYOMBO = 176 ,
  USCRIPT_ZANABAZAR_SQUARE = 177 , USCRIPT_DOGRA = 178 , USCRIPT_GUNJALA_GONDI = 179 , USCRIPT_MAKASAR = 180 ,
  USCRIPT_MEDEFAIDRIN = 181 , USCRIPT_HANIFI_ROHINGYA = 182 , USCRIPT_SOGDIAN = 183 , USCRIPT_OLD_SOGDIAN = 184 ,
  USCRIPT_ELYMAIC = 185 , USCRIPT_NYIAKENG_PUACHUE_HMONG = 186 , USCRIPT_NANDINAGARI = 187 , USCRIPT_WANCHO = 188 ,
  USCRIPT_CHORASMIAN = 189 , USCRIPT_DIVES_AKURU = 190 , USCRIPT_KHITAN_SMALL_SCRIPT = 191 , USCRIPT_YEZIDI = 192 ,
  USCRIPT_CYPRO_MINOAN = 193 , USCRIPT_OLD_UYGHUR = 194 , USCRIPT_TANGSA = 195 , USCRIPT_TOTO = 196 ,
  USCRIPT_VITHKUQI = 197 , USCRIPT_KAWI = 198 , USCRIPT_NAG_MUNDARI = 199 , USCRIPT_ARABIC_NASTALIQ = 200 ,
  USCRIPT_GARAY = 201 , USCRIPT_GURUNG_KHEMA = 202 , USCRIPT_KIRAT_RAI = 203 , USCRIPT_OL_ONAL = 204 ,
  USCRIPT_SUNUWAR = 205 , USCRIPT_TODHRI = 206 , USCRIPT_TULU_TIGALARI = 207 , USCRIPT_CODE_LIMIT = 208
}
 Constants for ISO 15924 script codes. More...
 
enum  UScriptUsage {
  USCRIPT_USAGE_NOT_ENCODED , USCRIPT_USAGE_UNKNOWN , USCRIPT_USAGE_EXCLUDED , USCRIPT_USAGE_LIMITED_USE ,
  USCRIPT_USAGE_ASPIRATIONAL , USCRIPT_USAGE_RECOMMENDED
}
 Script usage constants. More...
 

Functions

U_CAPI int32_t uscript_getCode (const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
 Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
 
U_CAPI const char * uscript_getName (UScriptCode scriptCode)
 Returns the long Unicode script name, if there is one.
 
U_CAPI const char * uscript_getShortName (UScriptCode scriptCode)
 Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
 
U_CAPI UScriptCode uscript_getScript (UChar32 codepoint, UErrorCode *err)
 Gets the script code associated with the given codepoint.
 
U_CAPI UBool uscript_hasScript (UChar32 c, UScriptCode sc)
 Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.
 
U_CAPI int32_t uscript_getScriptExtensions (UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)
 Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.
 
U_CAPI int32_t uscript_getSampleString (UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
 Writes the script sample character string.
 
U_COMMON_API icu::UnicodeString uscript_getSampleUnicodeString (UScriptCode script)
 Returns the script sample character string.
 
U_CAPI UScriptUsage uscript_getUsage (UScriptCode script)
 Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
 
U_CAPI UBool uscript_isRightToLeft (UScriptCode script)
 Returns true if the script is written right-to-left.
 
U_CAPI UBool uscript_breaksBetweenLetters (UScriptCode script)
 Returns true if the script allows line breaks between letters (excluding hyphenation).
 
U_CAPI UBool uscript_isCased (UScriptCode script)
 Returns true if, in modern (or most recent) usage of the script, case distinctions are customary.
 

Detailed Description

C API: Unicode Script Information.

Definition in file uscript.h.

Typedef Documentation

◆ UScriptCode

typedef enum UScriptCode UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

Stable
ICU 2.2

◆ UScriptUsage

typedef enum UScriptUsage UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

Stable
ICU 51

Enumeration Type Documentation

◆ UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

Stable
ICU 2.2
Enumerator
USCRIPT_INVALID_CODE 
Stable
ICU 2.2
USCRIPT_COMMON 
Stable
ICU 2.2
USCRIPT_INHERITED 
Stable
ICU 2.2
USCRIPT_ARABIC 
Stable
ICU 2.2
USCRIPT_ARMENIAN 
Stable
ICU 2.2
USCRIPT_BENGALI 
Stable
ICU 2.2
USCRIPT_BOPOMOFO 
Stable
ICU 2.2
USCRIPT_CHEROKEE 
Stable
ICU 2.2
USCRIPT_COPTIC 
Stable
ICU 2.2
USCRIPT_CYRILLIC 
Stable
ICU 2.2
USCRIPT_DESERET 
Stable
ICU 2.2
USCRIPT_DEVANAGARI 
Stable
ICU 2.2
USCRIPT_ETHIOPIC 
Stable
ICU 2.2
USCRIPT_GEORGIAN 
Stable
ICU 2.2
USCRIPT_GOTHIC 
Stable
ICU 2.2
USCRIPT_GREEK 
Stable
ICU 2.2
USCRIPT_GUJARATI 
Stable
ICU 2.2
USCRIPT_GURMUKHI 
Stable
ICU 2.2
USCRIPT_HAN 
Stable
ICU 2.2
USCRIPT_HANGUL 
Stable
ICU 2.2
USCRIPT_HEBREW 
Stable
ICU 2.2
USCRIPT_HIRAGANA 
Stable
ICU 2.2
USCRIPT_KANNADA 
Stable
ICU 2.2
USCRIPT_KATAKANA 
Stable
ICU 2.2
USCRIPT_KHMER 
Stable
ICU 2.2
USCRIPT_LAO 
Stable
ICU 2.2
USCRIPT_LATIN 
Stable
ICU 2.2
USCRIPT_MALAYALAM 
Stable
ICU 2.2
USCRIPT_MONGOLIAN 
Stable
ICU 2.2
USCRIPT_MYANMAR 
Stable
ICU 2.2
USCRIPT_OGHAM 
Stable
ICU 2.2
USCRIPT_OLD_ITALIC 
Stable
ICU 2.2
USCRIPT_ORIYA 
Stable
ICU 2.2
USCRIPT_RUNIC 
Stable
ICU 2.2
USCRIPT_SINHALA 
Stable
ICU 2.2
USCRIPT_SYRIAC 
Stable
ICU 2.2
USCRIPT_TAMIL 
Stable
ICU 2.2
USCRIPT_TELUGU 
Stable
ICU 2.2
USCRIPT_THAANA 
Stable
ICU 2.2
USCRIPT_THAI 
Stable
ICU 2.2
USCRIPT_TIBETAN 
Stable
ICU 2.2
USCRIPT_CANADIAN_ABORIGINAL 

Canadian_Aboriginal script.

Stable
ICU 2.6
USCRIPT_UCAS 

Canadian_Aboriginal script (alias).

Stable
ICU 2.2
USCRIPT_YI 
Stable
ICU 2.2
USCRIPT_TAGALOG 
Stable
ICU 2.2
USCRIPT_HANUNOO 
Stable
ICU 2.2
USCRIPT_BUHID 
Stable
ICU 2.2
USCRIPT_TAGBANWA 
Stable
ICU 2.2
USCRIPT_BRAILLE 
Stable
ICU 2.6
USCRIPT_CYPRIOT 
Stable
ICU 2.6
USCRIPT_LIMBU 
Stable
ICU 2.6
USCRIPT_LINEAR_B 
Stable
ICU 2.6
USCRIPT_OSMANYA 
Stable
ICU 2.6
USCRIPT_SHAVIAN 
Stable
ICU 2.6
USCRIPT_TAI_LE 
Stable
ICU 2.6
USCRIPT_UGARITIC 
Stable
ICU 2.6
USCRIPT_KATAKANA_OR_HIRAGANA 

New script code in Unicode 4.0.1.

Stable
ICU 3.0
USCRIPT_BUGINESE 
Stable
ICU 3.4
USCRIPT_GLAGOLITIC 
Stable
ICU 3.4
USCRIPT_KHAROSHTHI 
Stable
ICU 3.4
USCRIPT_SYLOTI_NAGRI 
Stable
ICU 3.4
USCRIPT_NEW_TAI_LUE 
Stable
ICU 3.4
USCRIPT_TIFINAGH 
Stable
ICU 3.4
USCRIPT_OLD_PERSIAN 
Stable
ICU 3.4
USCRIPT_BALINESE 
Stable
ICU 3.6
USCRIPT_BATAK 
Stable
ICU 3.6
USCRIPT_BLISSYMBOLS 
Stable
ICU 3.6
USCRIPT_BRAHMI 
Stable
ICU 3.6
USCRIPT_CHAM 
Stable
ICU 3.6
USCRIPT_CIRTH 
Stable
ICU 3.6
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC 
Stable
ICU 3.6
USCRIPT_DEMOTIC_EGYPTIAN 
Stable
ICU 3.6
USCRIPT_HIERATIC_EGYPTIAN 
Stable
ICU 3.6
USCRIPT_EGYPTIAN_HIEROGLYPHS 
Stable
ICU 3.6
USCRIPT_KHUTSURI 
Stable
ICU 3.6
USCRIPT_SIMPLIFIED_HAN 
Stable
ICU 3.6
USCRIPT_TRADITIONAL_HAN 
Stable
ICU 3.6
USCRIPT_PAHAWH_HMONG 
Stable
ICU 3.6
USCRIPT_OLD_HUNGARIAN 
Stable
ICU 3.6
USCRIPT_HARAPPAN_INDUS 
Stable
ICU 3.6
USCRIPT_JAVANESE 
Stable
ICU 3.6
USCRIPT_KAYAH_LI 
Stable
ICU 3.6
USCRIPT_LATIN_FRAKTUR 
Stable
ICU 3.6
USCRIPT_LATIN_GAELIC 
Stable
ICU 3.6
USCRIPT_LEPCHA 
Stable
ICU 3.6
USCRIPT_LINEAR_A 
Stable
ICU 3.6
USCRIPT_MANDAIC 
Stable
ICU 4.6
USCRIPT_MANDAEAN 
Stable
ICU 3.6
USCRIPT_MAYAN_HIEROGLYPHS 
Stable
ICU 3.6
USCRIPT_MEROITIC_HIEROGLYPHS 
Stable
ICU 4.6
USCRIPT_MEROITIC 
Stable
ICU 3.6
USCRIPT_NKO 
Stable
ICU 3.6
USCRIPT_ORKHON 
Stable
ICU 3.6
USCRIPT_OLD_PERMIC 
Stable
ICU 3.6
USCRIPT_PHAGS_PA 
Stable
ICU 3.6
USCRIPT_PHOENICIAN 
Stable
ICU 3.6
USCRIPT_MIAO 
Stable
ICU 52
USCRIPT_PHONETIC_POLLARD 
Stable
ICU 3.6
USCRIPT_RONGORONGO 
Stable
ICU 3.6
USCRIPT_SARATI 
Stable
ICU 3.6
USCRIPT_ESTRANGELO_SYRIAC 
Stable
ICU 3.6
USCRIPT_WESTERN_SYRIAC 
Stable
ICU 3.6
USCRIPT_EASTERN_SYRIAC 
Stable
ICU 3.6
USCRIPT_TENGWAR 
Stable
ICU 3.6
USCRIPT_VAI 
Stable
ICU 3.6
USCRIPT_VISIBLE_SPEECH 
Stable
ICU 3.6
USCRIPT_CUNEIFORM 
Stable
ICU 3.6
USCRIPT_UNWRITTEN_LANGUAGES 
Stable
ICU 3.6
USCRIPT_UNKNOWN 
Stable
ICU 3.6
USCRIPT_CARIAN 
Stable
ICU 3.8
USCRIPT_JAPANESE 
Stable
ICU 3.8
USCRIPT_LANNA 
Stable
ICU 3.8
USCRIPT_LYCIAN 
Stable
ICU 3.8
USCRIPT_LYDIAN 
Stable
ICU 3.8
USCRIPT_OL_CHIKI 
Stable
ICU 3.8
USCRIPT_REJANG 
Stable
ICU 3.8
USCRIPT_SAURASHTRA 
Stable
ICU 3.8
USCRIPT_SIGN_WRITING 

Sutton SignWriting.

Stable
ICU 3.8
USCRIPT_SUNDANESE 
Stable
ICU 3.8
USCRIPT_MOON 
Stable
ICU 3.8
USCRIPT_MEITEI_MAYEK 
Stable
ICU 3.8
USCRIPT_IMPERIAL_ARAMAIC 
Stable
ICU 4.0
USCRIPT_AVESTAN 
Stable
ICU 4.0
USCRIPT_CHAKMA 
Stable
ICU 4.0
USCRIPT_KOREAN 
Stable
ICU 4.0
USCRIPT_KAITHI 
Stable
ICU 4.0
USCRIPT_MANICHAEAN 
Stable
ICU 4.0
USCRIPT_INSCRIPTIONAL_PAHLAVI 
Stable
ICU 4.0
USCRIPT_PSALTER_PAHLAVI 
Stable
ICU 4.0
USCRIPT_BOOK_PAHLAVI 
Stable
ICU 4.0
USCRIPT_INSCRIPTIONAL_PARTHIAN 
Stable
ICU 4.0
USCRIPT_SAMARITAN 
Stable
ICU 4.0
USCRIPT_TAI_VIET 
Stable
ICU 4.0
USCRIPT_MATHEMATICAL_NOTATION 
Stable
ICU 4.0
USCRIPT_SYMBOLS 
Stable
ICU 4.0
USCRIPT_BAMUM 
Stable
ICU 4.4
USCRIPT_LISU 
Stable
ICU 4.4
USCRIPT_NAKHI_GEBA 
Stable
ICU 4.4
USCRIPT_OLD_SOUTH_ARABIAN 
Stable
ICU 4.4
USCRIPT_BASSA_VAH 
Stable
ICU 4.6
USCRIPT_DUPLOYAN 
Stable
ICU 54
USCRIPT_DUPLOYAN_SHORTAND 
Deprecated
ICU 54 Typo, use USCRIPT_DUPLOYAN
USCRIPT_ELBASAN 
Stable
ICU 4.6
USCRIPT_GRANTHA 
Stable
ICU 4.6
USCRIPT_KPELLE 
Stable
ICU 4.6
USCRIPT_LOMA 
Stable
ICU 4.6
USCRIPT_MENDE 

Mende Kikakui.

Stable
ICU 4.6
USCRIPT_MEROITIC_CURSIVE 
Stable
ICU 4.6
USCRIPT_OLD_NORTH_ARABIAN 
Stable
ICU 4.6
USCRIPT_NABATAEAN 
Stable
ICU 4.6
USCRIPT_PALMYRENE 
Stable
ICU 4.6
USCRIPT_KHUDAWADI 
Stable
ICU 54
USCRIPT_SINDHI 
Stable
ICU 4.6
USCRIPT_WARANG_CITI 
Stable
ICU 4.6
USCRIPT_AFAKA 
Stable
ICU 4.8
USCRIPT_JURCHEN 
Stable
ICU 4.8
USCRIPT_MRO 
Stable
ICU 4.8
USCRIPT_NUSHU 
Stable
ICU 4.8
USCRIPT_SHARADA 
Stable
ICU 4.8
USCRIPT_SORA_SOMPENG 
Stable
ICU 4.8
USCRIPT_TAKRI 
Stable
ICU 4.8
USCRIPT_TANGUT 
Stable
ICU 4.8
USCRIPT_WOLEAI 
Stable
ICU 4.8
USCRIPT_ANATOLIAN_HIEROGLYPHS 
Stable
ICU 49
USCRIPT_KHOJKI 
Stable
ICU 49
USCRIPT_TIRHUTA 
Stable
ICU 49
USCRIPT_CAUCASIAN_ALBANIAN 
Stable
ICU 52
USCRIPT_MAHAJANI 
Stable
ICU 52
USCRIPT_AHOM 
Stable
ICU 54
USCRIPT_HATRAN 
Stable
ICU 54
USCRIPT_MODI 
Stable
ICU 54
USCRIPT_MULTANI 
Stable
ICU 54
USCRIPT_PAU_CIN_HAU 
Stable
ICU 54
USCRIPT_SIDDHAM 
Stable
ICU 54
USCRIPT_ADLAM 
Stable
ICU 58
USCRIPT_BHAIKSUKI 
Stable
ICU 58
USCRIPT_MARCHEN 
Stable
ICU 58
USCRIPT_NEWA 
Stable
ICU 58
USCRIPT_OSAGE 
Stable
ICU 58
USCRIPT_HAN_WITH_BOPOMOFO 
Stable
ICU 58
USCRIPT_JAMO 
Stable
ICU 58
USCRIPT_SYMBOLS_EMOJI 
Stable
ICU 58
USCRIPT_MASARAM_GONDI 
Stable
ICU 60
USCRIPT_SOYOMBO 
Stable
ICU 60
USCRIPT_ZANABAZAR_SQUARE 
Stable
ICU 60
USCRIPT_DOGRA 
Stable
ICU 62
USCRIPT_GUNJALA_GONDI 
Stable
ICU 62
USCRIPT_MAKASAR 
Stable
ICU 62
USCRIPT_MEDEFAIDRIN 
Stable
ICU 62
USCRIPT_HANIFI_ROHINGYA 
Stable
ICU 62
USCRIPT_SOGDIAN 
Stable
ICU 62
USCRIPT_OLD_SOGDIAN 
Stable
ICU 62
USCRIPT_ELYMAIC 
Stable
ICU 64
USCRIPT_NYIAKENG_PUACHUE_HMONG 
Stable
ICU 64
USCRIPT_NANDINAGARI 
Stable
ICU 64
USCRIPT_WANCHO 
Stable
ICU 64
USCRIPT_CHORASMIAN 
Stable
ICU 66
USCRIPT_DIVES_AKURU 
Stable
ICU 66
USCRIPT_KHITAN_SMALL_SCRIPT 
Stable
ICU 66
USCRIPT_YEZIDI 
Stable
ICU 66
USCRIPT_CYPRO_MINOAN 
Stable
ICU 70
USCRIPT_OLD_UYGHUR 
Stable
ICU 70
USCRIPT_TANGSA 
Stable
ICU 70
USCRIPT_TOTO 
Stable
ICU 70
USCRIPT_VITHKUQI 
Stable
ICU 70
USCRIPT_KAWI 
Stable
ICU 72
USCRIPT_NAG_MUNDARI 
Stable
ICU 72
USCRIPT_ARABIC_NASTALIQ 
Stable
ICU 75
USCRIPT_GARAY 
Stable
ICU 76
USCRIPT_GURUNG_KHEMA 
Stable
ICU 76
USCRIPT_KIRAT_RAI 
Stable
ICU 76
USCRIPT_OL_ONAL 
Stable
ICU 76
USCRIPT_SUNUWAR 
Stable
ICU 76
USCRIPT_TODHRI 
Stable
ICU 76
USCRIPT_TULU_TIGALARI 
Stable
ICU 76
USCRIPT_CODE_LIMIT 

One more than the highest normal UScriptCode value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).

Deprecated
ICU 58 The numeric value may change over time, see ICU ticket #12420.

Definition at line 54 of file uscript.h.

◆ UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

Stable
ICU 51
Enumerator
USCRIPT_USAGE_NOT_ENCODED 

Not encoded in Unicode.

Stable
ICU 51
USCRIPT_USAGE_UNKNOWN 

Unknown script usage.

Stable
ICU 51
USCRIPT_USAGE_EXCLUDED 

Candidate for Exclusion from Identifiers.

Stable
ICU 51
USCRIPT_USAGE_LIMITED_USE 

Limited Use script.

Stable
ICU 51
USCRIPT_USAGE_ASPIRATIONAL 

Aspirational Use script.

Stable
ICU 51
USCRIPT_USAGE_RECOMMENDED 

Recommended script.

Stable
ICU 51

Definition at line 646 of file uscript.h.

Function Documentation

◆ uscript_breaksBetweenLetters()

U_CAPI UBool uscript_breaksBetweenLetters ( UScriptCode script)

Returns true if the script allows line breaks between letters (excluding hyphenation).

Such a script typically requires dictionary-based line breaking. For example, Hani and Thai.

Parameters
script: script code
Returns
true if the script allows line breaks between letters
Stable
ICU 51

◆ uscript_getCode()

U_CAPI int32_t uscript_getCode ( const char * nameOrAbbrOrLocale,
UScriptCode * fillIn,
int32_t capacity,
UErrorCode * err )

Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.

Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.

Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does a fast lookup with no access of the locale data.

Parameters
nameOrAbbrOrLocale: name of the script, as given in PropertyValueAliases.txt, or ISO 15924 code or locale
fillIn: the UScriptCode buffer to fill in the script code
capacity: the capacity (size) of UScriptCode buffer passed in.
err: the error status code.
Returns
The number of script codes filled in the buffer passed in
Stable
ICU 2.4

◆ uscript_getName()

U_CAPI const char * uscript_getName ( UScriptCode scriptCode)

Returns the long Unicode script name, if there is one.

Otherwise returns the 4-letter ISO 15924 script code. Returns "Malayalam" given USCRIPT_MALAYALAM.

Parameters
scriptCode: UScriptCode enum
Returns
long script name as given in PropertyValueAliases.txt, or the 4-letter code, or NULL if scriptCode is invalid
Stable
ICU 2.4

◆ uscript_getSampleString()

U_CAPI int32_t uscript_getSampleString ( UScriptCode script,
UChar * dest,
int32_t capacity,
UErrorCode * pErrorCode )

Writes the script sample character string.

This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.

Parameters
script: script code
dest: output string array
capacity: number of UChars in the dest array
pErrorCode: standard ICU in/out error code, must pass U_SUCCESS() on input
Returns
the string length, even if U_BUFFER_OVERFLOW_ERROR
Stable
ICU 51

◆ uscript_getSampleUnicodeString()

U_COMMON_API icu::UnicodeString uscript_getSampleUnicodeString ( UScriptCode script)

Returns the script sample character string.

This string normally consists of one code point but might be longer. The string is empty if the script is not encoded.

Parameters
script: script code
Returns
the sample character string
Stable
ICU 51

◆ uscript_getScript()

U_CAPI UScriptCode uscript_getScript ( UChar32 codepoint,
UErrorCode * err )

Gets the script code associated with the given codepoint.

Returns USCRIPT_MALAYALAM given 0x0D02.

Parameters
codepoint: UChar32 codepoint
err: the error status code.
Returns
The UScriptCode, or 0 if codepoint is invalid
Stable
ICU 2.4

◆ uscript_getScriptExtensions()

U_CAPI int32_t uscript_getScriptExtensions ( UChar32 c,
UScriptCode * scripts,
int32_t capacity,
UErrorCode * errorCode )

Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.

  • If c does have Script_Extensions, then the Script property value (normally Common or Inherited) is not included.
  • If c does not have Script_Extensions, then the one Script code is written to the output array.
  • If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.

In other words, if the return value is 1, then the output array contains exactly c's single Script code. If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

If there are more than capacity script codes to be written, then U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. (Usual ICU buffer handling behavior.)

Parameters
c: code point
scripts: output script code array
capacity: capacity of the scripts array
errorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
number of script codes in c's Script_Extensions, or 1 for the single Script value, written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
Stable
ICU 49

◆ uscript_getShortName()

U_CAPI const char * uscript_getShortName ( UScriptCode scriptCode)

Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.

Returns "Mlym" given USCRIPT_MALAYALAM.

Parameters
scriptCode: UScriptCode enum
Returns
short script name (4-letter code), or NULL if scriptCode is invalid
Stable
ICU 2.4

◆ uscript_getUsage()

U_CAPI UScriptUsage uscript_getUsage ( UScriptCode script)

Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.

Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.

Parameters
script: script code
Returns
script usage
See also
UScriptUsage
Stable
ICU 51

◆ uscript_hasScript()

U_CAPI UBool uscript_hasScript ( UChar32 c,
UScriptCode sc )

Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

Parameters
c: code point
sc: script code
Returns
true if sc is in Script_Extensions(c)
Stable
ICU 49

◆ uscript_isCased()

U_CAPI UBool uscript_isCased ( UScriptCode script)

Returns true if, in modern (or most recent) usage of the script, case distinctions are customary.

For example, Latn and Cyrl.

Parameters
script: script code
Returns
true if the script is cased
Stable
ICU 51

◆ uscript_isRightToLeft()

U_CAPI UBool uscript_isRightToLeft ( UScriptCode script)

Returns true if the script is written right-to-left.

For example, Arab and Hebr.

Parameters
script: script code
Returns
true if the script is right-to-left
Stable
ICU 51