ICU 50.1.2
Typedefs | Enumerations | Functions
uscript.h File Reference

C API: Unicode Script Information. More...

#include "unicode/utypes.h"

Go to the source code of this file.

Typedefs

typedef enum UScriptCode UScriptCode
 Constants for ISO 15924 script codes. More...
 

Enumerations

enum  UScriptCode {
  USCRIPT_INVALID_CODE = -1, USCRIPT_COMMON = 0, USCRIPT_INHERITED = 1, USCRIPT_ARABIC = 2,
  USCRIPT_ARMENIAN = 3, USCRIPT_BENGALI = 4, USCRIPT_BOPOMOFO = 5, USCRIPT_CHEROKEE = 6,
  USCRIPT_COPTIC = 7, USCRIPT_CYRILLIC = 8, USCRIPT_DESERET = 9, USCRIPT_DEVANAGARI = 10,
  USCRIPT_ETHIOPIC = 11, USCRIPT_GEORGIAN = 12, USCRIPT_GOTHIC = 13, USCRIPT_GREEK = 14,
  USCRIPT_GUJARATI = 15, USCRIPT_GURMUKHI = 16, USCRIPT_HAN = 17, USCRIPT_HANGUL = 18,
  USCRIPT_HEBREW = 19, USCRIPT_HIRAGANA = 20, USCRIPT_KANNADA = 21, USCRIPT_KATAKANA = 22,
  USCRIPT_KHMER = 23, USCRIPT_LAO = 24, USCRIPT_LATIN = 25, USCRIPT_MALAYALAM = 26,
  USCRIPT_MONGOLIAN = 27, USCRIPT_MYANMAR = 28, USCRIPT_OGHAM = 29, USCRIPT_OLD_ITALIC = 30,
  USCRIPT_ORIYA = 31, USCRIPT_RUNIC = 32, USCRIPT_SINHALA = 33, USCRIPT_SYRIAC = 34,
  USCRIPT_TAMIL = 35, USCRIPT_TELUGU = 36, USCRIPT_THAANA = 37, USCRIPT_THAI = 38,
  USCRIPT_TIBETAN = 39, USCRIPT_CANADIAN_ABORIGINAL = 40, USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, USCRIPT_YI = 41,
  USCRIPT_TAGALOG = 42, USCRIPT_HANUNOO = 43, USCRIPT_BUHID = 44, USCRIPT_TAGBANWA = 45,
  USCRIPT_BRAILLE = 46, USCRIPT_CYPRIOT = 47, USCRIPT_LIMBU = 48, USCRIPT_LINEAR_B = 49,
  USCRIPT_OSMANYA = 50, USCRIPT_SHAVIAN = 51, USCRIPT_TAI_LE = 52, USCRIPT_UGARITIC = 53,
  USCRIPT_KATAKANA_OR_HIRAGANA = 54, USCRIPT_BUGINESE = 55, USCRIPT_GLAGOLITIC = 56, USCRIPT_KHAROSHTHI = 57,
  USCRIPT_SYLOTI_NAGRI = 58, USCRIPT_NEW_TAI_LUE = 59, USCRIPT_TIFINAGH = 60, USCRIPT_OLD_PERSIAN = 61,
  USCRIPT_BALINESE = 62, USCRIPT_BATAK = 63, USCRIPT_BLISSYMBOLS = 64, USCRIPT_BRAHMI = 65,
  USCRIPT_CHAM = 66, USCRIPT_CIRTH = 67, USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, USCRIPT_DEMOTIC_EGYPTIAN = 69,
  USCRIPT_HIERATIC_EGYPTIAN = 70, USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, USCRIPT_KHUTSURI = 72, USCRIPT_SIMPLIFIED_HAN = 73,
  USCRIPT_TRADITIONAL_HAN = 74, USCRIPT_PAHAWH_HMONG = 75, USCRIPT_OLD_HUNGARIAN = 76, USCRIPT_HARAPPAN_INDUS = 77,
  USCRIPT_JAVANESE = 78, USCRIPT_KAYAH_LI = 79, USCRIPT_LATIN_FRAKTUR = 80, USCRIPT_LATIN_GAELIC = 81,
  USCRIPT_LEPCHA = 82, USCRIPT_LINEAR_A = 83, USCRIPT_MANDAIC = 84, USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
  USCRIPT_MAYAN_HIEROGLYPHS = 85, USCRIPT_MEROITIC_HIEROGLYPHS = 86, USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, USCRIPT_NKO = 87,
  USCRIPT_ORKHON = 88, USCRIPT_OLD_PERMIC = 89, USCRIPT_PHAGS_PA = 90, USCRIPT_PHOENICIAN = 91,
  USCRIPT_PHONETIC_POLLARD = 92, USCRIPT_RONGORONGO = 93, USCRIPT_SARATI = 94, USCRIPT_ESTRANGELO_SYRIAC = 95,
  USCRIPT_WESTERN_SYRIAC = 96, USCRIPT_EASTERN_SYRIAC = 97, USCRIPT_TENGWAR = 98, USCRIPT_VAI = 99,
  USCRIPT_VISIBLE_SPEECH = 100, USCRIPT_CUNEIFORM = 101, USCRIPT_UNWRITTEN_LANGUAGES = 102, USCRIPT_UNKNOWN = 103,
  USCRIPT_CARIAN = 104, USCRIPT_JAPANESE = 105, USCRIPT_LANNA = 106, USCRIPT_LYCIAN = 107,
  USCRIPT_LYDIAN = 108, USCRIPT_OL_CHIKI = 109, USCRIPT_REJANG = 110, USCRIPT_SAURASHTRA = 111,
  USCRIPT_SIGN_WRITING = 112, USCRIPT_SUNDANESE = 113, USCRIPT_MOON = 114, USCRIPT_MEITEI_MAYEK = 115,
  USCRIPT_IMPERIAL_ARAMAIC = 116, USCRIPT_AVESTAN = 117, USCRIPT_CHAKMA = 118, USCRIPT_KOREAN = 119,
  USCRIPT_KAITHI = 120, USCRIPT_MANICHAEAN = 121, USCRIPT_INSCRIPTIONAL_PAHLAVI = 122, USCRIPT_PSALTER_PAHLAVI = 123,
  USCRIPT_BOOK_PAHLAVI = 124, USCRIPT_INSCRIPTIONAL_PARTHIAN = 125, USCRIPT_SAMARITAN = 126, USCRIPT_TAI_VIET = 127,
  USCRIPT_MATHEMATICAL_NOTATION = 128, USCRIPT_SYMBOLS = 129, USCRIPT_BAMUM = 130, USCRIPT_LISU = 131,
  USCRIPT_NAKHI_GEBA = 132, USCRIPT_OLD_SOUTH_ARABIAN = 133, USCRIPT_BASSA_VAH = 134, USCRIPT_DUPLOYAN_SHORTAND = 135,
  USCRIPT_ELBASAN = 136, USCRIPT_GRANTHA = 137, USCRIPT_KPELLE = 138, USCRIPT_LOMA = 139,
  USCRIPT_MENDE = 140, USCRIPT_MEROITIC_CURSIVE = 141, USCRIPT_OLD_NORTH_ARABIAN = 142, USCRIPT_NABATAEAN = 143,
  USCRIPT_PALMYRENE = 144, USCRIPT_SINDHI = 145, USCRIPT_WARANG_CITI = 146, USCRIPT_AFAKA = 147,
  USCRIPT_JURCHEN = 148, USCRIPT_MRO = 149, USCRIPT_NUSHU = 150, USCRIPT_SHARADA = 151,
  USCRIPT_SORA_SOMPENG = 152, USCRIPT_TAKRI = 153, USCRIPT_TANGUT = 154, USCRIPT_WOLEAI = 155,
  USCRIPT_ANATOLIAN_HIEROGLYPHS = 156, USCRIPT_KHOJKI = 157, USCRIPT_TIRHUTA = 158, USCRIPT_CODE_LIMIT = 159
}
 Constants for ISO 15924 script codes. More...
 

Functions

int32_t uscript_getCode (const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
 Gets script codes associated with the given locale or ISO 15924 abbreviation or name. More...
 
const char * uscript_getName (UScriptCode scriptCode)
 Gets a script name associated with the given script code. More...
 
const char * uscript_getShortName (UScriptCode scriptCode)
 Gets the abbreviated (short) script name associated with the given script code. More...
 
UScriptCode uscript_getScript (UChar32 codepoint, UErrorCode *err)
 Gets the script code associated with the given codepoint. More...
 
UBool uscript_hasScript (UChar32 c, UScriptCode sc)
 Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc. More...
 
int32_t uscript_getScriptExtensions (UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)
 Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes. More...
 

Detailed Description

C API: Unicode Script Information.

Definition in file uscript.h.

Typedef Documentation

typedef enum UScriptCode UScriptCode

Constants for ISO 15924 script codes.

Many of these script codes - those from Unicode's ScriptNames.txt - are character property values for Unicode's Script property. See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).

Starting with ICU 3.6, constants for most ISO 15924 script codes are included (currently excluding private-use codes Qaaa..Qabx). For scripts for which there are codes in ISO 15924 but which are not used in the Unicode Character Database (UCD), there are no Unicode characters associated with those scripts.

For example, there are no characters that have a UCD script code of Hans or Hant. All Han ideographs have the Hani script code. The Hans and Hant script codes are used with CLDR data.

ISO 15924 script codes are included for use with CLDR and similar.

Stable:
ICU 2.2

Enumeration Type Documentation

Constants for ISO 15924 script codes.

Many of these script codes - those from Unicode's ScriptNames.txt - are character property values for Unicode's Script property. See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).

Starting with ICU 3.6, constants for most ISO 15924 script codes are included (currently excluding private-use codes Qaaa..Qabx). For scripts for which there are codes in ISO 15924 but which are not used in the Unicode Character Database (UCD), there are no Unicode characters associated with those scripts.

For example, there are no characters that have a UCD script code of Hans or Hant. All Han ideographs have the Hani script code. The Hans and Hant script codes are used with CLDR data.

ISO 15924 script codes are included for use with CLDR and similar.

Stable:
ICU 2.2
Enumerator
USCRIPT_INVALID_CODE 
Stable:
ICU 2.2
USCRIPT_COMMON 
Stable:
ICU 2.2
USCRIPT_INHERITED 
Stable:
ICU 2.2
USCRIPT_ARABIC 
Stable:
ICU 2.2
USCRIPT_ARMENIAN 
Stable:
ICU 2.2
USCRIPT_BENGALI 
Stable:
ICU 2.2
USCRIPT_BOPOMOFO 
Stable:
ICU 2.2
USCRIPT_CHEROKEE 
Stable:
ICU 2.2
USCRIPT_COPTIC 
Stable:
ICU 2.2
USCRIPT_CYRILLIC 
Stable:
ICU 2.2
USCRIPT_DESERET 
Stable:
ICU 2.2
USCRIPT_DEVANAGARI 
Stable:
ICU 2.2
USCRIPT_ETHIOPIC 
Stable:
ICU 2.2
USCRIPT_GEORGIAN 
Stable:
ICU 2.2
USCRIPT_GOTHIC 
Stable:
ICU 2.2
USCRIPT_GREEK 
Stable:
ICU 2.2
USCRIPT_GUJARATI 
Stable:
ICU 2.2
USCRIPT_GURMUKHI 
Stable:
ICU 2.2
USCRIPT_HAN 
Stable:
ICU 2.2
USCRIPT_HANGUL 
Stable:
ICU 2.2
USCRIPT_HEBREW 
Stable:
ICU 2.2
USCRIPT_HIRAGANA 
Stable:
ICU 2.2
USCRIPT_KANNADA 
Stable:
ICU 2.2
USCRIPT_KATAKANA 
Stable:
ICU 2.2
USCRIPT_KHMER 
Stable:
ICU 2.2
USCRIPT_LAO 
Stable:
ICU 2.2
USCRIPT_LATIN 
Stable:
ICU 2.2
USCRIPT_MALAYALAM 
Stable:
ICU 2.2
USCRIPT_MONGOLIAN 
Stable:
ICU 2.2
USCRIPT_MYANMAR 
Stable:
ICU 2.2
USCRIPT_OGHAM 
Stable:
ICU 2.2
USCRIPT_OLD_ITALIC 
Stable:
ICU 2.2
USCRIPT_ORIYA 
Stable:
ICU 2.2
USCRIPT_RUNIC 
Stable:
ICU 2.2
USCRIPT_SINHALA 
Stable:
ICU 2.2
USCRIPT_SYRIAC 
Stable:
ICU 2.2
USCRIPT_TAMIL 
Stable:
ICU 2.2
USCRIPT_TELUGU 
Stable:
ICU 2.2
USCRIPT_THAANA 
Stable:
ICU 2.2
USCRIPT_THAI 
Stable:
ICU 2.2
USCRIPT_TIBETAN 
Stable:
ICU 2.2
USCRIPT_CANADIAN_ABORIGINAL 

Canadian_Aboriginal script.

Stable:
ICU 2.6
USCRIPT_UCAS 

Canadian_Aboriginal script (alias).

Stable:
ICU 2.2
USCRIPT_YI 
Stable:
ICU 2.2
USCRIPT_TAGALOG 
Stable:
ICU 2.2
USCRIPT_HANUNOO 
Stable:
ICU 2.2
USCRIPT_BUHID 
Stable:
ICU 2.2
USCRIPT_TAGBANWA 
Stable:
ICU 2.2
USCRIPT_BRAILLE 
Stable:
ICU 2.6
USCRIPT_CYPRIOT 
Stable:
ICU 2.6
USCRIPT_LIMBU 
Stable:
ICU 2.6
USCRIPT_LINEAR_B 
Stable:
ICU 2.6
USCRIPT_OSMANYA 
Stable:
ICU 2.6
USCRIPT_SHAVIAN 
Stable:
ICU 2.6
USCRIPT_TAI_LE 
Stable:
ICU 2.6
USCRIPT_UGARITIC 
Stable:
ICU 2.6
USCRIPT_KATAKANA_OR_HIRAGANA 

New script code in Unicode 4.0.1.

Stable:
ICU 3.0
USCRIPT_BUGINESE 
Stable:
ICU 3.4
USCRIPT_GLAGOLITIC 
Stable:
ICU 3.4
USCRIPT_KHAROSHTHI 
Stable:
ICU 3.4
USCRIPT_SYLOTI_NAGRI 
Stable:
ICU 3.4
USCRIPT_NEW_TAI_LUE 
Stable:
ICU 3.4
USCRIPT_TIFINAGH 
Stable:
ICU 3.4
USCRIPT_OLD_PERSIAN 
Stable:
ICU 3.4
USCRIPT_BALINESE 
Stable:
ICU 3.6
USCRIPT_BATAK 
Stable:
ICU 3.6
USCRIPT_BLISSYMBOLS 
Stable:
ICU 3.6
USCRIPT_BRAHMI 
Stable:
ICU 3.6
USCRIPT_CHAM 
Stable:
ICU 3.6
USCRIPT_CIRTH 
Stable:
ICU 3.6
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC 
Stable:
ICU 3.6
USCRIPT_DEMOTIC_EGYPTIAN 
Stable:
ICU 3.6
USCRIPT_HIERATIC_EGYPTIAN 
Stable:
ICU 3.6
USCRIPT_EGYPTIAN_HIEROGLYPHS 
Stable:
ICU 3.6
USCRIPT_KHUTSURI 
Stable:
ICU 3.6
USCRIPT_SIMPLIFIED_HAN 
Stable:
ICU 3.6
USCRIPT_TRADITIONAL_HAN 
Stable:
ICU 3.6
USCRIPT_PAHAWH_HMONG 
Stable:
ICU 3.6
USCRIPT_OLD_HUNGARIAN 
Stable:
ICU 3.6
USCRIPT_HARAPPAN_INDUS 
Stable:
ICU 3.6
USCRIPT_JAVANESE 
Stable:
ICU 3.6
USCRIPT_KAYAH_LI 
Stable:
ICU 3.6
USCRIPT_LATIN_FRAKTUR 
Stable:
ICU 3.6
USCRIPT_LATIN_GAELIC 
Stable:
ICU 3.6
USCRIPT_LEPCHA 
Stable:
ICU 3.6
USCRIPT_LINEAR_A 
Stable:
ICU 3.6
USCRIPT_MANDAIC 
Stable:
ICU 4.6
USCRIPT_MANDAEAN 
Stable:
ICU 3.6
USCRIPT_MAYAN_HIEROGLYPHS 
Stable:
ICU 3.6
USCRIPT_MEROITIC_HIEROGLYPHS 
Stable:
ICU 4.6
USCRIPT_MEROITIC 
Stable:
ICU 3.6
USCRIPT_NKO 
Stable:
ICU 3.6
USCRIPT_ORKHON 
Stable:
ICU 3.6
USCRIPT_OLD_PERMIC 
Stable:
ICU 3.6
USCRIPT_PHAGS_PA 
Stable:
ICU 3.6
USCRIPT_PHOENICIAN 
Stable:
ICU 3.6
USCRIPT_PHONETIC_POLLARD 
Stable:
ICU 3.6
USCRIPT_RONGORONGO 
Stable:
ICU 3.6
USCRIPT_SARATI 
Stable:
ICU 3.6
USCRIPT_ESTRANGELO_SYRIAC 
Stable:
ICU 3.6
USCRIPT_WESTERN_SYRIAC 
Stable:
ICU 3.6
USCRIPT_EASTERN_SYRIAC 
Stable:
ICU 3.6
USCRIPT_TENGWAR 
Stable:
ICU 3.6
USCRIPT_VAI 
Stable:
ICU 3.6
USCRIPT_VISIBLE_SPEECH 
Stable:
ICU 3.6
USCRIPT_CUNEIFORM 
Stable:
ICU 3.6
USCRIPT_UNWRITTEN_LANGUAGES 
Stable:
ICU 3.6
USCRIPT_UNKNOWN 
Stable:
ICU 3.6
USCRIPT_CARIAN 
Stable:
ICU 3.8
USCRIPT_JAPANESE 
Stable:
ICU 3.8
USCRIPT_LANNA 
Stable:
ICU 3.8
USCRIPT_LYCIAN 
Stable:
ICU 3.8
USCRIPT_LYDIAN 
Stable:
ICU 3.8
USCRIPT_OL_CHIKI 
Stable:
ICU 3.8
USCRIPT_REJANG 
Stable:
ICU 3.8
USCRIPT_SAURASHTRA 
Stable:
ICU 3.8
USCRIPT_SIGN_WRITING 
Stable:
ICU 3.8
USCRIPT_SUNDANESE 
Stable:
ICU 3.8
USCRIPT_MOON 
Stable:
ICU 3.8
USCRIPT_MEITEI_MAYEK 
Stable:
ICU 3.8
USCRIPT_IMPERIAL_ARAMAIC 
Stable:
ICU 4.0
USCRIPT_AVESTAN 
Stable:
ICU 4.0
USCRIPT_CHAKMA 
Stable:
ICU 4.0
USCRIPT_KOREAN 
Stable:
ICU 4.0
USCRIPT_KAITHI 
Stable:
ICU 4.0
USCRIPT_MANICHAEAN 
Stable:
ICU 4.0
USCRIPT_INSCRIPTIONAL_PAHLAVI 
Stable:
ICU 4.0
USCRIPT_PSALTER_PAHLAVI 
Stable:
ICU 4.0
USCRIPT_BOOK_PAHLAVI 
Stable:
ICU 4.0
USCRIPT_INSCRIPTIONAL_PARTHIAN 
Stable:
ICU 4.0
USCRIPT_SAMARITAN 
Stable:
ICU 4.0
USCRIPT_TAI_VIET 
Stable:
ICU 4.0
USCRIPT_MATHEMATICAL_NOTATION 
Stable:
ICU 4.0
USCRIPT_SYMBOLS 
Stable:
ICU 4.0
USCRIPT_BAMUM 
Stable:
ICU 4.4
USCRIPT_LISU 
Stable:
ICU 4.4
USCRIPT_NAKHI_GEBA 
Stable:
ICU 4.4
USCRIPT_OLD_SOUTH_ARABIAN 
Stable:
ICU 4.4
USCRIPT_BASSA_VAH 
Stable:
ICU 4.6
USCRIPT_DUPLOYAN_SHORTAND 
Stable:
ICU 4.6
USCRIPT_ELBASAN 
Stable:
ICU 4.6
USCRIPT_GRANTHA 
Stable:
ICU 4.6
USCRIPT_KPELLE 
Stable:
ICU 4.6
USCRIPT_LOMA 
Stable:
ICU 4.6
USCRIPT_MENDE 
Stable:
ICU 4.6
USCRIPT_MEROITIC_CURSIVE 
Stable:
ICU 4.6
USCRIPT_OLD_NORTH_ARABIAN 
Stable:
ICU 4.6
USCRIPT_NABATAEAN 
Stable:
ICU 4.6
USCRIPT_PALMYRENE 
Stable:
ICU 4.6
USCRIPT_SINDHI 
Stable:
ICU 4.6
USCRIPT_WARANG_CITI 
Stable:
ICU 4.6
USCRIPT_AFAKA 
Stable:
ICU 4.8
USCRIPT_JURCHEN 
Stable:
ICU 4.8
USCRIPT_MRO 
Stable:
ICU 4.8
USCRIPT_NUSHU 
Stable:
ICU 4.8
USCRIPT_SHARADA 
Stable:
ICU 4.8
USCRIPT_SORA_SOMPENG 
Stable:
ICU 4.8
USCRIPT_TAKRI 
Stable:
ICU 4.8
USCRIPT_TANGUT 
Stable:
ICU 4.8
USCRIPT_WOLEAI 
Stable:
ICU 4.8
USCRIPT_ANATOLIAN_HIEROGLYPHS 
Stable:
ICU 49
USCRIPT_KHOJKI 
Stable:
ICU 49
USCRIPT_TIRHUTA 
Stable:
ICU 49
USCRIPT_CODE_LIMIT 
Stable:
ICU 2.2

Definition at line 46 of file uscript.h.

Function Documentation

int32_t uscript_getCode ( const char *  nameOrAbbrOrLocale,
UScriptCode *  fillIn,
int32_t  capacity,
UErrorCode *  err 
)

Gets script codes associated with the given locale or ISO 15924 abbreviation or name.

Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". Fills in USCRIPT_LATIN given "en" OR "en_US". If the required capacity is greater than the capacity of the destination buffer, then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.

Note: To search by short or long script alias only, use u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does a fast lookup with no access of the locale data.

Parameters
nameOrAbbrOrLocale: name of the script, as given in PropertyValueAliases.txt, or ISO 15924 code or locale
fillIn: the UScriptCode buffer to fill in the script code
capacity: the capacity (size) of the UScriptCode buffer passed in.
err: the error status code.
Returns
The number of script codes filled in the buffer passed in
Stable:
ICU 2.4
const char* uscript_getName ( UScriptCode  scriptCode)

Gets a script name associated with the given script code.

Returns "Malayalam" given USCRIPT_MALAYALAM

Parameters
scriptCode: UScriptCode enum
Returns
script long name as given in PropertyValueAliases.txt, or NULL if scriptCode is invalid
Stable:
ICU 2.4
UScriptCode uscript_getScript ( UChar32  codepoint,
UErrorCode *  err 
)

Gets the script code associated with the given codepoint.

Returns USCRIPT_MALAYALAM given 0x0D02

Parameters
codepoint: UChar32 codepoint
err: the error status code.
Returns
The UScriptCode, or 0 if codepoint is invalid
Stable:
ICU 2.4
int32_t uscript_getScriptExtensions ( UChar32  c,
UScriptCode *  scripts,
int32_t  capacity,
UErrorCode *  errorCode 
)

Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.

  • If c does have Script_Extensions, then the Script property value (normally Common or Inherited) is not included.
  • If c does not have Script_Extensions, then the one Script code is written to the output array.
  • If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.

In other words, if the return value is 1, then the output array contains exactly c's single Script code. If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

If there are more than capacity script codes to be written, then U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. (Usual ICU buffer handling behavior.)

The Script_Extensions property is provisional. It may be modified or removed in future versions of the Unicode Standard, and thus in ICU.

Parameters
c: code point
scripts: output script code array
capacity: capacity of the scripts array
errorCode: Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)
Returns
number of script codes in c's Script_Extensions, or 1 for the single Script value, written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
Draft:
This API may be changed in future versions. It was introduced in ICU 49.
const char* uscript_getShortName ( UScriptCode  scriptCode)

Gets the abbreviated (short) script name associated with the given script code.

Returns "Mlym" given USCRIPT_MALAYALAM

Parameters
scriptCode: UScriptCode enum
Returns
script abbreviated name as given in PropertyValueAliases.txt, or NULL if scriptCode is invalid
Stable:
ICU 2.4
UBool uscript_hasScript ( UChar32  c,
UScriptCode  sc 
)

Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/.

The Script_Extensions property is provisional. It may be modified or removed in future versions of the Unicode Standard, and thus in ICU.

Parameters
c: code point
sc: script code
Returns
TRUE if sc is in Script_Extensions(c)
Draft:
This API may be changed in future versions. It was introduced in ICU 49.