module Unicode
Constants
- VERSION
Public Class Methods
Source
/*
 * Ruby-visible entry point that runs get_categories_internal() over +str+
 * using the catname_abbr name table.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns whatever rb_ensure() yields from get_categories_internal().
 */
VALUE
unicode_get_abbr_categories(VALUE obj, VALUE str)
{
  WString wstr;
  /* param is initialized from str before CONVERT_TO_UTF8(str) runs. */
  get_categories_param param = { &wstr, str, catname_abbr };

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));

  /*
   * rb_ensure() guarantees get_categories_ensure() is invoked with &wstr
   * even if the body raises; wstr is expected to be freed there.
   */
  return rb_ensure(get_categories_internal, (VALUE)&param,
                   get_categories_ensure, (VALUE)&wstr);
}
Source
/*
 * Ruby-visible entry point: runs capitalize_internal() over +str+.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the converted buffer and passed
 * through the ENC_() and TO_() macros. The sort_canonical() pass is
 * disabled for this operation.
 */
static VALUE
unicode_capitalize(VALUE obj, VALUE str)
{
  WString source;
  WString capitalized;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, then apply the case mapping. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&capitalized);
  capitalize_internal(&source, &capitalized);
  WStr_free(&source);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&capitalized, &bytes);
  WStr_free(&capitalized);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point that runs get_categories_internal() over +str+
 * using the catname_long name table.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns whatever rb_ensure() yields from get_categories_internal().
 */
VALUE
unicode_get_categories(VALUE obj, VALUE str)
{
  WString wstr;
  /* param is initialized from str before CONVERT_TO_UTF8(str) runs. */
  get_categories_param param = { &wstr, str, catname_long };

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));

  /*
   * rb_ensure() guarantees get_categories_ensure() is invoked with &wstr
   * even if the body raises; wstr is expected to be freed there.
   */
  return rb_ensure(get_categories_internal, (VALUE)&param,
                   get_categories_ensure, (VALUE)&wstr);
}
Source
/*
 * Ruby-visible entry point: sort_canonical() on the input, then
 * compose_internal().
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the composed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_compose(VALUE obj, VALUE str)
{
  WString source;
  WString composed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* The input is reordered in place before composition. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  sort_canonical(&source);
  WStr_alloc(&composed);
  compose_internal(&source, &composed);
  WStr_free(&source);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_compat_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose_compat(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_safe_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose_safe(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: runs downcase_internal() over +str+.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the converted buffer and passed
 * through the ENC_() and TO_() macros. The sort_canonical() pass is
 * disabled for this operation.
 */
static VALUE
unicode_downcase(VALUE obj, VALUE str)
{
  WString source;
  WString lowered;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, then apply the case mapping. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&lowered);
  downcase_internal(&source, &lowered);
  WStr_free(&source);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&lowered, &bytes);
  WStr_free(&lowered);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_internal(), sort_canonical(), then
 * compose_internal().
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the composed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_normalize_C(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Stage 2: compose the reordered sequence. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_safe_internal(), sort_canonical(),
 * then compose_internal().
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the composed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_normalize_safe(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Stage 2: compose the reordered sequence. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_safe_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose_safe(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_compat_internal(), sort_canonical(),
 * then compose_internal().
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the composed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_normalize_KC(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Stage 2: compose the reordered sequence. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_compat_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose_compat(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_internal(), sort_canonical(), then
 * compose_internal().
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the composed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_normalize_C(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Stage 2: compose the reordered sequence. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_safe_internal(), sort_canonical(),
 * then compose_internal().
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the composed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_normalize_safe(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Stage 2: compose the reordered sequence. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_safe_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose_safe(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_compat_internal(), sort_canonical(),
 * then compose_internal().
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the composed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_normalize_KC(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Stage 2: compose the reordered sequence. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: decompose_compat_internal() followed by
 * sort_canonical() on the result.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the decomposed buffer and passed
 * through the ENC_() and TO_() macros.
 */
static VALUE
unicode_decompose_compat(VALUE obj, VALUE str)
{
  WString source;
  WString decomposed;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, decompose, then reorder the output. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&source, &decomposed);
  WStr_free(&source);
  sort_canonical(&decomposed);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: compares two strings after running each
 * through decompose_internal() and sort_canonical().
 *
 * obj  - receiver (not used here).
 * str1 - must be a T_STRING; Check_Type() raises otherwise.
 * str2 - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns INT2FIX() of the raw strcmp() result on the two converted,
 * NUL-terminated byte buffers.
 */
static VALUE
unicode_strcmp(VALUE obj, VALUE str1, VALUE str2)
{
  WString lhs_wide;
  WString rhs_wide;
  WString lhs_decomposed;
  WString rhs_decomposed;
  UString lhs_bytes;
  UString rhs_bytes;
  int order;

  Check_Type(str1, T_STRING);
  Check_Type(str2, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str1);
  CONVERT_TO_UTF8(str2);
#endif

  /* Both inputs go through the same decompose + reorder pipeline. */
  WStr_allocWithUTF8L(&lhs_wide, RSTRING_PTR(str1), RSTRING_LEN(str1));
  WStr_allocWithUTF8L(&rhs_wide, RSTRING_PTR(str2), RSTRING_LEN(str2));
  WStr_alloc(&lhs_decomposed);
  WStr_alloc(&rhs_decomposed);
  decompose_internal(&lhs_wide, &lhs_decomposed);
  decompose_internal(&rhs_wide, &rhs_decomposed);
  WStr_free(&lhs_wide);
  WStr_free(&rhs_wide);
  sort_canonical(&lhs_decomposed);
  sort_canonical(&rhs_decomposed);

  /* Wide strings -> byte buffers; the wide strings are released after. */
  UniStr_alloc(&lhs_bytes);
  UniStr_alloc(&rhs_bytes);
  WStr_convertIntoUString(&lhs_decomposed, &lhs_bytes);
  WStr_convertIntoUString(&rhs_decomposed, &rhs_bytes);
  WStr_free(&lhs_decomposed);
  WStr_free(&rhs_decomposed);

  /* strcmp() needs terminated buffers, so append a NUL to each. */
  UniStr_addChar(&lhs_bytes, '\0');
  UniStr_addChar(&rhs_bytes, '\0');
  order = strcmp((char*)lhs_bytes.str, (char*)rhs_bytes.str);

  UniStr_free(&lhs_bytes);
  UniStr_free(&rhs_bytes);
  return INT2FIX(order);
}
Source
/*
 * Ruby-visible entry point: compares two strings after running each
 * through decompose_compat_internal() and sort_canonical().
 *
 * obj  - receiver (not used here).
 * str1 - must be a T_STRING; Check_Type() raises otherwise.
 * str2 - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns INT2FIX() of the raw strcmp() result on the two converted,
 * NUL-terminated byte buffers.
 */
static VALUE
unicode_strcmp_compat(VALUE obj, VALUE str1, VALUE str2)
{
  WString lhs_wide;
  WString rhs_wide;
  WString lhs_decomposed;
  WString rhs_decomposed;
  UString lhs_bytes;
  UString rhs_bytes;
  int order;

  Check_Type(str1, T_STRING);
  Check_Type(str2, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str1);
  CONVERT_TO_UTF8(str2);
#endif

  /* Both inputs go through the same decompose + reorder pipeline. */
  WStr_allocWithUTF8L(&lhs_wide, RSTRING_PTR(str1), RSTRING_LEN(str1));
  WStr_allocWithUTF8L(&rhs_wide, RSTRING_PTR(str2), RSTRING_LEN(str2));
  WStr_alloc(&lhs_decomposed);
  WStr_alloc(&rhs_decomposed);
  decompose_compat_internal(&lhs_wide, &lhs_decomposed);
  decompose_compat_internal(&rhs_wide, &rhs_decomposed);
  WStr_free(&lhs_wide);
  WStr_free(&rhs_wide);
  sort_canonical(&lhs_decomposed);
  sort_canonical(&rhs_decomposed);

  /* Wide strings -> byte buffers; the wide strings are released after. */
  UniStr_alloc(&lhs_bytes);
  UniStr_alloc(&rhs_bytes);
  WStr_convertIntoUString(&lhs_decomposed, &lhs_bytes);
  WStr_convertIntoUString(&rhs_decomposed, &rhs_bytes);
  WStr_free(&lhs_decomposed);
  WStr_free(&rhs_decomposed);

  /* strcmp() needs terminated buffers, so append a NUL to each. */
  UniStr_addChar(&lhs_bytes, '\0');
  UniStr_addChar(&rhs_bytes, '\0');
  order = strcmp((char*)lhs_bytes.str, (char*)rhs_bytes.str);

  UniStr_free(&lhs_bytes);
  UniStr_free(&rhs_bytes);
  return INT2FIX(order);
}
Source
/*
 * Ruby-visible entry point that runs get_text_elements_internal() over
 * +str+.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns whatever rb_ensure() yields from get_text_elements_internal().
 */
VALUE
unicode_get_text_elements(VALUE obj, VALUE str)
{
  WString wstr;
  /* param is initialized from str before CONVERT_TO_UTF8(str) runs. */
  get_text_elements_param param = { &wstr, str };

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));

  /*
   * rb_ensure() guarantees get_text_elements_ensure() is invoked with
   * &wstr even if the body raises; wstr will be freed there.
   */
  return rb_ensure(get_text_elements_internal, (VALUE)&param,
                   get_text_elements_ensure, (VALUE)&wstr);
}
Source
/*
 * Ruby-visible entry point: runs upcase_internal() over +str+.
 *
 * obj - receiver (not used here).
 * str - must be a T_STRING; Check_Type() raises otherwise.
 *
 * Returns a new Ruby string built from the converted buffer and passed
 * through the ENC_() and TO_() macros. The sort_canonical() pass is
 * disabled for this operation.
 */
static VALUE
unicode_upcase(VALUE obj, VALUE str)
{
  WString source;
  WString raised;
  UString bytes;
  VALUE converted;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Bytes of str -> wide string, then apply the case mapping. */
  WStr_allocWithUTF8L(&source, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&raised);
  upcase_internal(&source, &raised);
  WStr_free(&source);

  /* Wide string -> byte buffer; the wide string is released right after. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&raised, &bytes);
  WStr_free(&raised);

  converted = TO_(str, ENC_(rb_str_new((char*)bytes.str, bytes.len)));
  UniStr_free(&bytes);
  return converted;
}
Source
/*
 * Ruby-visible entry point: sums a per-code-point column width over a
 * string.
 *
 * argc/argv - one required argument (the string, must be a T_STRING) and
 *             one optional argument; when the optional argument is given
 *             and truthy, code points whose get_eawidth() is w_A count as
 *             two columns.
 * obj       - receiver (not used here).
 *
 * Returns INT2FIX() of the accumulated width, or INT2FIX(-1) as soon as a
 * control character (1..31 or 0x7f..0x9f) is encountered.
 */
VALUE
unicode_wcswidth(int argc, VALUE* argv, VALUE obj)
{
  WString wstr;
  VALUE str;
  VALUE cjk;
  int nargs;
  int idx;
  int total = 0;
  int ambiguous_is_wide = 0;

  nargs = rb_scan_args(argc, argv, "11", &str, &cjk);
  if (nargs > 1)
    ambiguous_is_wide = RTEST(cjk);

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&wstr, RSTRING_PTR(str), RSTRING_LEN(str));

  for (idx = 0; idx < wstr.len; idx++) {
    int cp = wstr.str[idx];
    int category = get_gencat(cp);
    int ea = get_eawidth(cp);

    /* Control Characters: the whole result becomes -1. */
    if ((cp > 0 && cp < 32) || (cp >= 0x7f && cp < 0xa0)) {
      total = -1;
      break;
    }

    /* Zero width; SOFT HYPHEN (0x00ad) is explicitly excluded. */
    if (cp != 0x00ad &&
        (category == c_Mn || category == c_Me || /* Non-spacing Marks */
         category == c_Cf ||                     /* Format */
         cp == 0 ||                              /* NUL */
         (cp >= 0x1160 && cp <= 0x11ff)))        /* HANGUL JUNGSEONG/JONGSEONG */
      continue;

    if (ea == w_F || ea == w_W ||                /* Fullwidth or Wide */
        (cp >= 0x4db6 && cp <= 0x4dbf) ||        /* CJK Reserved */
        (cp >= 0x9fcd && cp <= 0x9fff) ||        /* CJK Reserved */
        (cp >= 0xfa6e && cp <= 0xfa6f) ||        /* CJK Reserved */
        (cp >= 0xfada && cp <= 0xfaff) ||        /* CJK Reserved */
        (cp >= 0x2a6d7 && cp <= 0x2a6ff) ||      /* CJK Reserved */
        (cp >= 0x2b735 && cp <= 0x2b73f) ||      /* CJK Reserved */
        (cp >= 0x2b81e && cp <= 0x2f7ff) ||      /* CJK Reserved */
        (cp >= 0x2fa1e && cp <= 0x2fffd) ||      /* CJK Reserved */
        (cp >= 0x30000 && cp <= 0x3fffd) ||      /* CJK Reserved */
        (ambiguous_is_wide && ea == w_A)) {      /* East Asian Ambiguous */
      total += 2;
    } else {
      total++;                                   /* Halfwidth or Neutral */
    }
  }

  WStr_free(&wstr);
  return INT2FIX(total);
}