module SoundX
Constants
- VERSION
Public Class Methods
encode(p1)
click to toggle source
/*
 * Ruby-callable Soundex encoder.
 *
 * Takes exactly one argument, which must be a String, and returns a new
 * four-character String: the first input character upper-cased, followed by
 * up to three code characters looked up in the file-level `mapping` table,
 * right-padded with '0' when fewer than three codes are produced.
 *
 * Raises ArgumentError ("non-ASCII character found") when a byte above 127
 * is encountered while scanning. Scanning stops as soon as three codes have
 * been written, so bytes after that point are not inspected.
 *
 * `mapping` is indexed by lower-cased byte value. From its use here, 0xFE
 * marks a character to ignore and '0' marks a vowel-like character that
 * produces no code.
 */
static VALUE rb_soundx(int argc, VALUE* argv, VALUE self)
{
    VALUE input;
    const unsigned char *src;
    size_t srclen;
    size_t i;
    unsigned char written = 0; /* codes emitted so far; never exceeds 3 */
    unsigned char current;
    unsigned char match;
    /* Destination is padded with up to 3 trailing zeros for short names. */
    char dest[5] = {'0', '0', '0', '0', '\0'};

    rb_scan_args(argc, argv, "1", &input);
    Check_Type(input, T_STRING);

    srclen = RSTRING_LEN(input);
    src = (const unsigned char *) StringValueCStr(input);

    if (src[0] > 127) {
        rb_raise(rb_eArgError, "non-ASCII character found");
    }

    dest[0] = toupper(src[0]);

    for (i = 1; i < srclen && written < 3; i++) {
        if (src[i] > 127) {
            /* rb_raise does not return, so nothing is needed after it. */
            rb_raise(rb_eArgError, "non-ASCII character found");
        }

        current = tolower(src[i]);
        match = mapping[current];

        /* Characters the table marks as ignorable contribute nothing. */
        if (0xFE == match) {
            continue;
        }

        /*
         * The 2nd character must still respect the 'double code number'
         * rule: it is dropped when it shares the first character's code.
         */
        if (1 == i && match == mapping[tolower(src[0])]) {
            continue;
        }

        /*
         * Skip a repeat of the previous character (e.g. Gutierrez, 2nd 'r'
         * ignored) and any different letter that has the same code as the
         * character immediately before it (e.g. Jackson: c, k, s).
         */
        if (src[i] == src[i - 1] ||
            mapping[current] == mapping[tolower(src[i - 1])]) {
            continue;
        }

        /*
         * If "H" or "W" separates two consonants that have the same code,
         * the consonant to the right is not coded.
         *
         * The look-ahead byte is only used as a table index once it is known
         * to be ASCII; a non-ASCII byte is left for the next iteration, which
         * raises, instead of being looked up or skipped unvalidated.
         *
         * NOTE(review): while written == 0, dest[written] is the initial
         * letter itself rather than its code, so this comparison presumably
         * never matches for the first letter -- confirm whether that is
         * intended.
         */
        if ((current == 'h' || current == 'w') &&
            i + 1 < srclen && src[i + 1] <= 127) {
            if (dest[written] == mapping[tolower(src[i + 1])]) {
                i = i + 1; /* Skip over the next consonant */
                continue;
            }
        }

        /* We landed on a vowel-like character, so move to the next char. */
        if ('0' == match) {
            continue;
        }

        dest[written + 1] = mapping[current];
        written++;
    }

    return rb_str_new_cstr(dest);
}