module TextUtils::StringFilter

Constants

ASCIIFY_MAPPINGS

TODO: add the Unicode codepoint name for each mapping?

Public Instance Methods

asciify( content, options={} ) click to toggle source
# File lib/textutils/filter/string_filter.rb, line 132
## Returns a new string built from +content+ in which every character that
## appears as a key in ASCIIFY_MAPPINGS is swapped for its mapped value;
## characters without a mapping are copied through unchanged.
##
## +options+ is accepted but not used by this method.
def asciify( content, options={} )
  content.each_char.each_with_object( '' ) do |char, result|
    # fall back to the character itself when no mapping exists
    result << ASCIIFY_MAPPINGS.fetch( char, char )
  end
end
slugify( content, options={} ) click to toggle source
# File lib/textutils/filter/string_filter.rb, line 144
## Turns +content+ into a lowercase, dash-separated slug.
##
## Steps:
##  1) run the text through asciify and downcase it
##  2) replace the special chars $&%?!§#=*+._/()[]{} with a space and
##     remove single and double quotes
##  3) strip leading and trailing whitespace
##  4) turn every remaining run of whitespace (spaces, tabs, newlines)
##     into a single dash (-)
##
## NOTE: any other character is kept as is (no general stripping of non-word
##   characters); if it is an accented char, add it to the asciify mappings.
##   Dashes already present are kept too, e.g. "a - b" becomes "a---b".
##
## +options+ is accepted but not used by this method.
def slugify( content, options={} )
  # 1) asciify and downcase
  slug = asciify( content ).downcase

  # 2) special chars become separators (spaces); quotes are dropped outright
  slug = slug.gsub( /[$&%?!§#=*+._\/\(\)\[\]\{\}]/, ' ' )
  slug = slug.delete( "\"'" )

  # 3) + 4) trim, then collapse each whitespace run into one dash;
  #    matching \s (not just ' ') keeps tabs and newlines out of the slug
  slug.strip.gsub( /\s+/, '-' )
end