module TextUtils::UnicodeHelper
Constants
- U_HYPHEN
NB:
U_HYPHEN_MINUS is the standard ASCII hyphen-minus (U+002D), i.e. the plain "-" character; see en.wikipedia.org/wiki/Dash
- U_MDASH
- U_MINUS
- U_NDASH
- U_NON_BREAKING_HYPHEN
Public Instance Methods
convert_unicode_dashes_to_plain_ascii( text, opts = {} )
click to toggle source
# File lib/textutils/helper/unicode_helper.rb, line 18
#
# Replaces every occurrence of U_HYPHEN, U_NON_BREAKING_HYPHEN, U_MINUS,
# U_NDASH or U_MDASH in +text+ with a plain ascii hyphen-minus ('-') and
# prints one "*** warning: ..." line to stdout for each replacement.
#
# text - String to scan. It is not modified in place; String#gsub returns
#        a new String.
# opts - Hash of options (default: {}):
#        :path - optional file name; when present it is appended to each
#                warning as " in file >PATH<".
#
# Returns a new String with the listed dashes replaced by '-'.
def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
  text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |dash|
    # The block parameter is the matched text; the single capture group spans
    # the whole pattern, so it equals what $1 would hold.
    found =
      case dash
      when U_HYPHEN              then 'hyphen U+2010'
      when U_NON_BREAKING_HYPHEN then 'non_breaking_hyphen U+2011'
      when U_MINUS               then 'minus U+2212'
      when U_NDASH               then 'ndash U+2013'
      when U_MDASH               then 'mdash U+2014'
      else
        # Defensive fallback: the pattern above only matches the five
        # constants, so this is not expected to be reached.
        "unknown unicode dash U+#{'%04X' % dash.ord}"
      end

    msg = "found #{found} (#{dash})"
    msg += " in file >#{opts[:path]}<" if opts[:path]
    msg += '; converting to plain ascii hyphen_minus (-)'
    puts "*** warning: #{msg}"

    '-' # replacement for the matched dash
  end
end