class TextAlignment::CharMapping
Attributes
index_enmap[R]
mapped_text[R]
Public Class Methods
new(_text, char_mapping = nil, to_ignore_whitespaces = false)
click to toggle source
# File lib/text_alignment/char_mapping.rb, line 85
#
# Maps +_text+ and keeps the position indexes between the original text and
# the mapped text.
#
# @param _text [String] the text to be mapped
# @param char_mapping [Array, nil] pairs of [one, long] strings; when nil,
#   TextAlignment::CHAR_MAPPING is used, ordered by descending length of the
#   second element of each pair
# @param to_ignore_whitespaces [Boolean] when truthy the *_ws_0 helpers are
#   selected, otherwise the *_ws_1 helpers
def initialize(_text, char_mapping = nil, to_ignore_whitespaces = false)
  # Choose the pair of whitespace helpers once; enmap_text calls them through
  # these Method objects.
  @method_get_positions_squeeze_ws, @method_squeeze_ws =
    if to_ignore_whitespaces
      [method(:get_positions_squeeze_ws_0), method(:squeeze_ws_0!)]
    else
      [method(:get_positions_squeeze_ws_1), method(:squeeze_ws_1!)]
    end

  char_mapping ||= TextAlignment::CHAR_MAPPING.sort { |a, b| b[1].length <=> a[1].length }

  @mapped_text, offset_mapping = enmap_text(_text, char_mapping)

  # offset_mapping is a list of [original position, mapped position] pairs.
  @index_enmap = offset_mapping.to_h
  @index_demap = offset_mapping.map(&:reverse).to_h
end
Public Instance Methods
demap_position(position)
click to toggle source
# File lib/text_alignment/char_mapping.rb, line 104
#
# Looks +position+ up in the demap index (@index_demap), which is keyed by
# positions in the mapped text.
#
# @param position [Integer] a position in the mapped text
# @return [Integer, nil] the value stored for +position+, or nil when the
#   index has no entry for it
def demap_position(position)
  @index_demap[position]
end
enmap_denotations(denotations)
click to toggle source
# File lib/text_alignment/char_mapping.rb, line 108
#
# Returns copies of the given denotations whose :span has its :begin and :end
# translated through enmap_position. The given hashes are not modified.
#
# @param denotations [Array<Hash>, nil] hashes with a :span of {begin:, end:}
# @return [Array<Hash>, nil] the translated copies, or nil when given nil
def enmap_denotations(denotations)
  return if denotations.nil?

  denotations.map do |denotation|
    span = denotation[:span]
    mapped_span = {
      begin: enmap_position(span[:begin]),
      end: enmap_position(span[:end])
    }
    denotation.dup.merge(span: mapped_span)
  end
end
enmap_position(position)
click to toggle source
# File lib/text_alignment/char_mapping.rb, line 100
#
# Looks +position+ up in the enmap index (@index_enmap), which is keyed by
# positions in the original text.
#
# @param position [Integer] a position in the original text
# @return [Integer, nil] the value stored for +position+, or nil when the
#   index has no entry for it
def enmap_position(position)
  @index_enmap[position]
end
Private Instance Methods
enmap_text(_text, char_mapping, no_ws = false)
click to toggle source
# File lib/text_alignment/char_mapping.rb, line 118
#
# Applies the character mapping and the whitespace squeezing to a copy of
# +_text+, and computes how positions move between the two texts.
#
# Each char_mapping entry is a pair [one, long]:
# * when +long+ is a single character, every +one+ in the text becomes +long+;
# * when +long+ is longer, every +long+ in the text becomes +one+.
# Entries with an empty +long+ are ignored.
#
# @param _text [String] the source text; it is not modified
# @param char_mapping [Array<Array(String, String)>] the [one, long] pairs,
#   applied in the given order
# @param no_ws [Boolean] not used by this method
# @return [Array(String, Array<Array(Integer, Integer)>)] the mapped text and
#   the list of [position in the original, position in the mapped text] pairs
def enmap_text(_text, char_mapping, no_ws = false)
  text = _text.dup

  # Single-letter mappings do not change any position, so apply them directly.
  # The block form inserts the replacement literally (no backslash expansion).
  char_mapping.each do |one, long|
    text.gsub!(one) { long } if long.length == 1
  end

  # Replacement positions, [position, old_length, new_length, replacement],
  # for the long mappings.
  char_rpositions = []
  char_mapping.each do |one, long|
    # An empty +long+ would match at every index and never advance.
    next unless long.length > 1

    init_next = 0
    while (loc = text.index(long, init_next))
      char_rpositions << [loc, long.length, 1, one]
      init_next = loc + long.length
    end

    # Fill each match with a same-length placeholder so that positions stay
    # valid and later mappings cannot match inside an already mapped span.
    text.gsub!(long) { one * long.length }
  end

  # Add the replacement positions, [position, old_length, new_length], of the
  # whitespace runs to be squeezed, then order everything by position.
  rpositions = char_rpositions + @method_get_positions_squeeze_ws.call(text)
  rpositions.sort_by!(&:first)

  # Walk the replacements left to right: +i+ is the position in the original
  # text, +j+ the position in the mapped text. The text before a replacement,
  # and the start of the replacement itself, map one to one.
  offset_mapping = []
  i = 0
  j = 0
  rpositions.each do |loc, old_len, new_len|
    pre_len = loc - i
    (0..pre_len).each { |c| offset_mapping << [i + c, j + c] }
    i = loc + old_len
    j += pre_len + new_len
  end
  pre_len = text.length - i
  (0..pre_len).each { |c| offset_mapping << [i + c, j + c] }

  # Collapse the placeholders at the recorded positions only, right to left so
  # that the positions still to be processed stay valid. A run of +one+ that
  # was already in the source text is left as it is, in agreement with the
  # offset mapping above.
  char_rpositions.sort_by(&:first).reverse_each do |loc, old_len, _new_len, one|
    text[loc, old_len] = one
  end

  # Squeeze the whitespace runs.
  @method_squeeze_ws.call(text)

  [text, offset_mapping]
end
get_positions_squeeze_ws_0(text)
click to toggle source
Gets the positions of the whitespace runs that are to be squeezed to nothing (zero characters).
# File lib/text_alignment/char_mapping.rb, line 187
#
# Lists every run of one or more whitespace characters in +text+ as a
# replacement position whose new length is zero.
#
# @param text [String] the text to scan
# @return [Array<Array(Integer, Integer, Integer)>] one
#   [start, run length, 0] triple per run, in order of appearance
def get_positions_squeeze_ws_0(text)
  runs = []
  text.scan(/\s+/) do
    first, last = Regexp.last_match.offset(0)
    runs << [first, last - first, 0]
  end
  runs
end
get_positions_squeeze_ws_1(text)
click to toggle source
Gets the positions of the runs of consecutive whitespace characters that are to be squeezed to a single space.
# File lib/text_alignment/char_mapping.rb, line 176
#
# Lists every run of two or more whitespace characters in +text+ as a
# replacement position whose new length is one. A single whitespace character
# on its own is not reported.
#
# @param text [String] the text to scan
# @return [Array<Array(Integer, Integer, Integer)>] one
#   [start, run length, 1] triple per run, in order of appearance
def get_positions_squeeze_ws_1(text)
  text.to_enum(:scan, /\s{2,}/).map do
    match = Regexp.last_match
    start = match.begin(0)
    [start, match.end(0) - start, 1]
  end
end
squeeze_ws_0!(text)
click to toggle source
# File lib/text_alignment/char_mapping.rb, line 195
#
# Removes every whitespace character from +text+, in place.
#
# @param text [String] the string to modify
# @return [String, nil] +text+ when something was removed, nil when +text+
#   contained no whitespace
def squeeze_ws_0!(text)
  text.gsub!(/\s+/) { '' }
end
squeeze_ws_1!(text)
click to toggle source
# File lib/text_alignment/char_mapping.rb, line 191
#
# Replaces every run of two or more whitespace characters in +text+ with one
# space, in place. A single whitespace character on its own is kept as it is.
#
# @param text [String] the string to modify
# @return [String, nil] +text+ when something was replaced, nil when +text+
#   contained no such run
def squeeze_ws_1!(text)
  text.gsub!(/\s{2,}/) { ' ' }
end