class TextAlignment::CharMapping

Attributes

index_enmap[R]
mapped_text[R]

Public Class Methods

new(_text, char_mapping = nil, to_ignore_whitespaces = false) click to toggle source
# File lib/text_alignment/char_mapping.rb, line 85
# Builds the mapped text and the two offset indexes for +_text+.
#
# _text:: text handed to enmap_text.
# char_mapping:: mapping pairs handed to enmap_text. When nil or false,
#                TextAlignment::CHAR_MAPPING is used, ordered so that pairs
#                whose second element is longer come first.
# to_ignore_whitespaces:: when truthy, the "_0" whitespace helpers are bound;
#                         otherwise the "_1" helpers are bound.
def initialize(_text, char_mapping = nil, to_ignore_whitespaces = false)
        # Pick the pair of whitespace helpers once and keep them as Method objects.
        positions_helper, squeeze_helper =
                if to_ignore_whitespaces
                        [:get_positions_squeeze_ws_0, :squeeze_ws_0!]
                else
                        [:get_positions_squeeze_ws_1, :squeeze_ws_1!]
                end
        @method_get_positions_squeeze_ws = method(positions_helper)
        @method_squeeze_ws = method(squeeze_helper)

        char_mapping ||= TextAlignment::CHAR_MAPPING.sort{|x, y| y[1].length <=> x[1].length}

        mapped, offset_pairs = enmap_text(_text, char_mapping)
        @mapped_text = mapped
        # Forward index (first element => second element) and its inverse.
        @index_enmap = offset_pairs.to_h
        @index_demap = offset_pairs.map(&:reverse).to_h
end

Public Instance Methods

demap_position(position) click to toggle source
# File lib/text_alignment/char_mapping.rb, line 104
# Looks +position+ up in the @index_demap hash built by the constructor.
# Returns nil when the index has no entry for it.
def demap_position(position)
        @index_demap.fetch(position, nil)
end
enmap_denotations(denotations) click to toggle source
# File lib/text_alignment/char_mapping.rb, line 108
# Returns a new array in which each denotation is a copy of the original with
# its :span replaced by one whose :begin and :end went through
# enmap_position. The input hashes are left untouched. Returns nil when
# +denotations+ is nil.
def enmap_denotations(denotations)
        denotations&.map do |denotation|
                span = denotation[:span]
                mapped_span = {begin: enmap_position(span[:begin]), end: enmap_position(span[:end])}
                # Hash#merge already returns a fresh hash.
                denotation.merge(span: mapped_span)
        end
end
enmap_position(position) click to toggle source
# File lib/text_alignment/char_mapping.rb, line 100
# Looks +position+ up in the @index_enmap hash built by the constructor.
# Returns nil when the index has no entry for it.
def enmap_position(position)
        @index_enmap.fetch(position, nil)
end

Private Instance Methods

enmap_text(_text, char_mapping, no_ws = false) click to toggle source
# File lib/text_alignment/char_mapping.rb, line 118
# Normalizes a copy of +_text+ and records how character offsets move.
#
# _text:: the source text; it is duplicated and never modified.
# char_mapping:: enumerable of [one, long] string pairs. Pairs whose +long+
#                is a single character replace +one+ by +long+; pairs whose
#                +long+ is longer replace every +long+ by +one+. Pairs with
#                an empty +long+ are ignored.
# no_ws:: not read by this method; kept so existing callers keep working.
#
# Whitespace handling is delegated to @method_get_positions_squeeze_ws and
# @method_squeeze_ws, which the constructor sets.
#
# Returns [text, offset_mapping], where offset_mapping is an array of
# [offset_before, offset_after] pairs. Offsets strictly inside a replaced
# span get no pair.
def enmap_text(_text, char_mapping, no_ws = false)
        text = _text.dup

        # Single-character replacements keep every offset, so apply them directly.
        char_mapping.each do |one, long|
                text.gsub!(one, long) if long.length == 1
        end

        # Collect the replacement positions, (position, old_length, new_length),
        # for the multi-character sequences.
        rpositions = []
        char_mapping.each do |one, long|
                # Same predicate as the collapsing pass below. It also skips an
                # empty +long+, for which the index loop would never advance.
                next unless long.length > 1

                init_next = 0
                while (loc = text.index(long, init_next))
                        rpositions << [loc, long.length, 1]
                        init_next = loc + long.length
                end

                # Overwrite the match with a same-length placeholder so that later
                # mappings cannot match inside it and the offsets stay valid.
                text.gsub!(long, one * long.length)
        end

        # Add the replacement positions for whitespace runs and order by position.
        rpositions += @method_get_positions_squeeze_ws.call(text)
        rpositions.sort!{|a, b| a[0] <=> b[0]}

        # Walk the replacements left to right: i is the offset before the
        # replacements, j the offset after them.
        i, j = 0, 0
        offset_mappings = rpositions.map do |loc, old_len, new_len|
                pre_len = loc - i
                # The range is inclusive, so the start of the replaced span is mapped too.
                m = (0 .. pre_len).map{|c| [i + c, j + c]}
                i = loc + old_len
                j += pre_len + new_len

                m
        end

        # Tail after the last replacement, including the end-of-text offset.
        pre_len = text.length - i
        offset_mappings << (0 .. pre_len).map{|c| [i + c, j + c]}

        # One-level flatten joins the segments without re-copying the
        # accumulated result for every segment.
        offset_mapping = offset_mappings.flatten(1)

        # Collapse the placeholders into the single mapped character.
        char_mapping.each do |one, long|
                text.gsub!(one * long.length, one) if long.length > 1
        end

        # Apply the whitespace squeeze whose positions were recorded above.
        @method_squeeze_ws.call(text)

        [text, offset_mapping]
end
get_positions_squeeze_ws_0(text) click to toggle source

Returns a [position, length, 0] triple for every run of whitespace, i.e. the runs that are squeezed to nothing.

# File lib/text_alignment/char_mapping.rb, line 187
# Scans +text+ for runs of one or more whitespace characters and returns one
# [start, length, 0] triple per run, in order of appearance.
def get_positions_squeeze_ws_0(text)
        runs = []
        text.scan(/\s+/) do
                run = Regexp.last_match
                start = run.begin(0)
                runs << [start, run.end(0) - start, 0]
        end
        runs
end
get_positions_squeeze_ws_1(text) click to toggle source

Returns a [position, length, 1] triple for every run of two or more whitespace characters, i.e. the runs that are squeezed to a single space.

# File lib/text_alignment/char_mapping.rb, line 176
# Scans +text+ for runs of two or more whitespace characters and returns one
# [start, length, 1] triple per run, in order of appearance.
def get_positions_squeeze_ws_1(text)
        text.enum_for(:scan, /\s{2,}/).map do
                run = Regexp.last_match
                start = run.begin(0)
                [start, run.end(0) - start, 1]
        end
end
squeeze_ws_0!(text) click to toggle source
# File lib/text_alignment/char_mapping.rb, line 195
# Removes every run of whitespace from +text+ in place.
# Returns +text+, or nil when nothing was removed (String#gsub! semantics).
def squeeze_ws_0!(text)
        whitespace_run = /\s+/
        text.gsub!(whitespace_run, '')
end
squeeze_ws_1!(text) click to toggle source
# File lib/text_alignment/char_mapping.rb, line 191
# Replaces every run of two or more whitespace characters in +text+ with a
# single space, in place. Single whitespace characters are left as they are.
# Returns +text+, or nil when nothing was replaced (String#gsub! semantics).
def squeeze_ws_1!(text)
        long_whitespace_run = /\s{2,}/
        single_space = ' '
        text.gsub!(long_whitespace_run, single_space)
end