class TextAlignment::MixedAlignment
Attributes
common_elements[R]
mapped_elements[R]
position_map_begin[R]
position_map_end[R]
sdiff[R]
similarity[R]
str1_match_final[R]
str1_match_initial[R]
str2_match_final[R]
str2_match_initial[R]
Public Class Methods
new(_str1, _str2, _mappings = nil)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 20 def initialize(_str1, _str2, _mappings = nil) raise ArgumentError, "nil string" if _str1.nil? || _str2.nil? mappings ||= TextAlignment::CHAR_MAPPING str1 = _str1.dup str2 = _str2.dup _compute_mixed_alignment(str1, str2, mappings) end
Public Instance Methods
transform_a_span(span)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 38 def transform_a_span(span) {begin: @position_map_begin[span[:begin]], end: @position_map_end[span[:end]]} end
transform_begin_position(begin_position)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 30 def transform_begin_position(begin_position) @position_map_begin[begin_position] end
transform_denotations!(denotations)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 46 def transform_denotations!(denotations) denotations.map!{|d| d.begin = @position_map_begin[d.begin]; d.end = @position_map_end[d.end]; d} unless denotations.nil? end
transform_end_position(end_position)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 34 def transform_end_position(end_position) @position_map_end[end_position] end
transform_hdenotations(hdenotations)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 50 def transform_hdenotations(hdenotations) return nil if hdenotations.nil? hdenotations.collect{|d| d.dup.merge({span:transform_a_span(d[:span])})} end
transform_spans(spans)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 42 def transform_spans(spans) spans.map{|span| transform_a_span(span)} end
Private Instance Methods
_compute_mixed_alignment(str1, str2, mappings = [])
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 57 def _compute_mixed_alignment(str1, str2, mappings = []) lcsmin = TextAlignment::LCSMin.new(str1, str2) lcs = lcsmin.lcs @sdiff = lcsmin.sdiff if @sdiff.nil? @similarity = 0 return end cmp = TextAlignment::LCSComparison.new(str1, str2, lcs, @sdiff) @similarity = compute_similarity(str1, str2, @sdiff) @str1_match_initial = cmp.str1_match_initial @str1_match_final = cmp.str1_match_final @str2_match_initial = cmp.str2_match_initial @str2_match_final = cmp.str2_match_final posmap_begin, posmap_end = {}, {} @common_elements, @mapped_elements = [], [] addition, deletion = [], [] @sdiff.each do |h| case h.action when '=' p1, p2 = h.old_position, h.new_position @common_elements << [str1[p1], str2[p2]] posmap_begin[p1], posmap_end[p1] = p2, p2 if !addition.empty? && deletion.empty? posmap_end[p1] = p2 - addition.length unless p1 == 0 elsif addition.empty? && !deletion.empty? deletion.each{|p| posmap_begin[p], posmap_end[p] = p2, p2} elsif !addition.empty? && !deletion.empty? if addition.length > 1 || deletion.length > 1 galign = TextAlignment::GLCSAlignment.new(str1[deletion[0] .. deletion[-1]], str2[addition[0] .. addition[-1]], mappings) galign.position_map_begin.each {|k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0]} galign.position_map_end.each {|k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0]} posmap_begin[p1], posmap_end[p1] = p2, p2 @common_elements += galign.common_elements @mapped_elements += galign.mapped_elements else posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0] deletion[1..-1].each{|p| posmap_begin[p], posmap_end[p] = nil, nil} @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]] end end addition.clear; deletion.clear when '!' deletion << h.old_position addition << h.new_position when '-' deletion << h.old_position when '+' addition << h.new_position end end p1, p2 = str1.length, str2.length posmap_begin[p1], posmap_end[p1] = p2, p2 if !addition.empty? && deletion.empty? posmap_end[p1] = p2 - addition.length unless p1 == 0 elsif addition.empty? && !deletion.empty? deletion.each{|p| posmap_begin[p], posmap_end[p] = p2, p2} elsif !addition.empty? && !deletion.empty? if addition.length > 1 && deletion.length > 1 galign = TextAlignment::GLCSAlignment.new(str1[deletion[0] .. deletion[-1]], str2[addition[0] .. addition[-1]], mappings) galign.position_map_begin.each {|k, v| posmap_begin[k + deletion[0]] = v.nil? ? nil : v + addition[0]} galign.position_map_end.each {|k, v| posmap_end[k + deletion[0]] = v.nil? ? nil : v + addition[0]} posmap_begin[p1], posmap_end[p1] = p2, p2 @common_elements += galign.common_elements @mapped_elements += galign.mapped_elements else posmap_begin[deletion[0]], posmap_end[deletion[0]] = addition[0], addition[0] deletion[1..-1].each{|p| posmap_begin[p], posmap_end[p] = nil, nil} @mapped_elements << [str1[deletion[0], deletion.length], str2[addition[0], addition.length]] end end @position_map_begin = posmap_begin.sort.to_h @position_map_end = posmap_end.sort.to_h end
compute_similarity(s1, s2, sdiff)
click to toggle source
# File lib/text_alignment/mixed_alignment.rb, line 144 def compute_similarity(s1, s2, sdiff) return 0 if sdiff.nil? # recoverbility count_nws = sdiff.count{|d| d.old_element =~ /\S/} count_nws_match = sdiff.count{|d| d.action == '=' && d.old_element =~ /\S/} coverage = count_nws_match.to_f / count_nws # fragmentation rate frag_str = sdiff.collect do |d| case d.action when '=' '=' when '-' '' when '+' (d.new_element =~ /\S/) ? '+' : '' else '' end end.join.sub(/^[^=]++/, '').sub(/[^=]+$/, '') count_frag = frag_str.scan(/=+/).count rate_frag = 1.0 / count_frag similarity = coverage * rate_frag end