class SynonymFinder::GroupOrganizer
Public Class Methods
new(synonym_finder)
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 4
# Builds an organizer bound to the given synonym finder.
#
# synonym_finder - object that responds to +db+; both the finder itself and
#                  the handle returned by +db+ are kept on the instance.
def initialize(synonym_finder)
  # In-memory lookup: name id => { match type => group id }.
  @groups = {}
  @synonym_finder = synonym_finder
  @db = synonym_finder.db
end
Public Instance Methods
organize()
click to toggle source
Finds duplication groups for a name. A name can belong to one or more duplication groups: chresonym, lexical variant, homotypic, alt placement.
# File lib/synonym-finder/group_organizer.rb, line 11
# Groups the finder's matches and returns the grouped result.
#
# Writes a "Grouping results" log entry tagged with the finder's object id,
# runs organize_matches, and returns whatever get_output produces.
def organize
  finder_id = @synonym_finder.object_id
  SynonymFinder.logger_write(finder_id, "Grouping results")
  organize_matches
  # Partial-match grouping is switched off in the source:
  # organize_partial_matches
  get_output
end
Private Instance Methods
add_to_group(key, value)
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 80
# Attaches the ungrouped member of a matched pair to the group its partner
# already has for this match type.
#
# key   - two-element pair of name ids.
# value - match data; value[:type] selects the group type, and the whole
#         value is handed to update_score / get_score.
def add_to_group(key, value)
  type = value[:type]
  first_group = get_group(key[0], type)
  second_group = get_group(key[1], type)

  # The name that already has a group is the existing member; the other joins.
  if first_group
    newcomer, member, group_id = key[1], key[0], first_group
  else
    newcomer, member, group_id = key[0], key[1], second_group
  end

  update_score(member, value)
  score = get_score(value)

  @groups[newcomer] = {} unless @groups.key?(newcomer)
  @groups[newcomer][type] = group_id
  @db.execute("insert into names_groups (name_id, group_id, score_max, score_sum, score_num) values (?, ?, ?, ?, 1)", [newcomer, group_id, score, score])
end
create_group(key, value)
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 65
# Opens a new group for a matched pair and records both names as members.
#
# key   - pair of name ids (elements 0 and 1 become the group members).
# value - match data; value[:type] is stored (as a string) as the group type.
#
# Uses @last_id as the new group's id and returns the incremented @last_id.
def create_group(key, value)
  group_id = @last_id
  type = value[:type]
  @db.execute("insert into groups (id, type) values (?, ?)", [group_id, type.to_s])

  key.each do |name_id|
    @groups[name_id] = {} unless @groups.key?(name_id)
  end

  score = get_score(value)
  @groups[key[0]][type] = @groups[key[1]][type] = group_id

  [key[0], key[1]].each do |name_id|
    @db.execute("insert into names_groups (name_id, group_id, score_max, score_sum, score_num) values (?, ?, ?, ?, 1)", [name_id, group_id, score, score])
  end

  @last_id += 1
end
get_group(name_id, type)
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 60
# Looks up the group id recorded in @groups for a name and match type.
#
# Returns nil when the name has no entry at all, or no entry for that type.
def get_group(name_id, type)
  groups_by_type = @groups[name_id]
  groups_by_type ? groups_by_type[type] : nil
end
get_output()
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 103
# Reads the grouped names back from the database and returns them as a list
# of groups.
#
# Returns an Array of Hashes, one per group id, in order of first appearance
# in the query result:
#   { :type => <group type>, :name_ids => [<name id>, ...] }
# Returns an empty Array when the query yields no rows.
def get_output
  rows = @db.execute("select x.group_id, g.type, ng.name_id from (select group_id from names_groups group by group_id order by count(*), group_id) x join names_groups ng on x.group_id = ng.group_id join names n on n.id = ng.name_id join groups g on g.id = ng.group_id")

  res = []
  # Index groups by id instead of tracking "the current group": this keeps the
  # final group (previously never appended to the result), and does not depend
  # on rows of one group being adjacent or on group ids being non-zero.
  groups_by_id = {}
  rows.each do |group_id, type, name_id|
    group = groups_by_id[group_id]
    unless group
      group = { :type => type, :name_ids => [] }
      groups_by_id[group_id] = group
      res << group
    end
    group[:name_ids] << name_id
  end
  res
end
get_score(value)
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 97
# Computes the score contributed by a single match.
#
# value - match data Hash; reads :type, :alt_placement, :total_length and
#         :auth_match.
#
# Returns 100 when the match type is :chresonym, 10 when :alt_placement is
# set and :total_length is greater than 8, and value[:auth_match] otherwise.
def get_score(value)
  return 100 if value[:type] == :chresonym
  return 10 if value[:alt_placement] && value[:total_length] > 8
  value[:auth_match]
end
organize_matches()
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 20
# Walks every match reported by the synonym finder and files the two names
# of each match into groups of the match's type.
#
# Resets @last_id to 1 before starting, and writes a progress log entry on
# every 10000th match.
def organize_matches
  @last_id = 1
  processed = 0
  @synonym_finder.matches.each do |pair, match|
    processed += 1
    if (processed % 10000).zero?
      SynonymFinder.logger_write(@synonym_finder.object_id, "Grouping match %s" % processed)
    end

    type = match[:type]
    first_group = get_group(pair[0], type)
    second_group = get_group(pair[1], type)

    if !first_group && !second_group
      # Neither name is grouped yet for this type.
      create_group(pair, match)
    elsif first_group && second_group
      # Both names are grouped: merge when the groups differ, then rescore both.
      update_group(first_group, second_group) unless first_group == second_group
      pair.each { |name_id| update_score(name_id, match) }
    else
      # Exactly one name is grouped: the other joins its group.
      add_to_group(pair, match)
    end
  end
end
organize_partial_matches()
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 39
# Walks the finder's partial matches. When one name of a pair already has a
# group of the match's type, the other name is inserted into that group;
# when neither has one, a new group is created for the pair.
#
# Writes a progress log entry on every 10000th partial match.
def organize_partial_matches
  # name_id => { name_id_db => 1 } for every (name, grouped name) pair that
  # has already been inserted during this run.
  added = {}
  count = 0
  @synonym_finder.part_matches.each do |key, value|
    count += 1
    SynonymFinder.logger_write(@synonym_finder.object_id, "Adding partial matches %s" % count) if count % 10000 == 0
    gr1 = get_group(key[0], value[:type])
    gr2 = get_group(key[1], value[:type])

    unless gr1 || gr2
      create_group(key, value)
      next
    end

    # name_id_db is the name that already owns the group; name_id is the one
    # being added (original note: "name without authorship").
    group_id, name_id, name_id_db = gr1 ? [gr1, key[1], key[0]] : [gr2, key[0], key[1]]

    # Accumulate per-name records rather than replacing the hash, so earlier
    # pairs for the same name are still remembered and not inserted twice.
    seen = (added[name_id] ||= {})
    next if seen[name_id_db]

    score = get_score(value)
    @db.execute("insert into names_groups (name_id, group_id, score_max, score_sum, score_num) values (?, ?, ?, ?, 1)", [name_id, group_id, score, score])
    seen[name_id_db] = 1
  end
end
update_group(gr1, gr2)
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 75
# Merges group gr2 into group gr1.
#
# gr1 - id of the group that survives.
# gr2 - id of the group that is absorbed and deleted.
#
# Moves every names_groups row from gr2 to gr1, repoints the in-memory
# @groups lookup the same way, and deletes gr2's row from groups.
# Returns the result of the delete statement.
def update_group(gr1, gr2)
  @db.execute("update names_groups set group_id = ? where group_id = ?", [gr1, gr2])

  # Keep the in-memory lookup in step with the database; otherwise names that
  # belonged to gr2 would keep resolving to a group id that no longer exists.
  @groups.each_value do |groups_by_type|
    groups_by_type.each do |type, group_id|
      groups_by_type[type] = gr1 if group_id == gr2
    end
  end

  # Bind values are passed as an array, like every other statement here.
  @db.execute("delete from groups where id = ?", [gr2])
end
update_score(name_id, value)
click to toggle source
# File lib/synonym-finder/group_organizer.rb, line 91
# Folds one more match score into a name's existing names_groups row.
#
# name_id - id of the name whose row is updated.
# value   - match data; scored via get_score, and value[:type] picks the
#           group (via get_group) whose row is touched.
#
# The row's score_max becomes the larger of its current value and the new
# score, score_sum grows by the score, and score_num is incremented.
def update_score(name_id, value)
  sql = "update names_groups set score_max = max(score_max, ?), score_sum = score_sum + ?, score_num = score_num + 1 where name_id = ? and group_id = ?"
  points = get_score(value)
  target_group = get_group(name_id, value[:type])
  @db.execute(sql, [points, points, name_id, target_group])
end