module TextUtils::TitleTable
Public Instance Methods
build_title_table_for( records )
click to toggle source
FIXME: turn this into a class with methods,
e.g. t = TitleMapper.new( records, name )   # e.g. name='team'
t.map!( line ), t.find_key!( line ), etc. See textutils/title_mapper.rb. The code here is deprecated - move it to the new TitleMapper class.
# File lib/textutils/title.rb, line 32
#
# Builds the table of known titles (incl. synonyms) used for title mapping.
# Deprecated - use TitleMapper.new instead.
#
# records - enumerable of objects responding to +key+, +title+ and +synonyms+
#           (a '|'-separated string or blank); +code+ is optional.
#
# Returns an array of [key, patterns] pairs, e.g.
#
#   [[ 'wolfsburg', [ 'VfL Wolfsburg' ]],
#    [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
#    [ 'stuttgart', [ 'VfB Stuttgart' ]]]
#
# where every pattern except the trailing code has been passed through
# TextUtils.title_esc_regex.
def build_title_table_for( records )
  LogUtils::Logger.root.info " build_title_table_for - deprecated API - use TitleMapper.new instead"

  subtitle_regex = /\(.+\)/   # e.g. the (Suzuka Circuit) part of Grand Prix Japan (Suzuka Circuit)

  records.each_with_index.map do |rec, index|
    candidates = [rec.title]
    candidates += rec.synonyms.split('|') if rec.synonyms.present?

    ## make the subtitle optional by also adding the title w/o subtitle
    ##   e.g. Grand Prix Japan (Suzuka Circuit) => Grand Prix Japan
    variants = candidates.flat_map do |title|
      next [title] unless title =~ subtitle_regex

      extra_title = title.gsub( subtitle_regex, '' ).strip
      ## a title made up of nothing but a subtitle would leave an empty
      ##  pattern behind (matching at any word boundary) - skip it
      extra_title.empty? ? [title] : [title, extra_title]
    end

    ## the same text may show up as a synonym AND as a title w/o subtitle
    variants = variants.uniq

    ## NB: longest goes first (best match); the original position breaks
    ##  ties so that the order of equal-length titles is well defined
    ordered = variants.each_with_index
                      .sort_by { |title, pos| [-title.length, pos] }
                      .map(&:first)

    ## escape for regex plus allow subs for special chars/accents
    titles = ordered.map { |title| TextUtils.title_esc_regex( title ) }

    ## NB: only include the code field if defined; it is excluded from the
    ##  length sort and always goes last
    titles << rec.code if rec.respond_to?(:code) && rec.code.present?

    ### fix: use plain logger
    LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"

    [rec.key, titles]
  end
end
find_key_for!( name, line )
click to toggle source
# File lib/textutils/title.rb, line 86
#
# Finds the first @@oo{key}oo@@ marker in line, replaces it (in place) with
# [NAME] (the upcased name) and returns the key.
# Deprecated - use TitleMapper.find_key! instead.
#
# name - label used for the replacement tag and for logging, e.g. 'team1'
# line - string to scan; modified in place when a marker is found
#
# Returns the key found between the markers or nil if there is no marker.
def find_key_for!( name, line )
  LogUtils::Logger.root.info " find_key_for! #{name} - deprecated API - use TitleMapper.find_key! instead"

  ## e.g. everything in @@oo .... oo@@
  ##   (non-greedy +? plus all chars but not @, that is [^@])
  regex = /@@oo([^@]+?)oo@@/

  match = regex.match( line )
  return nil if match.nil?

  ## keep the capture in a local instead of reading $1 again after sub!
  value = match[1]

  ### fix: use plain logger
  LogUtils::Logger.root.debug " #{name.downcase}: >#{value}<"

  ## block form: the tag is inserted literally, that is, a backslash
  ##  sequence in name (e.g. \1) is NOT treated as a back-reference
  line.sub!( regex ) { "[#{name.upcase}]" }

  value
end
find_keys_for!( name, line )
click to toggle source
# File lib/textutils/title.rb, line 108
#
# Collects all keys marked in line by calling find_key_for! with the
# numbered names {name}1, {name}2, ... until a call returns no key.
# Deprecated - use TitleMapper.find_keys! instead.
#
# name - base label, e.g. 'team'; it is downcased before the counter is added
# line - string to scan; modified in place by find_key_for!
#
# NB: keys (plural!) - will return an array (empty if nothing was found)
def find_keys_for!( name, line )
  LogUtils::Logger.root.info " find_keys_for! #{name} - deprecated API - use TitleMapper.find_keys! instead"

  prefix = name.downcase
  found  = []
  index  = 1

  ## ask for {prefix}1, {prefix}2, ... and stop at the first blank result
  while (key = find_key_for!( "#{prefix}#{index}", line )).present?
    found << key
    index += 1
  end

  found
end
map_title_worker_for!( name, line, key, values )
click to toggle source
# File lib/textutils/title.rb, line 138
#
# Tries the patterns in values one by one and replaces the first whole-word
# match in line (in place) with the marker @@oo{key}oo@@ plus one space.
#
# name   - label used for logging only, e.g. 'team'
# line   - string to scan; modified in place on a match
# key    - key to put inside the marker
# values - regex source strings (assumed to be already escaped where needed)
#
# Returns true after the first match (later values are NOT tried),
# false if no value matches.
def map_title_worker_for!( name, line, key, values )
  downcase_name = name.downcase

  values.each do |value|
    ## an empty pattern would match at any word boundary - skip it
    next if value.to_s.empty?

    ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
    ##  (thus add it, allows match for Benfica Lis. for example - note . at the end)
    ## wrap with word boundary (e.g. match only whole words e.g. not wac in wacker);
    ##  the (?:...) group keeps a top-level | inside value from
    ##  escaping the boundary checks on either side
    regex = /\b(?:#{value})(\b| |\t|$)/

    ## make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
    ## NB: add one space char at end
    ## block form: key is inserted literally (no back-reference expansion);
    ##  sub! returns nil if there is no match - no extra =~ scan needed
    next unless line.sub!( regex ) { "@@oo#{key}oo@@ " }

    ### fix: use plain logger
    LogUtils::Logger.root.debug " match for #{downcase_name} >#{key}< >#{value}<"

    return true   # break out after first match (do NOT continue)
  end

  false
end
map_titles_for!( name, line, title_table )
click to toggle source
# File lib/textutils/title.rb, line 127
#
# Runs map_title_worker_for! on line once for every entry of title_table.
# Deprecated - use TitleMapper.map_titles! instead.
#
# name        - label handed through to map_title_worker_for!
# line        - string to map; handed through to map_title_worker_for!
# title_table - list of entries; entry[0] is used as the key and
#               entry[1] as the list of values
#
# Returns the result of title_table.each.
def map_titles_for!( name, line, title_table )
  LogUtils::Logger.root.info " map_titles_for! #{name} - deprecated API - use TitleMapper.map_titles! instead"

  title_table.each { |entry| map_title_worker_for!( name, line, entry[0], entry[1] ) }
end