module TextUtils::TitleTable

Public Instance Methods

build_title_table_for( records ) click to toggle source
fix: turn it into a class w/ methods, e.g.

  t = TitleMapper.new( records, name )   # e.g. name='team'
  t.map!( line )
  t.find_key!( line )
  etc.

see textutils/title_mapper.rb

Deprecated: the code here is to be moved to the new TitleMapper class.
# File lib/textutils/title.rb, line 32
# Builds the table of known titles (incl. synonyms) used for title mapping.
#
# Deprecated: use TitleMapper.new( records, name ) instead
# (see textutils/title_mapper.rb).
#
# records - collection responding to each_with_index; every record must
#           respond to title, synonyms (a '|'-separated string, may be blank)
#           and key. A code is appended when the record responds to code and
#           the code is present.
#
# Returns an array of [key, patterns] pairs, one per record, e.g. (shown
# before regex escaping)
#
#   [[ 'wolfsburg', [ 'VfL Wolfsburg' ]],
#    [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
#    [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
#
# The title patterns are passed through TextUtils.title_esc_regex and sorted
# longest first; the code (if any) always goes last and is added as is.
def build_title_table_for( records )
  LogUtils::Logger.root.info "  build_title_table_for - deprecated API - use TitleMapper.new instead"

  known_titles = []

  records.each_with_index do |rec, index|
    title_candidates = [ rec.title ]
    title_candidates += rec.synonyms.split('|') if rec.synonyms.present?

    ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
    #  make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
    titles = []
    title_candidates.each do |t|
      titles << t
      next unless t =~ /\(.+\)/

      # remove the subtitle plus leading and trailing whitespace
      extra_title = t.gsub( /\(.+\)/, '' ).strip

      ## NB: skip the variant if nothing is left (title was just "(...)");
      #  an empty pattern would match at any word boundary
      titles << extra_title unless extra_title.empty?
    end

    ## NB: sort here by length (largest goes first - best match)
    #  exclude code and key (key should always go last)
    titles = titles.sort { |left, right| right.length <=> left.length }

    ## escape for regex plus allow subs for special chars/accents
    titles = titles.map { |title| TextUtils.title_esc_regex( title ) }

    ## NB: only include code field - if defined
    ## NOTE(review): code is added unescaped - presumably plain alphanumerics; confirm
    titles << rec.code if rec.respond_to?(:code) && rec.code.present?

    known_titles << [ rec.key, titles ]

    ### fix: use plain logger
    LogUtils::Logger.root.debug "  #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
  end

  known_titles
end
find_key_for!( name, line ) click to toggle source
# File lib/textutils/title.rb, line 86
# Looks up the first @@oo<key>oo@@ marker in line.
#
# Deprecated: use TitleMapper.find_key! instead.
#
# name - label for the marker; the upcased name is used for the replacement
#        tag, the downcased name for the debug log
# line - string to search; modified in place (the first marker is replaced
#        by "[NAME]")
#
# Returns the key found between the marker delimiters or nil if the line
# holds no marker (the line is left untouched in that case).
def find_key_for!( name, line )
  LogUtils::Logger.root.info "  find_key_for! #{name} - deprecated API - use TitleMapper.find_key! instead"

  # everything between @@oo and oo@@ - non-greedy and never spanning an @
  marker = /@@oo([^@]+?)oo@@/

  tag   = name.upcase
  label = name.downcase

  return nil unless line =~ marker

  found = Regexp.last_match(1)
  ### fix: use plain logger
  LogUtils::Logger.root.debug "   #{label}: >#{found}<"

  line.sub!( marker, "[#{tag}]" )

  found
end
find_keys_for!( name, line ) click to toggle source
# File lib/textutils/title.rb, line 108
# Collects all keys marked in line, one find_key_for! call per key.
#
# Deprecated: use TitleMapper.find_keys! instead.
#
# name - base label; each lookup uses the downcased name plus a running
#        number starting at 1 (e.g. team1, team2, ...)
# line - string to search; passed on to find_key_for! for every lookup
#
# NB: keys (plural!) - returns an array with the keys in the order found;
#     empty if the first lookup returns nothing present.
def find_keys_for!( name, line )
  LogUtils::Logger.root.info "  find_keys_for! #{name} - deprecated API - use TitleMapper.find_keys! instead"

  prefix = name.downcase
  found  = []

  # the running number is always one more than the count of keys collected so far
  while (key = find_key_for!( "#{prefix}#{found.size + 1}", line )).present?
    found << key
  end

  found
end
map_title_worker_for!( name, line, key, values ) click to toggle source
# File lib/textutils/title.rb, line 138
# Marks the first known title found in line with its key.
#
# name   - label, only used (downcased) for the debug log
# line   - string to search; modified in place on a match
# key    - key written into the @@oo<key>oo@@ marker
# values - patterns tried in the order given; each one is interpolated as is
#          into a regex (presumably already escaped by the table builder -
#          verify against caller)
#
# Returns true after the first pattern that matches (the remaining patterns
# are NOT tried) or false if no pattern matches.
def map_title_worker_for!( name, line, key, values )
  label = name.downcase

  values.each do |pattern|
    ## nb: \b only marks a boundary next to alphanums (e.g. a-z0-9),
    ##  thus a title ending in a non-word char (e.g. Benfica Lis.) needs the
    ##  extra space, tab or end-of-line alternatives to match
    ## the leading \b makes sure only whole words match (e.g. not wac in wacker)
    matcher = /\b#{pattern}(\b| |\t|$)/
    next unless line =~ matcher

    ### fix: use plain logger
    LogUtils::Logger.root.debug "     match for #{label}  >#{key}< >#{pattern}<"

    # the marker delimiters keep the key from matching again with another
    # key's pattern (e.g. wacker, wac, etc.)
    # NB: one space char gets added at the end
    line.sub!( matcher, "@@oo#{key}oo@@ " )
    return true    # first match wins
  end

  false
end
map_titles_for!( name, line, title_table ) click to toggle source
# File lib/textutils/title.rb, line 127
# Runs map_title_worker_for! for every entry of the title table.
#
# Deprecated: use TitleMapper.map_titles! instead.
#
# name        - label passed on to the worker
# line        - string passed on to the worker for every entry
# title_table - list of entries with the key at index 0 and the patterns at
#               index 1 (e.g. as built by build_title_table_for)
#
# Returns whatever title_table.each returns (the table itself for an array).
def map_titles_for!( name, line, title_table )
  LogUtils::Logger.root.info "  map_titles_for! #{name} - deprecated API - use TitleMapper.map_titles! instead"

  title_table.each do |entry|
    map_title_worker_for!( name, line, entry[0], entry[1] )
  end
end