module TextUtils::ValueHelper

Constants

TITLE_KEY_REGEX

If a value looks like a key (only lowercase a-z allowed), assume it is a key.

- also allow . in keys e.g. world.quali.america, at.cup, etc.
- also allow 0-9 in keys e.g. at.2, at.3.1, etc.
- also allow leading digits e.g. 1850muenchen, 3kronen, etc.

Public Instance Methods

find_grade( value ) click to toggle source
# File lib/textutils/helper/value_helper_i.rb, line 101
# Extracts an optional star grade from a value.
#
# Stars only count when they lead or trail the value and are set apart
# from the text by whitespace, e.g.
#   *** Anton Bauer   or
#   Anton Bauer ***
#
# Grades:  *** => 1,  ** => 2,  * => 3,  no stars => 4 (default)
#
# Returns a two-element array [grade, value]; the returned value has the
# matched stars (plus the whitespace next to them) removed.
# When stars are found on both ends, the trailing group decides the grade
# because it is processed last.
def find_grade( value )
  grade_by_stars = { '***' => 1, '**' => 2, '*' => 3 }
  grade          = 4   # fallback when no stars are found

  leading_stars  = /^\s*(\*{1,3})\s+/
  trailing_stars = /\s+(\*{1,3})\s*$/

  [leading_stars, trailing_stars].each do |stars_pattern|
    value = value.sub( stars_pattern ) do
      # keep the current grade if the capture is (unexpectedly) not in the table
      grade = grade_by_stars.fetch( $1, grade )
      ''   # drop the stars from the value
    end
  end

  [grade, value]
end
find_key_n_title( values ) click to toggle source
# File lib/textutils/helper/value_helper_i.rb, line 24
# Splits a list of values into key/title attributes and the remaining values.
#
# If the first value matches TITLE_KEY_REGEX it is used as the key and the
# second value as the title; otherwise the first value is the title and the
# key gets autogenerated from it via TextUtils.title_to_key.
#
# The title column may carry:
#   - a grade (***/**/*) - stripped by find_grade and stored in :grade
#     (only if present; the default grade is NOT stored)
#   - a trailing tree hierarchy separated by " › " (with surrounding spaces), e.g.
#       Leverkusen › Köln/Bonn › Nordrhein-Westfalen
#       München [Munich] › Bayern
#   - synonyms, e.g.
#       FC Bayern Muenchen|Bayern Muenchen|Bayern
#       München [Munich]
#
# Returns a two-element array [attribs, more_values] where attribs holds
# :title, :key and optionally :grade and :synonyms (joined with |).
#
# TODO: change title to name and synonyms to alt_names,
#       e.g. via a new method find_key_n_name(s)?
# TODO: add/configure logger for ActiveRecord
# TODO: find parts, e.g. ‹Estrella› ‹Damm› Inedit
#       becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
# FIXME: the trailing hierarchy gets *ignored* for now;
#        pass it along in :tree (or :hierarchy)?
# TODO: also allow > (as an alternative to ›)
def find_key_n_title( values )
  logger = LogKernel::Logger.root

  # an explicit key shifts the title (and everything after it) one column to the right
  explicit_key = values[0] =~ TITLE_KEY_REGEX
  if explicit_key
    key_col, title_col, more_values = values[0], values[1], values[2..-1]
  else
    key_col, title_col, more_values = '<auto>', values[0], values[1..-1]
  end

  # pull the grade out of the title; continue with the stripped title
  grade, title_col = find_grade( title_col )

  attribs = {}
  if [1, 2, 3].include?( grade )   # 4 is the "no stars" default and is not recorded
    logger.debug "   found grade #{grade} in title"
    attribs[:grade] = grade
  end

  # note: the separator must include leading and trailing spaces for now;
  #   hack for avoiding a conflict w/ parts (fix: read/parse parts first)
  title_tree = title_col.split( /[ ]+[›][ ]+/ )

  # only the first tree node is used; it may list synonyms
  titles      = NameTokenizer.new.tokenize( title_tree[0] )
  first_title = titles[0]

  attribs[:title]    = first_title
  attribs[:synonyms] = titles[1..-1].join('|')  if titles.size > 1

  if key_col == '<auto>'
    key_col = TextUtils.title_to_key( first_title )
    logger.debug "   autogen key »#{key_col}« from title »#{titles[0]}«"
  end
  attribs[:key] = key_col

  [attribs, more_values]
end
is_address?( value ) click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 41
# Tells whether a value looks like an address, i.e. whether it contains
# a double slash (//) anywhere, e.g. 3970 Weitra // Sparkasseplatz 160
#
# Returns a strict boolean (true|false) - never the match position or nil.
def is_address?( value )
  !( value =~ /\/{2}/ ).nil?
end
is_taglist?( value ) click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 49
# Tells whether a value looks like a tag list: lowercase letters, digits,
# |, _ and spaces only; it must start with a letter and end with a letter
# or digit (a single letter counts too).
#
# note: cannot start w/ a number for now, e.g. 14 ha or 5_000 hl are not allowed
#   -- in the future allow free-standing years (e.g. 1980 etc.)? why? why not?
#
# Returns a strict boolean (true|false) - never the match position or nil.
def is_taglist?( value )
  !( value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/ ).nil?
end
is_website?( value ) click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 60
# Tells whether a value looks like a url/internet address,
# e.g. www.ottakringer.at
#
# The value qualifies if it
#   - starts w/ www.  or
#   - ends w/ .com
#
# fix: support more url formats (e.g. w/o www. - look for .com, country codes etc.)
#
# Returns a strict boolean (true|false) - never the match position or nil.
def is_website?( value )
  !( value =~ /^www\.|\.com$/ ).nil?
end
is_year?( value ) click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 22
# Tells whether a value is a year, that is, exactly four digits
# (e.g. a founded/established year such as 1776).
#
# Returns a strict boolean (true|false) - never the match position or nil.
def is_year?( value )
  !( value =~ /^[0-9]{4}$/ ).nil?
end
match_abv( value ) { |$to_f| ... } click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 38
# Matches alcohol by volume (abv) e.g. 5.2%
#
# nb: a leading < is allowed e.g. <0.5%
#
# On a match the number gets yielded as a float
# (convert to decimal? how? float for now) and true is returned;
# otherwise the block is not called and false is returned.
def match_abv( value )
  return false unless value =~ /^<?\s*(\d+(?:\.\d+)?)\s*%$/

  yield( $1.to_f )
  true   # bingo - match found
end
match_brewery( value ) { |brewery| ... } click to toggle source

FIXME: move this method to beerdb (why? why not?) - yes, move it to beerdb-models.

# File lib/textutils/helper/value_helper_ii.rb, line 10
# Matches a "brewed by" reference, i.e. a value starting w/ by:
#
# Everything after the by: prefix is used as the brewery key and looked up
# w/ BeerDb::Model::Brewery.find_by_key!
# (NOTE(review): presumably raises if no brewery is found - confirm).
#
# On a match the brewery record gets yielded and true is returned;
# otherwise the block is not called and false is returned.
def match_brewery( value )
  return false unless value =~ /^by:/   # by: - brewed by/brewery

  brewery_key = value[3..-1]   # cut off the by: prefix
  yield( BeerDb::Model::Brewery.find_by_key!( brewery_key ) )
  true   # bingo - match found
end
match_hl( value ) { |$gsub( /[ _]/, '' ).to_i| ... } click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 68
# Matches a volume in hectoliters (hl); 1 hl = 100 l
#   e.g. 20_000 hl or 50hl etc.
#
# Digits may be grouped w/ _ or spaces; the separators get removed
# before the conversion to an integer.
#
# On a match the number gets yielded as an integer and true is returned;
# otherwise the block is not called and false is returned.
def match_hl( value )
  return false unless value =~ /^(?:([0-9][0-9_ ]+[0-9]|[0-9]{1,2})\s*hl)$/

  digits = $1.gsub( /[ _]/, '' )
  yield( digits.to_i )
  true   # bingo - match found
end
match_kcal( value ) { |$to_f| ... } click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 58
# Matches an energy value in kcal
#
# nb: allows 44.4 kcal/100ml or 44.4 kcal or 44.4kcal
#
# On a match the number gets yielded as a float
# (convert to decimal? how? float for now) and true is returned;
# otherwise the block is not called and false is returned.
def match_kcal( value )
  return false unless value =~ /^(\d+(?:\.\d+)?)\s*kcal(?:\/100ml)?$/

  yield( $1.to_f )
  true   # bingo - match found
end
match_km_squared( value ) { |num| ... } click to toggle source

numbers w/ units

# File lib/textutils/helper/value_helper_iii_numbers.rb, line 27
# Matches an area in square kilometers
#   e.g. 453 km² or 45_000 km2
#
# Digits may be grouped w/ _ or spaces. The unit and the separators get
# removed from the value before the conversion to an integer.
#
# On a match the number gets yielded as an integer and true is returned;
# otherwise the block is not called and false is returned.
def match_km_squared( value )
  return false unless value =~ /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})(?:\s*(?:km2|km²)\s*)$/

  digits = value.gsub( 'km2', '' ).gsub( 'km²', '' ).gsub( /[ _]/, '' )
  yield( digits.to_i )
  true   # bingo - match found
end
match_number( value ) { |num| ... } click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 11
# Matches a plain (integer) number
#   e.g. 7, 12, 1776, 1_000_000 or 1 000 000
#
# note: digits may be grouped w/ _ or spaces; the separators get removed
#   before the conversion to an integer.
#
# The whole value must be a number: the alternation is grouped so that both
# anchors apply to both alternatives. (Without the grouping the pattern read
# "starts w/ a number OR ends w/ one or two digits" and accepted values such
# as abc12, 100 ha or 5_000 hl.)
#
# On a match the number gets yielded as an integer and true is returned;
# otherwise the block is not called and false is returned.
def match_number( value )
  return false unless value =~ /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})$/

  # build the number from the captured digits only
  yield( $1.gsub( /[ _]/, '' ).to_i )
  true   # bingo - match found
end
match_og( value ) { |$to_f| ... } click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 48
# Matches degrees plato (stammwuerze/gravity?) e.g. 11.2°
#
# nb: no whitespace allowed between the number and ° e.g. 11.2°
#
# On a match the number gets yielded as a float
# (convert to decimal? how? float for now) and true is returned;
# otherwise the block is not called and false is returned.
def match_og( value )
  return false unless value =~ /^(\d+(?:\.\d+)?)°$/

  yield( $1.to_f )
  true   # bingo - match found
end
match_website( value ) { |value| ... } click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 72
# Matches a url/internet address e.g. www.ottakringer.at
# (whatever is_website? accepts).
#
# fix: support more url formats (e.g. w/o www. - look for .com, country codes etc.)
#
# On a match the unchanged value gets yielded and true is returned;
# otherwise the block is not called and false is returned.
def match_website( value )
  return false unless is_website?( value )

  yield( value )
  true   # bingo - match found
end
match_year( value ) { |to_i| ... } click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 31
# Matches a year e.g. a founded/established year such as 1776
# (whatever is_year? accepts).
#
# On a match the year gets yielded as an integer and true is returned;
# otherwise the block is not called and false is returned.
def match_year( value )
  return false unless is_year?( value )

  yield( value.to_i )
  true   # bingo - match found
end