module TextUtils::ValueHelper
Constants
- TITLE_KEY_REGEX
If the first value looks like a key, assume it is a key. Only lower-case letters (a-z) are allowed.
- Dots are also allowed in keys, e.g. world.quali.america, at.cup, etc. - Digits (0-9) are also allowed in keys, e.g. at.2, at.3.1, etc. - Leading digits are also allowed, e.g. 1850muenchen, 3kronen, etc.
Public Instance Methods
find_grade( value )
click to toggle source
# File lib/textutils/helper/value_helper_i.rb, line 101
#
# Extracts an optional star grade from a field value.
#
# The stars must either start the value (followed by whitespace) or end it
# (preceded by whitespace), e.g.
#   *** Anton Bauer    or
#   Anton Bauer ***
#
# Returns a two-element array [grade, value]:
#   grade - 1 for ***, 2 for **, 3 for *; 4 (the default) if no stars are found
#   value - the value with the matched stars (and adjoining whitespace) removed
def find_grade( value )
  stars_to_grade = { '***' => 1, '**' => 2, '*' => 3 }
  grade          = 4   # default when no stars are present

  leading_stars  = /^\s*(\*{1,3})\s+/
  trailing_stars = /\s+(\*{1,3})\s*$/

  # Leading stars are handled first; a trailing match afterwards overrides the grade.
  [leading_stars, trailing_stars].each do |stars_pattern|
    value = value.sub( stars_pattern ) do
      # the capture is always one to three stars; keep the current grade otherwise
      grade = stars_to_grade.fetch( Regexp.last_match( 1 ), grade )
      ''   # strip the stars from the value
    end
  end

  [grade, value]
end
find_key_n_title( values )
click to toggle source
# File lib/textutils/helper/value_helper_i.rb, line 24
#
# Splits a list of field values into key/title attributes and the remaining values.
#
# If the first value matches TITLE_KEY_REGEX it is used as the key and the
# second value as the title; otherwise the first value is the title and the
# key is generated from the (first) title via TextUtils.title_to_key.
#
# Returns a two-element array [attribs, more_values] where attribs may hold
#   :grade    - only if a star grade (1, 2 or 3) was found in the title
#   :title    - the first title token
#   :synonyms - remaining title tokens joined with '|' (only if present)
#   :key      - the given or auto-generated key
#
# todo/fix:
#   - change title to name
#   - change synonyms to alt_names (!!!)
#     => use new method e.g. find_key_n_name(s) - why?? why not??
#   - add/configure logger for ActiveRecord
#   - add find parts ?? e.g. ‹Estrella› ‹Damm› Inedit
#     becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']
def find_key_n_title( values )
  logger = LogKernel::Logger.root

  # support auto-generated key: a missing key column is marked with '<auto>'
  key_col, title_col, more_values =
    if values[0] =~ TITLE_KEY_REGEX
      [values[0], values[1], values[2..-1]]
    else
      ['<auto>', values[0], values[1..-1]]
    end

  attribs = {}

  # strip an optional grade (***/**/*) from the title and remember it
  grade, title_col = find_grade( title_col )

  # the default grade (no stars) is deliberately NOT stored as an attribute
  if [1, 2, 3].include?( grade )
    logger.debug " found grade #{grade} in title"
    attribs[:grade] = grade
  end

  # Title may carry an optional tree hierarchy, e.g.
  #   Leverkusen › Köln/Bonn › Nordrhein-Westfalen
  #   München [Munich] › Bayern
  # fix: the trailing hierarchy gets *ignored* for now - pass along in :tree (or :hierarchy) ??
  # note: the separator must have leading and trailing spaces for now
  #       (hack to avoid conflicts w/ parts; fix: read/parse parts first)
  # todo: also allow > (as an alternative to ›)
  title_tree = title_col.split( /[ ]+[›][ ]+/ )

  # Title may carry optional synonyms, e.g.
  #   FC Bayern Muenchen|Bayern Muenchen|Bayern
  #   München [Munich]
  titles      = NameTokenizer.new.tokenize( title_tree[0] )
  first_title = titles[0]

  attribs[:title]    = first_title
  attribs[:synonyms] = titles[1..-1].join('|') if titles.size > 1

  if key_col == '<auto>'
    # auto-generate the key from the first title
    key_col = TextUtils.title_to_key( first_title )
    logger.debug " autogen key »#{key_col}« from title »#{first_title}«"
  end

  attribs[:key] = key_col

  [attribs, more_values]
end
is_address?( value )
click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 41
#
# Tells whether the value looks like an address: any value containing //
# is assumed to be one, e.g. 3970 Weitra // Sparkasseplatz 160
#
# Always returns a real boolean (true|false), never the match position or nil.
def is_address?( value )
  !( value =~ /\/{2}/ ).nil?
end
is_taglist?( value )
click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 49
#
# Tells whether the value looks like a tag list: lower-case letters, digits,
# |, _ and spaces; must start with a letter and end with a letter or digit.
#
# note: values starting with a number are not accepted for now,
#   e.g. 14 ha or 5_000 hl are not tag lists
#   (in the future allow free-standing years e.g. 1980 ?? why? why not?)
#
# Always returns a real boolean (true|false), never the match position or nil.
def is_taglist?( value )
  taglist_pattern = /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/
  (value =~ taglist_pattern) ? true : false
end
is_website?( value )
click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 60
#
# Tells whether the value looks like a url/internet address,
# e.g. www.ottakringer.at. The value
#   - must start w/ www.  or
#   - must end w/ .com
#
# fix: support more url formats (e.g. w/o www. - look for .com, country codes etc.)
#
# Always returns a real boolean (true|false), never the match position or nil.
def is_website?( value )
  website_pattern = /^www\.|\.com$/
  found_at = value =~ website_pattern
  !found_at.nil?
end
is_year?( value )
click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 22
#
# Tells whether the value is a four-digit year (founded/established), e.g. 1776
#
# Always returns a real boolean (true|false), never the match position or nil.
def is_year?( value )
  four_digits = /^[0-9]{4}$/
  (value =~ four_digits) ? true : false
end
match_abv( value ) { |abv| ... }
click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 38
#
# Matches alcohol by volume (abv), e.g. 5.2%
# nb: a leading < is allowed, e.g. <0.5%
#
# On a match the number is yielded as a float and true is returned;
# otherwise false is returned and the block is not called.
def match_abv( value )
  return false unless value =~ /^<?\s*(\d+(?:\.\d+)?)\s*%$/   # no match found

  yield( Regexp.last_match( 1 ).to_f )   # convert to decimal? how? use float?
  true   # bingo - match found
end
match_brewery( value ) { |brewery| ... }
click to toggle source
FIXME: move this method to beerdb (beerdb-models); it looks up BeerDb::Model::Brewery records.
# File lib/textutils/helper/value_helper_ii.rb, line 10
#
# Matches a "brewed by" reference, i.e. a value starting with by:
#
# On a match the by: prefix is cut off, the rest is used as the key for
# BeerDb::Model::Brewery.find_by_key!, the found brewery is yielded and
# true is returned; otherwise false is returned and the block is not called.
def match_brewery( value )
  return false unless value =~ /^by:/   # no match found

  brewery_key = value[3..-1]   # cut off by: prefix
  yield( BeerDb::Model::Brewery.find_by_key!( brewery_key ) )
  true   # bingo - match found
end
match_hl( value ) { |hl| ... }
click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 68
#
# Matches a volume in hectoliters (hl), 1 hl = 100 l,
# e.g. 20_000 hl or 50hl etc.
#
# On a match the number (with spaces and underscores removed) is yielded
# as an integer and true is returned; otherwise false is returned and the
# block is not called.
def match_hl( value )
  return false unless value =~ /^(?:([0-9][0-9_ ]+[0-9]|[0-9]{1,2})\s*hl)$/   # no match found

  digits = Regexp.last_match( 1 ).gsub( /[ _]/, '' )
  yield( digits.to_i )
  true   # bingo - match found
end
match_kcal( value ) { |kcal| ... }
click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 58
#
# Matches an energy value in kcal.
# nb: allows 44.4 kcal/100ml or 44.4 kcal or 44.4kcal
#
# On a match the number is yielded as a float and true is returned;
# otherwise false is returned and the block is not called.
def match_kcal( value )
  return false unless value =~ /^(\d+(?:\.\d+)?)\s*kcal(?:\/100ml)?$/   # no match found

  yield( Regexp.last_match( 1 ).to_f )   # convert to decimal? how? use float?
  true   # bingo - match found
end
match_km_squared( value ) { |num| ... }
click to toggle source
numbers w/ units
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 27
#
# Matches an area in square kilometers; allows numbers like 453 km² or 45_000 km2
#
# On a match the unit, spaces and underscores are removed from the value,
# the rest is yielded as an integer and true is returned; otherwise false
# is returned and the block is not called.
def match_km_squared( value )
  area_pattern = /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})(?:\s*(?:km2|km²)\s*)$/
  return false unless value =~ area_pattern   # no match found

  without_unit = value.gsub( 'km2', '' ).gsub( 'km²', '' )
  yield( without_unit.gsub( /[ _]/, '' ).to_i )
  true   # bingo - match found
end
match_number( value ) { |num| ... }
click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 11
#
# Matches a plain (integer) number.
# note: can use any _ or spaces inside digits, e.g. 1_000_000 or 1 000 000
#
# On a match the number (with spaces and underscores removed) is yielded
# as an integer and true is returned; otherwise false is returned and the
# block is not called.
def match_number( value )
  # The alternation is grouped so that BOTH anchors apply to BOTH branches.
  # Ungrouped, /^(a)|(b)$/ reads as "starts with a" OR "ends with b", which
  # accepted values such as "100 km" or "abc12" as numbers.
  return false unless value =~ /^(?:[0-9][0-9 _]+[0-9]|[0-9]{1,2})$/   # no match found

  yield( value.gsub( /[ _]/, '' ).to_i )
  true   # bingo - match found
end
match_og( value ) { |og| ... }
click to toggle source
# File lib/textutils/helper/value_helper_iii_numbers.rb, line 48
#
# Matches plato (stammwuerze/gravity?), e.g. 11.2°
# nb: no whitespace allowed between the number and °
#
# On a match the number is yielded as a float and true is returned;
# otherwise false is returned and the block is not called.
def match_og( value )
  return false unless value =~ /^(\d+(?:\.\d+)?)°$/   # no match found

  yield( Regexp.last_match( 1 ).to_f )   # convert to decimal? how? use float?
  true   # bingo - match found
end
match_website( value ) { |value| ... }
click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 72
#
# Matches a url/internet address (as decided by is_website?),
# e.g. www.ottakringer.at
#
# fix: support more url formats (e.g. w/o www. - look for .com, country codes etc.)
#
# On a match the unchanged value is yielded and true is returned;
# otherwise false is returned and the block is not called.
def match_website( value )
  return false unless is_website?( value )   # no match found

  yield( value )
  true   # bingo - match found
end
match_year( value ) { |year| ... }
click to toggle source
# File lib/textutils/helper/value_helper_ii.rb, line 31
#
# Matches a founded/established year (as decided by is_year?), e.g. 1776
#
# On a match the value is yielded as an integer and true is returned;
# otherwise false is returned and the block is not called.
def match_year( value )
  return false unless is_year?( value )   # no match found

  yield( value.to_i )
  true   # bingo - match found
end