class UncleKryon::Languages
Constants
- DEFAULT_FILEPATH
Public Class Methods
load_file(filepath=DEFAULT_FILEPATH)
click to toggle source
# File lib/unclekryon/iso/language.rb, line 174
# Convenience class-level loader: builds a new Languages instance and
# delegates to its instance-level +load_file+.
#
# @param filepath [String] file to load; defaults to DEFAULT_FILEPATH
# @return whatever the instance-level +load_file+ returns
def self.load_file(filepath=DEFAULT_FILEPATH)
  langs = Languages.new
  langs.load_file(filepath)
end
new()
click to toggle source
Calls superclass method
UncleKryon::BaseIsos::new
# File lib/unclekryon/iso/language.rb, line 114
# Construction is delegated entirely to the superclass initializer.
# The explicit empty parentheses make sure no arguments are forwarded.
def initialize
  super()
end
parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
click to toggle source
@param parse_filepath [String] local file containing the table HTML (use a web browser's
developer tools to copy & paste the table into this file)
@param save_filepath [String] local file to save the YAML to
@see www.loc.gov/standards/iso639-2/php/code_list.php
# File lib/unclekryon/iso/language.rb, line 182
# Reads an HTML table of languages, builds a Languages collection from it,
# sorts the collection by key and saves it to +save_filepath+.
#
# Every 5 consecutive <td> cells are treated as one table row; a trailing
# group with fewer than 5 cells is ignored.
#
# @param parse_filepath [String] location of the HTML to parse
# @param save_filepath [String] file the resulting collection is saved to
# @raise [RuntimeError] if a code appears twice with differing language data
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
  doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
  langs = Languages.new

  doc.css('td').each_slice(5) do |cells|
    # Collapse every run of whitespace into one space and trim the ends.
    row = cells.map { |td| td.content.gsub(/[[:space:]]+/,' ').strip }

    # Incomplete trailing row; nothing to build a language from.
    next if row.length < 5

    #puts row.inspect()
    lang = Language.new(row)

    if langs.key?(lang.code)
      # There were so many duplicates, so added comparison check:
      # an identical duplicate is skipped, a conflicting one is an error.
      raise "Language already exists: #{lang.inspect}" if lang != langs[lang.code]

      next
    end

    # New code: report (but still accept) a name already used by another code.
    langs.values.each_value do |known|
      puts "Duplicate lang names: #{known.name}" if known.name == lang.name
    end

    langs[lang.code] = lang
  end

  langs.sort_keys!
  langs.save_to_file(save_filepath)
end
Public Instance Methods
find_by_kryon(text,add_english: false,**options)
click to toggle source
# File lib/unclekryon/iso/language.rb, line 118
# Parses a free-form list of language names and returns their codes.
#
# The text is first split on '/' or '+'. If any resulting piece is not a
# known language, the codes collected so far are discarded and the text is
# split again on whitespace. On the last splitting attempt an unknown piece
# raises in dev mode, otherwise it is logged and skipped.
#
# @param text [String] language names to parse
# @param add_english [Boolean] if true, append the code of 'eng' unless it
#   is already in the result
# @param options [Hash] accepted for interface compatibility; not used here
# @return [Array, nil] the language codes found, or nil if there are none
# @raise [RuntimeError] in dev mode, if a piece matches no language on the
#   last splitting attempt
def find_by_kryon(text,add_english: false,**options)
  separators = [
    %r{[[:space:]]*[/\+][[:space:]]*}, # Multiple languages are usually separated by '/'
    /[[:space:]]+/ # Sometimes separated by space/newline
  ]

  # Misspellings and/or weird shortenings, and the names they stand for.
  corrections = [
    [/\AFRENC\z/i,'French'],
    [/\ASPAN\z/i,'Spanish'],
    [/\AENGLSH\z/i,'English'],
    [/\AHUNGARY\z/i,'Hungarian']
  ]

  langs = []

  separators.each_with_index do |separator,i|
    last_attempt = i >= (separators.length - 1)
    try_next_separator = false

    text.split(separator).each do |piece|
      # '+'/'*' means more languages, but won't worry about it (since not
      # listed). Strip these markers BEFORE fixing misspellings, so that a
      # marked shortening such as "FRENC*" is corrected like any other.
      name = piece.gsub(/[\+\*]+/,'')
      corrections.each { |pattern,fixed| name = name.sub(pattern,fixed) }

      lang = find(name)

      unless lang.nil?
        langs.push(lang.code)
        next
      end

      unless last_attempt
        log.warn("Not a language; trying next regex: #{name}")

        # Start over with the next separator.
        langs.clear
        try_next_separator = true
        break
      end

      msg = "No language found for: #{name}"
      raise msg if DevOpts.instance.dev?

      log.warn(msg)
    end

    # No problem with this separator, so bail out.
    break unless try_next_separator
  end

  # Only look up English when it was actually requested.
  if add_english
    eng_code = find_by_code('eng').code
    langs.push(eng_code) unless langs.include?(eng_code)
  end

  langs.empty? ? nil : langs
end