class UncleKryon::Languages

Constants

DEFAULT_FILEPATH

Public Class Methods

load_file(filepath=DEFAULT_FILEPATH) click to toggle source
# File lib/unclekryon/iso/language.rb, line 174
# Builds a fresh Languages instance and loads the given file into it.
#
# @param filepath [String] file to load; defaults to DEFAULT_FILEPATH
# @return the result of the instance-level +load_file+ call
def self.load_file(filepath=DEFAULT_FILEPATH)
  langs = Languages.new
  langs.load_file(filepath)
end
new() click to toggle source
Calls superclass method UncleKryon::BaseIsos::new
# File lib/unclekryon/iso/language.rb, line 114
# Initializes the instance by delegating to the superclass initializer.
def initialize
  # Explicit parentheses: call the superclass initializer with no arguments.
  super()
end
parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH) click to toggle source

@param parse_filepath [String] local file containing the table HTML; use a web browser's developer tools to copy & paste the table HTML into it

@param save_filepath [String] local file to save YAML to

@see www.loc.gov/standards/iso639-2/php/code_list.php

# File lib/unclekryon/iso/language.rb, line 182
# Parses the <td> cells of a saved HTML table into Language entries (5 cells
# per row) and saves the resulting collection to a file.
#
# Rows whose code is already present are skipped if they are equal to the
# stored entry; a conflicting row raises. A message is printed for every
# already-stored language that has the same name as a newly added one.
#
# @param parse_filepath [String] local file containing the table HTML
#   (an http/https/ftp URL is also accepted)
# @param save_filepath [String] file to save to; defaults to DEFAULT_FILEPATH
# @return the result of +save_to_file+
# @raise [RuntimeError] if a code appears twice with differing data
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
  source = parse_filepath.to_s

  # open-uri only provides a public #open on remote (http/https/ftp) URIs;
  # a plain local path has to be opened as a regular file.
  io = source.match?(%r{\A(?:https?|ftp)://}i) ? URI(source).open : File.open(source)

  begin
    doc = Nokogiri::HTML(io,nil,'utf-8')
  ensure
    io.close # Don't leak the file/stream handle
  end

  # Normalize every cell: collapse runs of whitespace and trim the ends.
  cells = doc.css('td').map { |td| td.content.gsub(/[[:space:]]+/,' ').strip }
  langs = Languages.new

  cells.each_slice(5) do |row|
    next if row.length < 5 # Ignore a trailing, incomplete row

    lang = Language.new(row)

    if langs.key?(lang.code)
      # There were so many duplicates, so added comparison check
      raise "Language already exists: #{lang.inspect}" if lang != langs[lang.code]
      next
    end

    langs.values.each_value do |v|
      puts "Duplicate lang names: #{v.name}" if v.name == lang.name
    end

    langs[lang.code] = lang
  end

  langs.sort_keys!
  langs.save_to_file(save_filepath)
end

Public Instance Methods

find_by_kryon(text,add_english: false,**options) click to toggle source
# File lib/unclekryon/iso/language.rb, line 118
# Finds the language codes named in a free-form language string.
#
# The text is first split on '/' or '+'. If any piece is not found, the codes
# collected so far are discarded and the text is split on whitespace instead.
# With the last regex, a piece that is not found raises in dev mode
# (+DevOpts.instance.dev?+) and is otherwise logged as a warning and skipped.
#
# @param text [String] language names, e.g. separated by '/' or spaces
# @param add_english [true,false] append the code of 'eng' if not already present
# @param options [Hash] accepted but not used by this method
# @return [Array,nil] the codes in the order found, or nil if there are none
# @raise [RuntimeError] in dev mode, if a piece is not found with the last regex
def find_by_kryon(text,add_english: false,**options)
  langs = []
  regexes = [
    %r{[[:space:]]*[/\+][[:space:]]*}, # Multiple languages are usually separated by '/'
    /[[:space:]]+/                    # Sometimes separated by space/newline
  ]
  last_index = regexes.length - 1

  regexes.each_with_index do |regex,i|
    try_next_regex = false

    text.split(regex).each do |t|
      # '+'/'*' means more languages, but won't worry about it (since not listed).
      # Strip these first so the anchored misspelling fixes below also match
      # pieces like 'FRENC*'. (gsub returns a new String, so text is untouched.)
      t = t.gsub(/[\+\*]+/,'')

      # Fix misspellings and/or weird shortenings
      t.gsub!(/\AFRENC\z/i,'French')
      t.gsub!(/\ASPAN\z/i,'Spanish')
      t.gsub!(/\AENGLSH\z/i,'English')
      t.gsub!(/\AHUNGARY\z/i,'Hungarian')

      lang = find(t)

      if lang
        langs.push(lang.code)
      elsif i >= last_index
        msg = "No language found for: #{t}"

        raise msg if DevOpts.instance.dev?

        log.warn(msg)
      else
        log.warn("Not a language; trying next regex: #{t}")

        # Try next regex.
        langs.clear
        try_next_regex = true
        break
      end
    end

    # No problem with this regex, so bail out.
    break unless try_next_regex
  end

  # Only look up English when it was actually requested.
  if add_english
    eng_code = find_by_code('eng').code
    langs.push(eng_code) unless langs.include?(eng_code)
  end

  langs.empty? ? nil : langs
end