class GenderDetector
Main class for interacting with the data file
Constants
- COUNTRIES
- ISO_3166_MAPPING
- VERSION
Public Class Methods
new(opts = {})
click to toggle source
# File lib/gender_detector.rb, line 39
# Builds a detector and immediately parses its name file.
#
# @param opts [Hash] overrides for the defaults below
# @option opts [String] :filename path of the name file; defaults to
#   gender_detector/data/nam_dict.txt resolved relative to this source file
# @option opts [Boolean] :case_sensitive defaults to true
# @option opts [Object] :unknown_value value reported for unknown names;
#   defaults to :andy
def initialize(opts = {})
  defaults = {
    filename: File.expand_path('../gender_detector/data/nam_dict.txt', __FILE__),
    case_sensitive: true,
    unknown_value: :andy
  }
  settings = defaults.merge(opts)

  @filename, @case_sensitive, @unknown_value =
    settings.values_at(:filename, :case_sensitive, :unknown_value)

  parse(settings[:filename])
end
Public Instance Methods
get_gender(name, country = nil)
click to toggle source
# File lib/gender_detector.rb, line 70
# Looks up the gender recorded for +name+, optionally for one country.
#
# @param name [String] the name to look up; downcased first unless the
#   detector is case sensitive
# @param country [Object, nil] a member of COUNTRIES or a key of
#   ISO_3166_MAPPING; nil means "no particular country"
# @return [Object] the winning gender, or the configured unknown value when
#   the name is not known
# @raise [RuntimeError] when +country+ is given but is in neither table
def get_gender(name, country = nil)
  name = downcase(name) unless @case_sensitive
  return @unknown_value unless name_exists?(name)

  if country.nil?
    # Without a country, each gender is scored by the number of non-blank
    # characters in its country_values string.
    return most_popular_gender(name) { |country_values|
      country_values.split('').reject { |flag| flag.strip == '' }.length
    }
  end

  resolved =
    if COUNTRIES.include?(country)
      country
    elsif ISO_3166_MAPPING.include?(country)
      ISO_3166_MAPPING[country]
    else
      raise "No such country: #{country}"
    end

  most_popular_gender_in_country(name, resolved)
end
inspect()
click to toggle source
# File lib/gender_detector.rb, line 88
# Short, human-readable summary of the detector's configuration.
#
# The three settings are separated by exactly one space each; the previous
# version emitted two spaces between +filename+ and +case_sensitive+ because
# both halves of the concatenated literal carried a space.
#
# @return [String] e.g. #<GenderDetector filename="..." case_sensitive=true unknown_value=andy>
def inspect
  "#<#{self.class.name} filename=\"#{@filename}\" " \
    "case_sensitive=#{@case_sensitive} unknown_value=#{@unknown_value}>"
end
knows_country?(country)
click to toggle source
# File lib/gender_detector.rb, line 61
# Tells whether +country+ can be used as the country argument of a lookup.
#
# @param country [Object] candidate country identifier
# @return [Boolean] true when COUNTRIES or ISO_3166_MAPPING includes it
def knows_country?(country)
  # any? stops at the first table that matches, so ISO_3166_MAPPING is only
  # consulted when COUNTRIES does not contain the value.
  [COUNTRIES, ISO_3166_MAPPING].any? { |table| table.include?(country) }
end
name_exists?(name)
click to toggle source
# File lib/gender_detector.rb, line 65
# Checks whether +name+ is present in the parsed name table.
#
# @param name [String] the name to check; downcased first unless the
#   detector is case sensitive
# @return [String, false] the (possibly downcased) name when it is known,
#   otherwise false
def name_exists?(name)
  lookup = @case_sensitive ? name : downcase(name)
  return false unless @names.key?(lookup)

  lookup
end
parse(fname)
click to toggle source
# File lib/gender_detector.rb, line 52
# Resets the name table and fills it from the file at +fname+.
#
# The file is read as ISO-8859-1 and transcoded to UTF-8; every line is
# handed to #eat_name_line.
#
# File.open is used instead of the bare Kernel#open so that +fname+ is always
# treated as a path: Kernel#open interprets a leading "|" as a command to
# spawn, which is unsafe for a caller-supplied filename.
#
# @param fname [String] path of the name file
# @raise [SystemCallError] when the file cannot be opened
def parse(fname)
  @names = {}
  File.open(fname, 'r:iso8859-1:utf-8') do |file|
    file.each_line { |line| eat_name_line(line) }
  end
end
Private Instance Methods
downcase(name)
click to toggle source
# File lib/gender_detector.rb, line 145
# Lower-cases +name+, going through mb_chars when the string offers it.
#
# The check is made on the string itself rather than on the presence of the
# ActiveSupport::Multibyte::Chars constant: that constant can be defined
# without String#mb_chars being available, in which case the old check led
# to a NoMethodError.
#
# @param name [String] the name to lower-case
# @return [String] the lower-cased name
def downcase(name)
  if name.respond_to?(:mb_chars)
    name.mb_chars.downcase.to_s
  else
    name.downcase
  end
end
eat_name_line(line)
click to toggle source
# File lib/gender_detector.rb, line 102
# Consumes one line of the name file and records it through #set.
#
# Lines starting with "#" or "=" are ignored, and so are blank or
# whitespace-only lines (these used to fall through to the "unknown gender"
# error, or to a NoMethodError on nil in case-insensitive mode).
#
# The first whitespace-separated field is the gender marker and the second is
# the name; everything from character offset 30 onward is kept verbatim as
# the country values.
#
# @param line [String] one raw line of the name file
# @return [Object, nil] nil for skipped lines, otherwise whatever #set returns
# @raise [RuntimeError] when the gender marker is not recognised
def eat_name_line(line)
  return if line.start_with?('#', '=')

  # split(' ') already discards leading whitespace and empty fields.
  parts = line.split(' ')
  return if parts.empty?

  country_values = line.slice(30, line.length)
  name = @case_sensitive ? parts[1] : downcase(parts[1])

  case parts[0]
  when 'M' then set(name, :male, country_values)
  when '1M', '?M' then set(name, :mostly_male, country_values)
  when 'F' then set(name, :female, country_values)
  when '1F', '?F' then set(name, :mostly_female, country_values)
  when '?' then set(name, :andy, country_values)
  else raise "Not sure what to do with a gender of #{parts[0]}"
  end
end
most_popular_gender(name) { |country_values| ... }
click to toggle source
# File lib/gender_detector.rb, line 119
# Picks the gender of +name+ whose country values score highest.
#
# The block receives each gender's country values and returns a numeric
# score. A gender only takes the lead with a score strictly greater than the
# current best (starting from 0), so on ties, or when nothing scores above
# zero, the first recorded gender wins.
#
# @param name [String] key into the parsed name table
# @yieldparam country_values [Object] the values stored for one gender
# @yieldreturn [Numeric] score for that gender
# @return [Object] the winning gender, or the configured unknown value when
#   +name+ is not in the table
def most_popular_gender(name)
  by_gender = @names.fetch(name) { return @unknown_value }

  opening = [by_gender.keys.first, 0]
  winner, _top = by_gender.reduce(opening) do |(leader, top), (gender, country_values)|
    score = yield country_values
    score > top ? [gender, score] : [leader, top]
  end
  winner
end
most_popular_gender_in_country(name, country)
click to toggle source
# File lib/gender_detector.rb, line 95
# Picks the gender of +name+ that scores highest for a single country.
#
# Each gender is scored by the character code (ord) of the character found
# in its country values at the position +country+ occupies in COUNTRIES.
#
# @param name [String] key into the parsed name table
# @param country [Object] an entry of COUNTRIES
# @return [Object] whatever #most_popular_gender returns for that scoring
def most_popular_gender_in_country(name, country)
  column = COUNTRIES.index(country)
  most_popular_gender(name) { |country_values| country_values[column].ord }
end
set(name, gender, country_values)
click to toggle source
# File lib/gender_detector.rb, line 134
# Records +country_values+ for +name+ under +gender+ in the name table.
#
# A "+" in the name is a placeholder: the entry is stored three times, with
# every "+" replaced by nothing, by "-" and by a space respectively.
#
# @param name [String] the name, possibly containing "+"
# @param gender [Symbol] the gender key to store under
# @param country_values [Object] the value to store
# @return [Object] +country_values+ for a plain name, or the array of
#   replacement strings for a name containing "+"
def set(name, gender, country_values)
  unless name.include?('+')
    by_gender = (@names[name] ||= {})
    return by_gender[gender] = country_values
  end

  ['', '-', ' '].each do |joiner|
    set(name.gsub('+', joiner), gender, country_values)
  end
end