class ActiveSupport::Multibyte::Unicode::DatabaseGenerator

Constants

BASE_URI
SOURCES

Public Class Methods

new() click to toggle source
# File activesupport/bin/generate_tables, line 30
# Creates the generator with a fresh Unicode::UnicodeDatabase stored in @ucd,
# the object every other method of this class reads from and writes to.
def initialize
  @ucd = Unicode::UnicodeDatabase.new
end

Public Instance Methods

create_composition_map() click to toggle source
# File activesupport/bin/generate_tables, line 87
# Fills @ucd.composition_map from the decomposition data in @ucd.codepoints.
#
# A codepoint contributes an entry when all of the following hold:
# * it is present (not nil) and its combining class is 0,
# * it has no decomposition type,
# * its decomposition mapping has exactly two elements,
# * the codepoint named by the first element also has combining class 0,
# * its code is not listed in @ucd.composition_exclusion.
#
# The resulting structure is composition_map[first][second] = composed code.
# Returns @ucd.codepoints (the receiver of #each).
def create_composition_map
  @ucd.codepoints.each do |_, cp|
    # The checks run in this order on purpose: later ones dereference values
    # that the earlier ones have shown to be non-nil.
    next if cp.nil?
    next unless cp.combining_class == 0
    next unless cp.decomp_type.nil?

    mapping = cp.decomp_mapping
    next if mapping.nil?
    next unless mapping.length == 2

    first, second = mapping
    next unless @ucd.codepoints[first].combining_class == 0
    next if @ucd.composition_exclusion.include?(cp.code)

    (@ucd.composition_map[first] ||= {})[second] = cp.code
  end
end
dump_to(filename) click to toggle source
# File activesupport/bin/generate_tables, line 124
# Serializes the generated tables to +filename+ with Marshal.
#
# The file is opened in binary write mode and receives one marshalled array
# holding, in this order: codepoints, composition_exclusion, composition_map,
# boundary and cp1252, all read from @ucd.
#
# Returns the value of IO#write (the number of bytes written).
def dump_to(filename)
  File.open(filename, "wb") do |io|
    tables = [
      @ucd.codepoints,
      @ucd.composition_exclusion,
      @ucd.composition_map,
      @ucd.boundary,
      @ucd.cp1252
    ]
    io.write(Marshal.dump(tables))
  end
end
normalize_boundary_map() click to toggle source
# File activesupport/bin/generate_tables, line 96
# Replaces the :lf and :cr entries of @ucd.boundary with their first element,
# leaving every other entry untouched.
#
# Only existing keys are reassigned, so the hash is not resized while it is
# being iterated. Returns @ucd.boundary (the receiver of #each).
def normalize_boundary_map
  @ucd.boundary.each do |kind, entries|
    case kind
    when :lf, :cr
      @ucd.boundary[kind] = entries[0]
    else
      next
    end
  end
end
parse() click to toggle source
# File activesupport/bin/generate_tables, line 104
# Loads every table listed in SOURCES into @ucd, then builds the composition
# map and normalizes the boundary map.
#
# Each source is cached at Dir.tmpdir/UNICODE_VERSION/<last path segment of
# the url> and is downloaded only when that file does not exist yet. Every
# line of the cached file is handed to the parse_<type> method named by the
# SOURCES key.
#
# Raises whatever the download, the file system or a line parser raises. A
# failed download leaves no cache file behind, so the next run retries it.
def parse
  SOURCES.each do |type, url|
    basename = url.split("/").last
    filename = File.join(Dir.tmpdir, UNICODE_VERSION, basename)

    unless File.exist?(filename)
      $stderr.puts "Downloading #{basename}"
      FileUtils.mkdir_p(File.dirname(filename))

      # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open is the
      # supported entry point. Loaded here because it is only needed when a
      # download actually happens.
      require "open-uri"

      # Download next to the final path and rename only on success, so an
      # interrupted transfer is never mistaken for a complete cached file.
      partial = "#{filename}.part"
      begin
        File.open(partial, "wb") do |target|
          URI.open(url) { |source| IO.copy_stream(source, target) }
        end
        File.rename(partial, filename)
      ensure
        # No-op after a successful rename; removes the leftover on failure.
        FileUtils.rm_f(partial)
      end
    end

    handler = :"parse_#{type}"
    File.open(filename) do |file|
      file.each_line { |line| send handler, line }
    end
  end

  create_composition_map
  normalize_boundary_map
end
parse_codepoints(line) click to toggle source
# File activesupport/bin/generate_tables, line 34
# Parses one semicolon-separated codepoint record and stores the resulting
# Codepoint in @ucd.codepoints under its numeric code.
#
# Only the code, canonical combining class, decomposition type, decomposition
# mapping and the simple uppercase/lowercase mappings are kept; the remaining
# fields are matched but discarded.
#
# Raises RuntimeError ("Could not parse input.") when +line+ does not match
# the expected layout. Returns the stored Codepoint.
def parse_codepoints(line)
  codepoint = Codepoint.new
  fields = /^
    ([0-9A-F]+);        # code
    ([^;]+);            # name
    ([A-Z]+);           # general category
    ([0-9]+);           # canonical combining class
    ([A-Z]+);           # bidi class
    (<([A-Z]*)>)?       # decomposition type
    ((\ ?[0-9A-F]+)*);  # decomposition mapping
    ([0-9]*);           # decimal digit
    ([0-9]*);           # digit
    ([^;]*);            # numeric
    ([YN]*);            # bidi mirrored
    ([^;]*);            # unicode 1.0 name
    ([^;]*);            # iso comment
    ([0-9A-F]*);        # simple uppercase mapping
    ([0-9A-F]*);        # simple lowercase mapping
    ([0-9A-F]*)$/ix.match(line) # simple titlecase mapping
  raise "Could not parse input." unless fields

  # Group 7 is the name inside the <...> of group 6; nil when no type is given.
  raw_mapping = fields[8]

  codepoint.code            = fields[1].hex
  codepoint.combining_class = Integer(fields[4])
  codepoint.decomp_type     = fields[7]
  codepoint.decomp_mapping  = raw_mapping.empty? ? nil : raw_mapping.split.map { |hex| hex.hex }
  # String#hex returns 0 for an empty string, so a blank case mapping is 0.
  codepoint.uppercase_mapping = fields[16].hex
  codepoint.lowercase_mapping = fields[17].hex

  @ucd.codepoints[codepoint.code] = codepoint
end
parse_composition_exclusion(line) click to toggle source
# File activesupport/bin/generate_tables, line 75
# Records a composition exclusion when +line+ begins with hexadecimal digits.
#
# The leading hex run is converted to an Integer and appended to
# @ucd.composition_exclusion. Lines that start with anything else are ignored.
#
# Returns @ucd.composition_exclusion after appending, or nil when the line
# was ignored.
def parse_composition_exclusion(line)
  leading = /^([0-9A-F]+)/i.match(line)
  return if leading.nil?

  @ucd.composition_exclusion << leading[1].hex
end
parse_cp1252(line) click to toggle source
# File activesupport/bin/generate_tables, line 81
# Records one mapping in @ucd.cp1252 from a line that starts with two
# hex-like tokens separated by a single whitespace character.
#
# Both tokens are converted with String#hex; the first becomes the key and
# the second the value. Lines that do not start that way are ignored.
#
# Returns the stored value, or nil when the line was ignored.
def parse_cp1252(line)
  pair = /^([0-9A-Fx]+)\s([0-9A-Fx]+)/i.match(line)
  return if pair.nil?

  source_byte = pair[1].hex
  target_code = pair[2].hex
  @ucd.cp1252[source_byte] = target_code
end
parse_grapheme_break_property(line) click to toggle source
# File activesupport/bin/generate_tables, line 62
# Records one grapheme break entry in @ucd.boundary.
#
# A matching line has the form "<hex or hex..hex> ; <Property> #". The
# property name is downcased and interned to form the key; the value appended
# to that key's array is a Range for "first..last" and an Integer for a single
# codepoint. Lines that do not match are ignored.
#
# Returns the array the entry was appended to, or nil when the line was ignored.
def parse_grapheme_break_property(line)
  entry = /^([0-9A-F.]+)\s*;\s*([\w]+)\s*#/.match(line)
  return if entry.nil?

  span = entry[1]
  property = entry[2].downcase.intern
  bucket = (@ucd.boundary[property] ||= [])

  value =
    if span.include?("..")
      bounds = span.split("..")
      bounds[0].hex..bounds[1].hex
    else
      span.hex
    end
  bucket << value
end