class Interscript::Stdlib

Constants

ALIASES

Public Class Methods

available_functions() click to toggle source
# File lib/interscript/stdlib.rb, line 173
# Lists the names of the stdlib functions, as symbols.
#
# @return [Array<Symbol>] a fresh array on every call
def self.available_functions
  [
    :title_case, :downcase,
    :compose, :decompose,
    :separate, :unseparate,
    :secryst, :rababa
  ]
end
boundary_like_alias?(a) click to toggle source
# File lib/interscript/stdlib.rb, line 25
# Tells whether the alias name +a+ is one of the position/boundary aliases
# (line or string start/end, boundary, non-word boundary).
#
# @param a [Symbol] alias name to check
# @return [Boolean]
def self.boundary_like_alias?(a)
  case a
  when :line_start, :line_end,
       :string_start, :string_end,
       :boundary, :non_word_boundary
    true
  else
    false
  end
end
deterministic_sort_by_max_length(ary) click to toggle source

sort_by is not a stable sort: elements with equal keys may come out in any order (observed on Windows at least). Break ties by original position so the result is deterministic.

# File lib/interscript/stdlib.rb, line 166
# Sorts +ary+ by descending +max_length+, keeping the original relative
# order of elements whose +max_length+ is equal.
#
# @param ary [Enumerable] elements responding to +max_length+
# @return [Array] the elements, longest first, ties in input order
def self.deterministic_sort_by_max_length(ary)
  # A plain `ary.sort_by { |rule| -rule.max_length }` leaves the order of
  # equal-length rules unspecified, so the original index is used as a
  # tie-breaker.
  #
  # The key is a two-element array rather than a single number mixing length
  # and index: a combined number such as `-max_length * 100000 + idx` lets the
  # index spill into the length component once there are 100000 or more
  # elements, which misorders the result.
  ary.each_with_index.sort_by { |rule, idx| [-rule.max_length, idx] }.map(&:first)
end
parallel_regexp_compile(subs_hash) click to toggle source
# File lib/interscript/stdlib.rb, line 31
# Builds one alternation regexp out of all patterns in +subs_hash+.
#
# Each entry's first element (+entry[0]+) is wrapped in a named group
# +_N+, where N is the entry's position, and the groups are joined with "|".
#
# @param subs_hash [#each_with_index] entries whose first element is the pattern
# @return [Regexp] the compiled alternation
def self.parallel_regexp_compile(subs_hash)
  alternatives = []
  subs_hash.each_with_index do |entry, position|
    alternatives << format("(?<_%d>%s)", position, entry[0])
  end
  Regexp.compile(alternatives.join("|"))
end
parallel_regexp_gsub(string, subs_regexp, subs_hash) click to toggle source
# File lib/interscript/stdlib.rb, line 40
# Replaces every match of +subs_regexp+ in +string+ with the entry of
# +subs_hash+ selected by the named group that matched.
#
# The first named group with a non-nil capture decides the replacement: its
# name minus the first character is converted to an integer and used to index
# +subs_hash+.
#
# @param string [String] text to transform
# @param subs_regexp [Regexp] regexp with named groups such as +_0+, +_1+, ...
# @param subs_hash [#[]] replacements, looked up by the integer group index
# @return [String] a new string with the replacements applied
def self.parallel_regexp_gsub(string, subs_regexp, subs_hash)
  string.gsub(subs_regexp) do
    # Name of the first group that took part in this match.
    group_name, = Regexp.last_match.named_captures.find { |_name, text| text }
    subs_hash[group_name[1..-1].to_i]
  end
end
parallel_regexp_gsub_debug(string, subs_regexp, subs_array) click to toggle source
# File lib/interscript/stdlib.rb, line 49
# Debugging variant of the parallel regexp substitution.
#
# Performs the same replacement as a plain gsub over +subs_regexp+, and
# additionally records what happened in two globals:
# * +$subs_regexp+  - the regexp that was used
# * +$subs_matches+ - one hash per match with the keys +:begin+, +:end+
#   (offsets of the matched group), +:idx+ (replacement index) and +:result+
#   (the replacement taken from +subs_array+)
#
# @param string [String] text to transform
# @param subs_regexp [Regexp] regexp with named groups such as +_0+, +_1+, ...
# @param subs_array [#[]] replacements, looked up by the integer group index
# @return [String] a new string with the replacements applied
def self.parallel_regexp_gsub_debug(string, subs_regexp, subs_array)
  # Both globals are reset on every call; they only carry debug information.
  $subs_regexp = subs_regexp
  $subs_matches = []

  string.gsub(subs_regexp) do
    md = Regexp.last_match
    # First named group that took part in this match, e.g. "_3".
    group = md.named_captures.find { |_name, text| text }.first
    # Drop the first character of the name to get the replacement index.
    position = group[1..-1].to_i
    replacement = subs_array[position]

    $subs_matches << {
      begin: md.begin(group),
      end: md.end(group),
      idx: position,
      result: replacement
    }
    replacement
  end
end
parallel_replace(str, hash) click to toggle source
# File lib/interscript/stdlib.rb, line 159
# Convenience entry point: compiles +hash+ with
# +parallel_replace_compile_tree+ and applies the resulting tree to +str+
# with +parallel_replace_tree+.
#
# @param str [String] text to transform
# @param hash [#each] replacement mapping, passed to +parallel_replace_compile_tree+
# @return [String] the result of +parallel_replace_tree+
def self.parallel_replace(str, hash)
  parallel_replace_tree(str, parallel_replace_compile_tree(hash))
end
parallel_replace_compile_hash(a) click to toggle source
# File lib/interscript/stdlib.rb, line 69
# Turns a list of +[from, to]+ pairs into a Hash mapping +from+ to +to+.
# When the same +from+ appears more than once, the last pair wins.
#
# @param a [#each] collection yielding +from, to+ pairs
# @return [Hash] a new hash
def self.parallel_replace_compile_hash(a)
  a.each_with_object({}) do |(from, to), mapping|
    mapping[from] = to
  end
end
parallel_replace_compile_tree(hash) click to toggle source

hash can be either a hash or a hash-like array

# File lib/interscript/stdlib.rb, line 98
# Compiles a replacement mapping into a lookup tree (a trie of nested hashes).
#
# Every character of a source string becomes one level of the tree, keyed by
# the character's codepoint (+String#ord+). The replacement for a complete
# source string is stored under the +nil+ key of the node reached by its last
# character.
#
# Compiled trees are memoized in +@treecache+, keyed by +hash.hash+.
#
# @param hash [#each] a Hash or an array of +[from, to]+ pairs; +from+ may be
#   a single string or an array of strings sharing the same replacement
# @return [Hash] the (possibly cached) tree
def self.parallel_replace_compile_tree(hash)
  # Lazily create the cache so the method also works before anything else
  # has initialised it.
  @treecache ||= {}

  cache_key = hash.hash
  @treecache[cache_key] ||= begin
    tree = {}
    hash.each do |from, to|
      Array(from).each do |source|
        # An empty source string has no characters to descend on and could
        # never match anything; skip it instead of failing on nil.ord.
        next if source.empty?

        # Walk (and create as needed) one node per character.
        leaf = source.each_char.reduce(tree) { |branch, char| branch[char.ord] ||= {} }
        leaf[nil] = to
      end
    end
    tree
  end
end
parallel_replace_hash(str,h) click to toggle source
# File lib/interscript/stdlib.rb, line 77
# Replaces substrings of +str+ according to the mapping +h+, scanning left to
# right and always preferring the longest key that matches at the current
# position. Characters not covered by any key are copied unchanged.
#
# @param str [String] text to transform
# @param h [Hash{String=>String}] source substring => replacement; keys with a
#   nil/false value are treated as absent
# @return [String] a new string with the replacements applied
def self.parallel_replace_hash(str, h)
  # An empty mapping has no longest key; treat it as length 0 so the input is
  # simply copied.
  max_key_len = h.keys.map(&:length).max || 0
  newstr = +""
  len = str.length
  i = 0
  while i < len
    # Never look past the end of the string, and stop at the FIRST (longest)
    # hit: exactly one replacement or one literal character is consumed per
    # iteration. Continuing to test shorter lengths after a hit would re-scan
    # at the already advanced position and copy characters that belong to
    # the next match.
    longest_candidate = [max_key_len, len - i].min
    matched_len = longest_candidate.downto(1).find { |n| h[str[i, n]] }

    if matched_len
      newstr << h[str[i, matched_len]]
      i += matched_len
    else
      newstr << str[i]
      i += 1
    end
  end
  newstr
end
parallel_replace_tree(str, tree) click to toggle source
# File lib/interscript/stdlib.rb, line 121
# Applies a compiled replacement tree to +str+.
#
# At every position the tree is descended one character at a time, using the
# character's codepoint as the key. Each node that has a +nil+ key marks a
# complete source string; the deepest such node reached wins (longest match).
# If nothing matches, the current character is copied unchanged.
#
# @param str [String] text to transform
# @param tree [Hash] nested hashes keyed by codepoint, with replacements
#   stored under the +nil+ key
# @return [String] a new string with the replacements applied
def self.parallel_replace_tree(str, tree)
  output = ""
  total = str.length
  pos = 0

  while pos < total
    node = tree
    best_len = nil      # length of the longest complete match so far
    replacement = nil   # replacement belonging to that match
    depth = 0

    while pos + depth < total
      code = str[pos + depth].ord
      break unless node.key?(code)

      node = node[code]
      depth += 1
      if node.key?(nil)
        best_len = depth
        replacement = node[nil]
      end
    end

    if best_len
      output << replacement
      pos += best_len
    else
      output << str[pos]
      pos += 1
    end
  end

  output
end
re_only_alias?(a) click to toggle source
# File lib/interscript/stdlib.rb, line 21
# Returns true for every alias name except +:none+ and +:space+.
#
# @param a [Symbol] alias name to check
# @return [Boolean]
def self.re_only_alias?(a)
  [:none, :space].none? { |plain| plain == a }
end
reverse_function() click to toggle source
# File lib/interscript/stdlib.rb, line 177
# Maps each reversible stdlib function name to the name of its counterpart.
#
# @return [Hash{Symbol=>Symbol}] a fresh hash on every call
def self.reverse_function
  counterparts = [
    # This pairing is best-effort, but probably wrong.
    %i[title_case downcase],
    %i[compose decompose],
    %i[separate unseparate]
  ]

  # Register every pair in both directions, forward entry first.
  counterparts.each_with_object({}) do |(forward, backward), table|
    table[forward] = backward
    table[backward] = forward
  end
end