class TwitterCldr::Shared::Bidi

Constants

MAX_DEPTH

Attributes

levels[R]
string_arr[R]
types[R]

Public Class Methods

from_string(str, options = {}) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 36
def from_string(str, options = {})
  string_arr = str.unpack("U*")
  Bidi.new(options.merge(types: compute_types(string_arr), string_arr: string_arr))
end
from_type_array(types, options = {}) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 41
def from_type_array(types, options = {})
  Bidi.new(options.merge(types: types))
end
new(options = {}) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 54
def initialize(options = {})
  @string_arr = options[:string_arr] || options[:types]
  @types = options[:types] || []
  @levels = []
  @runs = []
  @direction = options[:direction]
  @default_direction = options[:default_direction] || :LTR
  @length = @types.size
  run_bidi
end

Protected Class Methods

compute_types(arr) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 47
def compute_types(arr)
  arr.map do |code_point|
    TwitterCldr::Shared::CodePoint.get(code_point).bidi_class.to_sym
  end
end

Public Instance Methods

reorder_visually!() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 69
def reorder_visually!
  raise "No string given!" unless @string_arr

  # Do this explicitly so we can also find the maximum depth at the
  # same time.
  max = 0
  lowest_odd = MAX_DEPTH + 1

  @levels.each do |level|
    max = [level, max].max
    lowest_odd = [lowest_odd, level].min unless level.even?
  end

  # Reverse the runs starting with the deepest.
  max.downto(lowest_odd) do |depth|
    start = 0

    while start < @levels.size
      # Find the start of a run >= DEPTH.
      start += 1 while start < @levels.size && @levels[start] < depth

      break if start == @levels.size

      # Find the end of the run.
      finish = start + 1
      finish += 1 while finish < @levels.size && @levels[finish] >= depth

      # Reverse this run.
      ((finish - start) / 2).times do |i|
        tmpb = @levels[finish - i - 1]
        @levels[finish - i - 1] = @levels[start + i]
        @levels[start + i] = tmpb

        tmpo = @string_arr[finish - i - 1]
        @string_arr[finish - i - 1] = @string_arr[start + i]
        @string_arr[start + i] = tmpo
      end

      # Handle the next run.
      start = finish + 1
    end
  end

  self
end
to_s() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 65
def to_s
  @string_arr.pack("U*")
end

Protected Instance Methods

arraycopy(orig, orig_index, dest, dest_index, length) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 504
def arraycopy(orig, orig_index, dest, dest_index, length)
  orig[orig_index...(orig_index + length)].each_with_index do |elem, count|
    dest[dest_index + count] = elem
  end
end
compute_explicit_levels() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 134
def compute_explicit_levels
  current_embedding = @base_embedding

  # The directional override is a Character directionality
  # constant.  -1 means there is no override.
  directional_override = -1

  # The stack of pushed embeddings, and the stack pointer.
  # Note that because the direction is inherent in the depth,
  # and because we have a bit left over in a byte, we can encode
  # the override, if any, directly in this value on the stack.

  embedding_stack = []
  @formatter_indices ||= []
  sp = 0

  @length.times do |i|
    is_ltr = false
    is_special = true
    is_ltr = @types[i] == :LRE || @types[i] == :LRO

    case @types[i]
      when :RLE, :RLO, :LRE, :LRO
        new_embedding = if is_ltr
          # Least greater even.
          ((current_embedding & ~1) + 2)
        else
          # Least greater odd.
          ((current_embedding + 1) | 1)
        end

        # FIXME: we don't properly handle invalid pushes.
        if new_embedding < MAX_DEPTH
          # The new level is valid.  Push the old value.
          # See above for a comment on the encoding here.

          current_embedding |= -0x80 if (directional_override != -1)
          embedding_stack[sp] = current_embedding
          current_embedding = new_embedding
          sp += 1

          directional_override = if @types[i] == :LRO
            :L
          elsif @types[i] == :RLO
            :R
          else
            -1
          end
        end

      when :PDF
        # FIXME: we don't properly handle a pop with a corresponding
        # invalid push.
        # If sp === 0, we saw a pop without a push.  Just ignore it.
        if sp > 0
          sp -= 1
          new_embedding = embedding_stack[sp]
          current_embedding = new_embedding & 0x7f

          directional_override = if new_embedding < 0
            (new_embedding & 1) == 0 ? :L : :R
          else
            -1
          end
        end

      else
        is_special = false
    end

    @levels[i] = current_embedding

    if is_special
      # Mark this character for removal.
      @formatter_indices << i
    elsif directional_override != -1
      @types[i] = directional_override
    end
  end

  # Remove the formatting codes and update both the arrays
  # and 'length'.  It would be more efficient not to remove
  # these codes, but it is also more complicated.  Also, the
  # Unicode algorithm reference does not properly describe
  # how this is to be done -- from what I can tell, their suggestions
  # in this area will not yield the correct results.

  output = 0
  input = 0
  size = @formatter_indices.size

  0.upto(size).each do |i|
    if i == size
      next_fmt = @length
    else
      next_fmt = @formatter_indices[i]
    end

    len = next_fmt - input

    # Non-formatter codes are from 'input' to 'next_fmt'.
    arraycopy(@levels, input, @levels, output, len)
    arraycopy(@types, input, @types, output, len)

    output += len
    input = next_fmt + 1
  end

  @length -= @formatter_indices.size
end
compute_paragraph_embedding_level() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 117
def compute_paragraph_embedding_level
  # First check to see if the user supplied a directionality override.
  if [:LTR, :RTL].include?(@direction)
    @direction == :LTR ? 0 : 1
  else
    # This implements rules P2 and P3.
    # (Note that we don't need P1, as the user supplies
    # a paragraph.)
    @types.each do |type|
      return 0 if type == :L
      return 1 if type == :R
    end

    @default_direction == :LTR ? 0 : 1
  end
end
compute_runs() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 245
def compute_runs
  run_count = 0
  current_embedding = @base_embedding

  @length.times do |i|
    if @levels[i] != current_embedding
      current_embedding = @levels[i]
      run_count += 1
    end
  end

  # This may be called multiple times.  If so, and if
  # the number of runs has not changed, then don't bother
  # allocating a new array.
  where = 0
  last_run_start = 0
  current_embedding = @base_embedding

  @length.times do |i|
    if @levels[i] != current_embedding
      @runs[where] = last_run_start
      where += 1
      last_run_start = i
      current_embedding = @levels[i]
    end
  end

  @runs[where] = last_run_start
end
get_run_count() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 370
def get_run_count
  @runs.size
end
get_run_level(which) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 374
def get_run_level(which)
  @levels[@runs[which]]
end
get_run_limit(which) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 378
def get_run_limit(which)
  if which == (@runs.length - 1)
    @length
  else
    @runs[which + 1]
  end
end
get_run_start(which) click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 386
def get_run_start(which)
  @runs[which]
end
reinsert_formatting_codes() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 461
def reinsert_formatting_codes
  if @formatter_indices
    input = @length
    output = @levels.size

    # Process from the end as we are copying the array over itself here.
    (@formatter_indices.size - 1).downto(0) do |index|
      next_fmt = @formatter_indices[index]

      # nextFmt points to a location in the original array.  So,
      # nextFmt+1 is the target of our copying.  output is the location
      # to which we last copied, thus we can derive the length of the
      # copy from it.
      len = output - next_fmt - 1
      output = next_fmt
      input -= len

      # Note that we no longer need 'types' at this point, so we
      # only edit 'levels'.
      if next_fmt + 1 < @levels.size
        arraycopy(@levels, input, @levels, next_fmt + 1, len)
      end

      # Now set the level at the reinsertion point.
      right_level = if output == @levels.length - 1
        @base_embedding
      else
        @levels[output + 1] || 0
      end

      left_level = if input == 0
        @base_embedding
      else
        @levels[input] || 0;
      end

      @levels[output] = [left_level, right_level].max
    end
  end

  @length = @levels.size
end
resolve_implicit_levels() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 390
def resolve_implicit_levels
  # This implements rules I1 and I2.
  @length.times do |i|
    if (@levels[i] & 1) == 0
      if @types[i] == :R
        @levels[i] += 1
      elsif @types[i] == :AN || @types[i] == :EN
        @levels[i] += 2
      end
    else
      if @types[i] == :L || @types[i] == :AN || @types[i] == :EN
        @levels[i] += 1
      end
    end
  end
end
resolve_neutral_types() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 407
def resolve_neutral_types
  # This implements rules N1 and N2.
  run_count = get_run_count
  previous_level = @base_embedding

  run_count.times do |run|
    start = get_run_start(run)
    finish = get_run_limit(run)
    level = get_run_level(run)
    next unless level

    embedding_direction = level.even? ? :L : :R
    # These are the names used in the Bidi algorithm.
    sor = [previous_level, level].max.even? ? :L : :R

    next_level = if run == (run_count - 1)
      @base_embedding
    else
      get_run_level(run + 1)
    end

    eor = [level, next_level].max.even? ? :L : :R
    prev_strong = sor
    neutral_start = -1

    start.upto(finish) do |i|
      new_strong = -1
      this_type = i == finish ? eor : @types[i]

      case this_type
        when :L
          new_strong = :L
        when :R, :AN, :EN
          new_strong = :R
        when :BN, :ON, :S, :B, :WS
          neutral_start = i if neutral_start == -1
      end

      # If we see a strong character, update all the neutrals.
      if new_strong != -1
        if neutral_start != -1
          override = prev_strong == new_strong ? prev_strong : embedding_direction
          neutral_start.upto(i - 1) { |j| @types[j] = override }
        end

        prev_strong = new_strong
        neutral_start = -1
      end
    end

    previous_level = level
  end
end
resolve_weak_types() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 275
def resolve_weak_types
  run_count = @runs.size
  previous_level = @base_embedding

  run_count.times do |run_idx|
    start = get_run_start(run_idx)
    finish = get_run_limit(run_idx)
    level = get_run_level(run_idx) || 0

    # These are the names used in the Bidi algorithm.
    sor = [previous_level, level].max.even? ? :L : :R

    next_level = if run_idx == (run_count - 1)
      @base_embedding
    else
      get_run_level(run_idx + 1) || 0
    end

    eor = [level, next_level].max.even? ? :L : :R
    prev_type = sor
    prev_strong_type = sor

    start.upto(finish - 1) do |i|
      next_type = (i == finish - 1) ? eor : @types[i + 1]

      # Rule W1: change NSM to the prevailing direction.
      if @types[i] == :NSM
        @types[i] = prev_type
      else
        prev_type = @types[i]
      end

      # Rule W2: change EN to AN in some cases.
      if @types[i] == :EN
        if prev_strong_type == :AL
          @types[i] = :AN
        end
      elsif @types[i] == :L || @types[i] == :R || @types[i] == :AL
        prev_strong_type = @types[i]
      end

      # Rule W3: change AL to R.
      if @types[i] == :AL
        @types[i] = :R
      end

      # Rule W4: handle separators between two numbers.
      if prev_type == :EN && next_type == :EN
        if @types[i] == :ES || @types[i] == :CS
          @types[i] = nextType
        end
      elsif prev_type == :AN && next_type == :AN && @types[i] == :CS
        @types[i] = next_type
      end

      # Rule W5: change a sequence of european terminators to
      # european numbers, if they are adjacent to european numbers.
      # We also include BN characters in this.
      if @types[i] == :ET || @types[i] == :BN
        if prev_type == :EN
          @types[i] = prev_type
        else
          # Look ahead to see if there is an EN terminating this
          # sequence of ETs.
          j = i + 1

          while j < finish && @types[j] == :ET || @types[j] == :BN
            j += 1
          end

          if j < finish && @types[j] == :EN
            # Change them all to EN now.
            i.upto(j - 1) do |k|
              @types[k] = :EN
            end
          end
        end
      end

      # Rule W6: separators and terminators change to ON.
      # Again we include BN.
      if @types[i] == :ET || @types[i] == :CS || @types[i] == :BN
        @types[i] = :ON
      end

      # Rule W7: change european number types.
      if prev_strong_type == :L && @types[i] == :EN
        @types[i] = prev_strong_type
      end
    end

    previous_level = level
  end
end
run_bidi() click to toggle source
# File lib/twitter_cldr/shared/bidi.rb, line 510
def run_bidi
  @base_embedding = compute_paragraph_embedding_level

  compute_explicit_levels
  compute_runs
  resolve_weak_types
  resolve_neutral_types
  resolve_implicit_levels
  reinsert_formatting_codes

  # After resolving the implicit levels, the number
  # of runs may have changed.
  compute_runs
end