class ToknInternal::CodeSet

A CodeSet is an ordered set of character or token codes that are used as labels on DFA edges.

In addition to unicode character codes 0…0x10ffff, they also represent epsilon transitions (-1), or token identifiers ( < -1).

Each CodeSet is represented as an array with 2n elements; each pair represents a closed lower and open upper range of values.

Thus a value x is within the set [a1,a2,b1,b2,..] iff (a1 <= x < a2) or (b1 <= x < b2) or …

Public Class Methods

new(lower = nil, upper = nil) click to toggle source

Initialize set; optionally add an initial contiguous range

# File lib/tokn/code_set.rb, line 29
def initialize(lower = nil, upper = nil)
  @elem = []
  if lower
    add(lower,upper)
  end
end

Public Instance Methods

add(lower, upper = nil) click to toggle source

Add a contiguous range of values to the set @param lower min value in range @param upper one plus max value in range

# File lib/tokn/code_set.rb, line 72
def add(lower, upper = nil)
  if not upper
    upper = lower + 1
  end
  
  if lower >= upper
    raise RangeError
  end
  
  newSet = [] 
  i = 0
  while i < @elem.size and @elem[i] < lower
    newSet.push(@elem[i])
    i += 1
  end 
  
  if (i & 1) == 0
    newSet.push(lower)
  end
  
  while i < @elem.size and @elem[i] <= upper
    i += 1
  end
  
  if (i & 1) == 0
    newSet.push(upper)
  end
  
  while i < @elem.size 
    newSet.push(@elem[i])
    i += 1
  end
  
  @elem = newSet
  
end
addSet(s) click to toggle source

Add every value from another CodeSet to this one

# File lib/tokn/code_set.rb, line 174
def addSet(s)
  sa = s.array
  
  (0 ... sa.length).step(2) {
    |i| add(sa[i],sa[i+1])
  }
end
array() click to toggle source

Get the array containing the code set range pairs

# File lib/tokn/code_set.rb, line 44
def array
  @elem
end
cardinality() click to toggle source

Determine how many distinct values are represented by this set

# File lib/tokn/code_set.rb, line 297
def cardinality
  c = 0
  i = 0
  while i < @elem.length
    c += @elem[i+1] - @elem[i]
    i += 2
  end
  c
end
contains?(val) click to toggle source

Determine if this set contains a particular value

# File lib/tokn/code_set.rb, line 183
def contains?(val)
  ret = false
  i = 0
  while i < @elem.size
    if val < @elem[i]
      break
    end 
    if val < @elem[i+1]
      ret = true
      break
    end
    i += 2
  end  
  
  ret
  
end
difference(s) click to toggle source

Calculate difference of this set minus another

# File lib/tokn/code_set.rb, line 158
def difference(s)
  combineWith(s, 'd')
end
difference!(s) click to toggle source

Replace this set with itself minus another

# File lib/tokn/code_set.rb, line 153
def difference!(s)
  setTo(difference(s))
end
empty?() click to toggle source

Determine if this set is empty

# File lib/tokn/code_set.rb, line 309
def empty?
  @elem.empty?
end
eql?(other) click to toggle source

Determine if this set is equivalent to another, by comparing the contained arrays

# File lib/tokn/code_set.rb, line 63
def eql?(other)
  @elem == other.array
end
hash() click to toggle source

Get hash code; just uses hash code of the contained array

# File lib/tokn/code_set.rb, line 56
def hash
  @elem.hash
end
inspect() click to toggle source

Calls to_s

# File lib/tokn/code_set.rb, line 224
def inspect
  to_s
end
intersect(s) click to toggle source

Calculate the intersection of this set and another

# File lib/tokn/code_set.rb, line 163
def intersect(s)
  combineWith(s, 'i')
end
intersect!(s) click to toggle source

Set this set equal to its intersection with another

# File lib/tokn/code_set.rb, line 169
def intersect!(s)
  setTo(intersect(s))
end
makeCopy() click to toggle source

Construct a copy of this set

# File lib/tokn/code_set.rb, line 21
def makeCopy
  c = CodeSet.new
  c.setTo(self)
  c
end
negate(lower = 0, upper = CODEMAX) click to toggle source

Negate the inclusion of a contiguous range of values

@param lower min value in range @param upper one plus max value in range

# File lib/tokn/code_set.rb, line 256
def negate(lower = 0, upper =  CODEMAX)
  s2 = CodeSet.new(lower,upper)
  if lower >= upper
    raise RangeError
  end
  
  newSet = [] 
  i = 0
  while i < @elem.size and @elem[i] <= lower
    newSet.push(@elem[i])
    i += 1
  end 
  
  if i > 0 and newSet[i-1] == lower
    newSet.pop
  else
    newSet.push(lower)
  end
  
  while i < @elem.size and @elem[i] <= upper
    newSet.push(@elem[i])
    i += 1
  end 
  
  
  if newSet.length > 0 and newSet.last == upper
    newSet.pop
  else
    newSet.push(upper)
  end
  
  while i < @elem.size 
    newSet.push(@elem[i])
    i += 1
  end 
  
  @elem = newSet
  
end
remove(lower, upper = nil) click to toggle source

Remove a contiguous range of values from the set @param lower min value in range @param upper one plus max value in range

# File lib/tokn/code_set.rb, line 114
def remove(lower, upper = nil)
  if not upper
    upper = lower + 1
  end
  
  if lower >= upper
    raise RangeError
  end
  
  newSet = [] 
  i = 0
  while i < @elem.size and @elem[i] < lower
    newSet.push(@elem[i])
    i += 1
  end 
  
  if (i & 1) == 1
    newSet.push(lower)
  end
  
  while i < @elem.size and @elem[i] <= upper
    i += 1
  end
  
  if (i & 1) == 1
    newSet.push(upper)
  end
  
  while i < @elem.size 
    newSet.push(@elem[i])
    i += 1
  end
  
  setArray(newSet)
  
end
setArray(a) click to toggle source

Replace this set's array @param a array to point to (does not make a copy of it)

# File lib/tokn/code_set.rb, line 51
def setArray(a)
  @elem = a
end
setTo(otherSet) click to toggle source

Replace this set with a copy of another

# File lib/tokn/code_set.rb, line 38
def setTo(otherSet)
  @elem.replace(otherSet.array)  
end
to_s() click to toggle source

Get string representation of set, treating them (where possible) as printable ASCII characters

# File lib/tokn/code_set.rb, line 204
def to_s
  s = ''
  i = 0
  while i < @elem.size
    if s.size
      s += ' '
    end
    
    lower = @elem[i]
    upper = @elem[i+1]
    s += dbStr(lower)
    if upper != 1+lower
      s += '..' + dbStr(upper-1)
    end
    i += 2
  end
  return s
end
to_s_alt() click to toggle source

Get string representation of set, treating them as integers

# File lib/tokn/code_set.rb, line 231
def to_s_alt
  s = ''
  i = 0
  while i < @elem.size
    if s.length > 0
      s += ' '
    end
    low = @elem[i]
    upr = @elem[i+1]
    s += low.to_s
    if upr > low+1
      s += '..'
      s += (upr-1).to_s
    end
    i += 2
  end
  return s
end

Private Instance Methods

combineWith(s, oper) click to toggle source

Combine this range (a) with another (b) according to particular operation > s other range (b) > oper 'i': intersection, a^b

'd': difference, a-b
'n': negation, (a & !b) | (!a & b)
# File lib/tokn/code_set.rb, line 339
def combineWith(s, oper)
  sa = array
  sb = s.array
  
  i = 0
  j = 0
  c = []
  
  wasInside = false
  
  while i < sa.length || j < sb.length
    
    if i == sa.length 
      v = sb[j]
    elsif j == sb.length
      v = sa[i]
    else
      v = [sa[i],sb[j]].min
    end
  
    if i < sa.length && v == sa[i]
      i += 1
    end      
    if j < sb.length && v == sb[j]
      j += 1
    end      
    
    case oper
    when 'i'
      inside = ((i & 1) == 1) && ((j & 1) == 1)
    when 'd'
      inside = ((i & 1) == 1) && ((j & 1) == 0)
    else
      raise Exception, "illegal"
    end
    
    if inside != wasInside
      c.push v
      wasInside = inside
    end
  end
  ret = CodeSet.new()
  ret.setArray(c)
  ret
end
dbStr(charCode) click to toggle source

Get a debug description of a value within a CodeSet, suitable for including within a .dot label

# File lib/tokn/code_set.rb, line 318
def dbStr(charCode)
  
  # Unless it corresponds to a non-confusing printable ASCII value,
  # just print its decimal equivalent
  
  s = charCode.to_s
  
  if charCode == EPSILON
    s = "(e)"
  elsif (charCode > 32 && charCode < 0x7f && !"'\"\\[]{}()".index(charCode.chr))
    s = charCode.chr
  end  
  return s
end