class Tokn::DFA

A DFA for tokenizing; it holds a pointer to a start state and a list of token names.

Constants

VERSION

Attributes

startState[R]
tokenNames[R]

Public Class Methods

from_file(path) click to toggle source

Compile a Tokenizer DFA from a text file (that contains a JSON string)

# File lib/tokn/dfa.rb, line 48
# Compile a tokenizer DFA from a text file that contains its JSON form.
#
# The file is read with read_text_file and the resulting text is handed to
# from_json.
#
# @param path path of the file to read
# @return the value returned by from_json for the file's contents
def self.from_file(path)
  json_text = read_text_file(path)
  from_json(json_text)
end
from_json(jsonStr) click to toggle source

Compile a Tokenizer DFA from a JSON string

# File lib/tokn/dfa.rb, line 54
# Compile a tokenizer DFA from a JSON string.
#
# The parsed JSON is read for three keys: "version", "tokens" and "states".
# Each entry of "states" is a [finalState, edgeList] pair, and each edge is a
# [label, destination state index] pair; the label is handed to
# CodeSet#setArray.
#
# @param jsonStr JSON text describing the DFA
# @return [DFA] a DFA built from the token names and the state at index 0
# @raise [ArgumentError] if the version is missing, is not a number, or its
#   integer part differs from that of VERSION
def self.from_json(jsonStr)
  # Set to true to trace the extraction.
  db = false

  pr("\n\nextractDFA %s...\n", jsonStr) if db

  h = JSON.parse(jsonStr)

  version = h["version"]

  # A non-numeric version (for example a JSON string) has no usable #floor;
  # report it as a bad version instead of failing with NoMethodError.
  unless version.is_a?(Numeric) && version.floor == VERSION.floor
    raise ArgumentError,
          "Bad or missing version number: " + version.to_s + ", expected " + VERSION.to_s
  end

  tNames = h["tokens"]
  stateInfo = h["states"]

  pr("tokens=%s\n", d(tNames)) if db
  pr("stateInfo=\n%s\n", d(stateInfo)) if db

  # First pass: create every state, so that edges can point at states that
  # appear later in the list.
  st = []
  stateInfo.each_with_index do |_info, i|
    pr(" creating new state, id=%d\n", i) if db
    st.push(State.new(i))
  end

  # Second pass: fill in the final-state flag and the outgoing edges.
  st.each do |s|
    pr("proc state %s\n", d(s)) if db

    finalState, edgeList = stateInfo[s.id]
    s.finalState = finalState
    edgeList.each do |label, destState|
      cr = CodeSet.new
      cr.setArray(label)
      s.addEdge(cr, st[destState])
    end
  end

  DFA.new(tNames, st[0])
end
from_script(script, persistPath = nil) click to toggle source

Compile a Tokenizer DFA from a token definition script. If persistPath is not nil, it first checks whether that file exists; if so, it assumes the file contains (in JSON form) a previously compiled DFA matching this script, and reads the DFA from it.

Second, if no such file exists, it writes the DFA to it after compilation.

# File lib/tokn/dfa.rb, line 20
# Compile a tokenizer DFA from a token definition script.
#
# When persistPath is given and that file already exists, the file is assumed
# to hold the JSON form of a DFA previously compiled from this script, and the
# DFA is read from it with from_json instead of compiling. Otherwise the
# script is compiled with TokenDefParser and, when persistPath is given, the
# serialized DFA is written to that file.
#
# @param script token definition script text
# @param persistPath optional path of the file used to persist the DFA
# @return the DFA read from the persisted file or compiled from the script
def self.from_script(script, persistPath = nil)
  # Reuse the persisted DFA if there is one. The persisted form is the JSON
  # written by serialize, so it is read back with from_json.
  if persistPath && File.exist?(persistPath)
    return from_json(read_text_file(persistPath))
  end

  # NOTE(review): presumably loads the parser lazily, only when a compile is
  # actually needed - confirm against the definition of req.
  req('token_defn_parser')

  dfa = TokenDefParser.new(script).dfa

  write_text_file(persistPath, dfa.serialize) if persistPath

  dfa
end
from_script_file(scriptPath, persistPath = nil) click to toggle source

Similar to from_script, but reads the script into memory from the file at scriptPath.

# File lib/tokn/dfa.rb, line 41
# Like from_script, but the script text is first read from the file at
# scriptPath (using read_text_file).
#
# @param scriptPath path of the token definition script
# @param persistPath optional persistence path, passed through to from_script
# @return the value returned by from_script
def self.from_script_file(scriptPath, persistPath = nil)
  script_text = read_text_file(scriptPath)
  from_script(script_text, persistPath)
end
new(tokenNameList, startState) click to toggle source

Construct a DFA, given a list of token names and a starting state.

# File lib/tokn/dfa.rb, line 101
# Construct a DFA from a list of token names and a starting state.
#
# Stores both arguments and builds a map from each token name to its index
# in the list (if a name occurs more than once, its last index is kept).
#
# @param tokenNameList list of token names
# @param startState the start state; its id must be zero
# @raise [ArgumentError] if the start state's id is not zero
def initialize(tokenNameList, startState)
  raise ArgumentError, "Start state id must be zero" if startState.id != 0

  @tokenNames = tokenNameList
  @startState = startState
  @tokenIdMap = @tokenNames.each_with_index.each_with_object({}) do |(name, index), ids|
    ids[name] = index
  end
end

Public Instance Methods

serialize() click to toggle source

Serialize this DFA to a JSON string. The DFA in JSON form has this structure:

{
  "version" => version number (float)
  "tokens" => array of token names (strings)
  "states" => array of states, ordered by id (0,1,..)
}

Each state has this format:

[ finalState (boolean),
 [edge0, edge1, ...]
]

Edge:

[label, destination id (integer)]

Labels are arrays of integers, exactly the structure of a CodeSet array.

# File lib/tokn/dfa.rb, line 163
# Serialize this DFA to a JSON string.
#
# The JSON object has the keys "version" (VERSION), "tokens" (the token
# names) and "states" (one stateToList entry per state reachable from the
# start state, ordered by state id).
#
# @return [String] the JSON text
# @raise [ArgumentError] if the reachable state ids are not exactly 0, 1, 2, ...,
#   if there are no states, or if the state with id 0 is not the start state
def serialize
  h = { "version" => VERSION, "tokens" => tokenNames }

  stateSet, = startState.reachableStates

  # Index the states by id, then order them by id, so the result does not
  # depend on the order in which reachableStates yields them.
  idToStateMap = {}
  stateSet.each { |st| idToStateMap[st.id] = st }
  stateList = idToStateMap.keys.sort.map { |id| idToStateMap[id] }

  # The ids must be contiguous and start at zero, because a state's position
  # in the serialized list stands for its id.
  stateList.each_with_index do |st, expectedId|
    raise ArgumentError, "unexpected state ids" if st.id != expectedId
  end

  raise ArgumentError, "bad states" if stateList.empty?
  raise ArgumentError, "bad start state" if stateList[0] != startState

  h["states"] = stateList.map { |st| stateToList(st) }

  JSON.generate(h)
end
tokenId(tokenName) click to toggle source

Get the id of a token, given its name. @param tokenName the name of the token. @return the token's id, or nil if there is no token with that name.

# File lib/tokn/dfa.rb, line 139
# Look up the id of a token by its name.
#
# @param tokenName name of the token
# @return the id stored for that name in the token id map, or nil if there
#   is no token with that name
def tokenId(tokenName)
  @tokenIdMap.fetch(tokenName, nil)
end
tokenName(tokenId) click to toggle source

Determine the name of a token, given its id. Returns <UNKNOWN> if its id is UNKNOWN_TOKEN, or <EOF> if the tokenId is nil. Otherwise, assumes tokenId is 0 … n-1, where n is the number of token names in the DFA.

# File lib/tokn/dfa.rb, line 121
# Determine the name of a token, given its id.
#
# @param tokenId nil, UNKNOWN_TOKEN, or an index into tokenNames
# @return "<EOF>" if tokenId is nil (or false), "<UNKNOWN>" if it equals
#   UNKNOWN_TOKEN, otherwise the entry of tokenNames at that index
# @raise [IndexError] if tokenId is negative or not less than the number of
#   token names
def tokenName(tokenId)
  return "<EOF>" unless tokenId
  return "<UNKNOWN>" if tokenId == UNKNOWN_TOKEN

  if tokenId < 0 || tokenId >= tokenNames.size
    raise IndexError, "No such token id: " + tokenId.to_s
  end

  tokenNames[tokenId]
end

Private Instance Methods

stateToList(state) click to toggle source
# File lib/tokn/dfa.rb, line 206
# Convert a state to the list form used by serialize:
#
#   [finalState?, [[label array, destination id], ...]]
#
# @param state the state to convert
# @return [Array] a two-element list: the state's finalState? value, followed
#   by one [label.array, dest.id] pair for each of the state's edges
def stateToList(state)
  is_final = state.finalState?
  edge_infos = state.edges.map { |label, target| [label.array, target.id] }
  [is_final, edge_infos]
end