class Treebank::Transform
Constants
- VERSION
Public Class Methods
new(doc)
click to toggle source
# File lib/treebank/transform.rb, line 12
# Builds a transformer around the given document.
#
# The argument is handed to Nokogiri::XML and the parsed result is kept in
# @doc for the other methods of this class to work on.
#
# @param doc the treebank source; presumably an XML string or IO — whatever
#   Nokogiri::XML accepts
def initialize(doc)
  @doc = Nokogiri::XML(doc)
end
Public Instance Methods
extract_cts_name(extension = '')
click to toggle source
# File lib/treebank/transform.rb, line 23
# Derives a name from the document_id of the first //treebank/sentence
# element in @doc.
#
# The document_id is matched against /urn:cts:.*Lit:(.*)/; everything after
# the (last, because `.*` is greedy) "Lit:" is captured and the extension is
# appended to it.
#
# @param extension [String] suffix appended to the captured name
# @return [String, nil] the captured name plus extension, or nil when the
#   document has no sentence, the sentence has no document_id, or the
#   document_id does not match the pattern
def extract_cts_name(extension = '')
  sentence = @doc.xpath('//treebank/sentence').first
  # A document without sentences has nothing to derive a name from.
  return unless sentence

  document_id = sentence['document_id']
  # A missing attribute is treated like a non-matching id instead of raising.
  return unless document_id

  match = document_id.match(/urn:cts:.*Lit:(.*)/)
  "#{match[1]}#{extension}" if match
end
transform()
click to toggle source
# File lib/treebank/transform.rb, line 16
# Runs the full transformation and serializes the result.
#
# The sentence-level pass runs first, then the document-level pass; both
# operate on @doc.
#
# @return the value of @doc.to_xml called with indent: 2
def transform
  transform_sentence_level
  transform_document_level

  @doc.to_xml(indent: 2)
end
Private Instance Methods
beta2unicode()
click to toggle source
# File lib/treebank/transform.rb, line 35
# Applies the XSLT stylesheet read from BETA_2_UNICODE to @doc and replaces
# @doc with the transformed document. The compiled stylesheet is kept in
# @xslt.
#
# The work happens inside Dir.chdir(RESOURCES), so the stylesheet path is
# resolved relative to that directory.
# NOTE(review): presumably the chdir also lets the stylesheet resolve its own
# relative includes — confirm against the stylesheet.
def beta2unicode
  Dir.chdir(RESOURCES) do
    stylesheet_source = File.read(BETA_2_UNICODE)
    @xslt = Nokogiri::XSLT(stylesheet_source)
    @doc = @xslt.transform(@doc)
  end
end
has_elliptic_head(label)
click to toggle source
# File lib/treebank/transform.rb, line 60
# Tests whether a relation label contains an "ExD" marker followed by at
# least one digit (pattern /ExD\d+/).
#
# @param label [String, nil] the relation label to inspect; nil is accepted
#   so that a word without a relation attribute does not raise
# @return [MatchData, nil] the match when the marker is present, otherwise
#   nil (also nil for a nil label)
def has_elliptic_head(label)
  return if label.nil?

  label.match(/ExD\d+/)
end
transform_document_level()
click to toggle source
# File lib/treebank/transform.rb, line 31
# Runs the transformations that apply to the document as a whole.
# Currently this is only beta2unicode.
def transform_document_level
  beta2unicode
end
transform_elliptic_nodes(sentence, word_node)
click to toggle source
# File lib/treebank/transform.rb, line 53
# Handles a word whose relation attribute carries an elliptic-head marker.
#
# When has_elliptic_head accepts the word's 'relation' value, the word is
# wrapped in an EllipticWord together with its sentence and
# parse_elliptic_head is called on it. Other words are left alone.
#
# @param sentence the sentence object passed through to EllipticWord.new
# @param word_node the word node; read with word_node['relation']
# @return the result of parse_elliptic_head, or nil when the word has no
#   elliptic-head marker
def transform_elliptic_nodes(sentence, word_node)
  return unless has_elliptic_head(word_node['relation'])

  EllipticWord.new(word_node, sentence).parse_elliptic_head
end
transform_participles(node)
click to toggle source
# File lib/treebank/transform.rb, line 64
# Rewrites a postag that starts with 't' so that it starts with 'v' instead.
# NOTE(review): the method name suggests 't' marks participles and 'v' verbs
# in the tag set — confirm against the tag set documentation.
#
# The node is modified in place. Nodes whose postag does not start with 't',
# or that have no postag attribute at all, are left untouched.
#
# @param node an object supporting node['postag'] and node['postag'] = value
# @return [String, nil] the new postag when it was rewritten, otherwise nil
def transform_participles(node)
  postag = node['postag']
  # A missing attribute yields nil; skip it rather than raise.
  return unless postag && postag.start_with?('t')

  # sub replaces only the first 't', which is the leading one here.
  node['postag'] = postag.sub('t', 'v')
end
transform_sentence_level()
click to toggle source
# File lib/treebank/transform.rb, line 42
# Applies the per-sentence and per-word transformations to every
# //treebank/sentence element of @doc.
#
# For each sentence node a Sentence wrapper is created and ctsify is called
# on it; then, for each 'word' child, transform_elliptic_nodes runs before
# transform_participles.
def transform_sentence_level
  sentence_nodes = @doc.xpath('//treebank/sentence')

  sentence_nodes.each do |node|
    wrapped = Sentence.new(node)
    wrapped.ctsify

    words = node.xpath('word')
    words.each do |word|
      transform_elliptic_nodes(wrapped, word)
      transform_participles(word)
    end
  end
end