class PROIEL::Converter::CoNLLU

Constants

AUXILIARIES
COPULAR_LEMMATA
DEPONENTS

Used to try to guess deponency based on the lemma.

DETERMINERS
MORPHOLOGY_MAP
NEGATION_LEMMATA
OBLIQUENESS_HIERARCHY
PARTICLE_LEMMATA
POS_MAP
RELATION_MAPPING
TAM_PARTICLE_LEMMATA

Public Class Methods

process(tb, options = [])
# File lib/proiel/cli/converters/conll-u.rb, line 14
# Walks every source, div and sentence of the treebank +tb+, converts each
# sentence and prints the result to standard output, preceded by
# "# source", "# text" and "# sent_id" comment lines and followed by a
# blank line.
#
# A sentence whose conversion raises is reported on STDERR and skipped; once
# all sentences have been visited, a failure summary is written to STDERR.
#
# The +options+ argument is accepted but is not used by this method.
def process(tb, options = [])
  failures = 0
  total = 0

  tb.sources.each do |source|
    source.divs.each do |div|
      div.sentences.each do |sentence|
        total += 1
        converter = Sentence.new(sentence)

        begin
          # Convert before printing anything, so that a failed conversion
          # does not leave orphaned header lines in the output.
          conll = converter.convert.to_conll

          puts "# source = #{source.title}, #{div.title}"

          # Plain token forms are used here: printable_form would bring in
          # punctuation, which would then have to be added to the tree.
          forms = sentence.tokens.map(&:form).compact
          puts "# text = #{forms.join(' ')}"
          puts "# sent_id = #{sentence.id}"
          puts conll
          puts
        rescue StandardError => e
          failures += 1
          STDERR.puts "Cannot convert #{sentence.id} (#{sentence.citation}): #{e}"

          # A RuntimeError gets the one-line report only; any other error
          # additionally gets its backtrace printed.
          next if e.is_a?(RuntimeError)

          STDERR.puts e.backtrace.join("\n")
        end
      end
    end
  end

  STDERR.puts "#{failures} sentences out of #{total} could not be converted"
end