class PROIEL::Converter::CoNLLU::Token

Constants

MORPHOLOGY_POSITIONAL_TAG_SEQUENCE

Attributes

citation_part[R]
empty_token_sort[R]
form[R]
head_id[RW]
id[R]
language[R]
lemma[R]
part_of_speech[R]
relation[R]
upos[RW]

Public Class Methods

new(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 218
# Builds a token from its annotation values.
#
# Most arguments are stored unchanged in the instance variable of the same
# name. Derived values:
# * @features      - map_morphology(morphology), or '' when morphology is
#                    nil/false
# * @citation_part - "ref=" followed by citation_part with every whitespace
#                    character replaced by "_" (just "ref=" when it is nil)
# * @upos          - starts out as nil
def initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence)
  @id, @head_id = id, head_id
  @form, @lemma = form, lemma
  @part_of_speech = part_of_speech
  @language = language
  @morphology = morphology
  @relation = relation
  @empty_token_sort = empty_token_sort
  @slashes = slashes
  @sentence = sentence
  @features = morphology ? map_morphology(morphology) : ''
  reference = (citation_part || '').gsub(/\s/, '_')
  @citation_part = 'ref=' + reference
  @upos = nil
end

Public Instance Methods

TAM_particle?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 349
# True when the token bears the 'aux' relation and its
# "lemma,part_of_speech,language" key is listed in TAM_PARTICLE_LEMMATA.
def TAM_particle?
  return false unless @relation == 'aux'

  TAM_PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
end
add_slash!(slash) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 626
# Appends +slash+ to this token's slash list and returns the list.
# Callers in this class pass [target_id, relation] pairs.
def add_slash!(slash)
  @slashes.push(slash)
end
adjectival?() click to toggle source

returns true if the node is an adjective or an ordinal

# File lib/proiel/cli/converters/conll-u.rb, line 253
# True when the part-of-speech tag is 'A-' (adjective) or 'Mo' (ordinal).
def adjectival?
  %w[A- Mo].include?(@part_of_speech)
end
adverb?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 261
# Truthy (0) when the part-of-speech tag starts with 'D', nil otherwise.
def adverb?
  /\AD/ =~ @part_of_speech
end
auxiliary?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 295
# True when the token's "lemma,part_of_speech,language" key is listed in
# AUXILIARIES.
def auxiliary?
  signature = [lemma, part_of_speech, language].join(',')
  AUXILIARIES.include?(signature)
end
cardinal?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 265
# True when the part-of-speech tag is exactly 'Ma'.
def cardinal?
  'Ma' == @part_of_speech
end
change_coordinations!() click to toggle source

Changes coordinations recursively from the bottom of the graph

# File lib/proiel/cli/converters/conll-u.rb, line 602
# Rewrites coordinations bottom-up: every dependent is handled first, then
# this node is processed if it is itself a conjunction.
# Returns the result of process_coordination!, or nil for non-conjunctions.
def change_coordinations!
  dependents.each { |child| child.change_coordinations! }
  conjunction? ? process_coordination! : nil
end
clausal?() click to toggle source

A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute construction without a verb); or it is the root (e.g. in a nominal clause).

# File lib/proiel/cli/converters/conll-u.rb, line 270
# True when any of the following holds:
# * the part-of-speech tag is 'V-' and the node is not nominalized;
# * some dependent is a copula;
# * some dependent bears one of the subject relations listed below;
# * the node is the root.
def clausal?
  return true if @part_of_speech == 'V-' && !nominalized?
  return true if dependents.any?(&:copula?)

  subject_relations = ['sub', 'nsubj', 'nsubjpass', 'csubj', 'csubjpass']
  return true if dependents.any? { |d| subject_relations.include?(d.relation) }

  root?
end
conj_head() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 394
# Follows the chain of 'conj' heads upwards and returns the first head whose
# relation is not 'conj'.
# Raises RuntimeError when this token's own relation is not 'conj'.
def conj_head
  raise "Not a conjunct" unless @relation == 'conj'

  parent = head
  parent.relation == 'conj' ? parent.conj_head : parent
end
conjunction?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 277
# True when the part-of-speech tag is 'C-' or the empty-token sort is 'C'.
def conjunction?
  return true if part_of_speech == 'C-'

  @empty_token_sort == 'C'
end
coordinated?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 281
# Truthy when the token has a head, that head is a conjunction, and the head
# bears the same relation as this token. Returns nil when there is no head.
def coordinated?
  parent = head
  parent && parent.conjunction? && parent.relation == @relation
end
copula?() click to toggle source

Returns true if the node already bears the cop relation, or if it has an xobj dependent and either 1) the lemma is copular or 2) the node is an empty verb that has no pid slash, a pid slash to an empty node, or a pid slash to a node with a copular lemma

# File lib/proiel/cli/converters/conll-u.rb, line 288
# True when the token already bears the 'cop' relation, or when it has an
# 'xobj' dependent and either
# 1. its "lemma,part_of_speech,language" key is in COPULAR_LEMMATA, or
# 2. it is an empty verb (empty-token sort 'V') whose pid target is absent,
#    is itself empty, or has a key in COPULAR_LEMMATA.
def copula?
  return true if @relation == 'cop'

  copular =
    COPULAR_LEMMATA.include?([lemma, part_of_speech, language].join(',')) ||
    (@empty_token_sort == 'V' &&
      (pid.nil? || pid.is_empty? ||
        COPULAR_LEMMATA.include?([pid.lemma, pid.part_of_speech, pid.language].join(','))))

  copular && dependents.any? { |d| d.relation == 'xobj' }
end
count_subgraph() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 382
# Number of non-empty tokens in the subgraph rooted at this token
# (this token counts as 1 unless it is empty).
def count_subgraph
  own_weight = is_empty? ? 0 : 1
  dependents.inject(own_weight) { |total, child| total + child.count_subgraph }
end
dependents() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 452
# All tokens of the sentence whose head_id equals this token's id,
# ordered by their own id.
def dependents
  children = @sentence.tokens.select { |token| token.head_id == @id }
  children.sort_by { |token| token.id }
end
deponent?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 323
# Matches the lemma against the DEPONENTS entry for the token's language.
# Returns the match result, or a falsy value when the language has no
# entry or the lemma does not match.
def deponent?
  pattern = DEPONENTS[@language]
  pattern && pattern.match(@lemma)
end
determiner?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 299
# True when the part-of-speech tag is listed in DETERMINERS.
def determiner?
  DETERMINERS.include?(@part_of_speech)
end
distribute_shared_modifiers!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 614
# Moves the shared modifiers of a coordination onto its first conjunct.
#
# Dependents with the 'aux' relation are ignored. The remaining dependents
# are split into conjuncts (same relation as this node, or 'adv' under an
# 'xadv' node) and modifiers (everything else). Each modifier is re-attached
# to the first conjunct, and every other conjunct receives a
# [modifier id, modifier relation] slash.
#
# Returns the array of modifiers.
# Raises RuntimeError when the node is not a conjunction, when it has no
# conjunct, or when the first conjunct is a conjunction without dependents.
def distribute_shared_modifiers!
  raise "Can only distribute over a conjunction!" unless conjunction?

  candidates = dependents.reject { |d| d.relation == 'aux' }
  conjuncts, modifiers = candidates.partition do |d|
    d.relation == @relation || (d.relation == 'adv' && @relation == 'xadv')
  end

  first_conjunct = conjuncts.shift
  raise "No first conjunct under #{to_n}\n#{to_graph}" unless first_conjunct
  if first_conjunct.conjunction? && first_conjunct.dependents.empty?
    raise "The first conjunct is a misannotated conjunction in #{to_n}\n#{to_graph}"
  end

  modifiers.each do |modifier|
    modifier.head_id = first_conjunct.id
    conjuncts.each { |conjunct| conjunct.add_slash!([modifier.id, modifier.relation]) }
  end
end
ellipsis?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 303
# True when the empty-token sort is 'V'.
def ellipsis?
  'V' == @empty_token_sort
end
find_appositive_head() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 456
# Returns the head of an apposition, skipping upwards over any heads that are
# conjunctions bearing the 'apos' relation themselves.
# Raises RuntimeError when this token's relation is not 'apos'.
def find_appositive_head
  raise "Not an apposition" unless @relation == 'apos'

  parent = head
  return parent unless parent.conjunction? && parent.relation == 'apos'

  parent.find_appositive_head
end
find_highest_daughter() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 571
# Picks the dependent whose mapped relation (the part before any ':') comes
# earliest in OBLIQUENESS_HIERARCHY. Relations missing from the hierarchy
# rank last (1000). Returns nil when there are no dependents.
def find_highest_daughter
  dependents.min_by do |daughter|
    base_relation = daughter.map_relation[/[^:]*/]
    OBLIQUENESS_HIERARCHY.find_index(base_relation) || 1000
  end
end
find_postag(possible_postags) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 465
# Consumes [tag, criterion, features] entries from the front of
# +possible_postags+ (the array is mutated) until a criterion accepts this
# token. On a match @upos is set to the tag and the entry's features, if
# any, are appended to @features with a '|' separator.
#
# Returns the new @features string when features were appended, nil
# otherwise (including when no entry matches).
def find_postag(possible_postags)
  until possible_postags.empty?
    tag, crit, feats = possible_postags.shift
    # raise "Found no postag"
    break if tag.nil?
    next unless crit.call(self)

    @upos = tag
    return feats ? (@features += (@features.empty? ? '' : '|') + feats) : nil
  end
  nil
end
find_relation(possible_relations) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 477
# Consumes [relation, criterion] entries from the front of
# +possible_relations+ (the array is mutated) and returns the relation of the
# first entry whose criterion accepts this token, or nil when none does.
def find_relation(possible_relations)
  until possible_relations.empty?
    rel, crit = possible_relations.shift
    # raise "Found no relation"
    break if rel.nil?
    return rel if crit.call(self)
  end
  nil
end
find_remnant() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 563
# Follows the chain of 'remnant' dependents downwards and returns the last
# token in it; returns self when there is no 'remnant' dependent.
def find_remnant
  nested = dependents.find { |d| d.relation == 'remnant' }
  nested ? nested.find_remnant : self
end
foreign?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 307
# True when the part-of-speech tag is exactly 'F-'.
def foreign?
  'F-' == @part_of_speech
end
format_features(features) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 411
# Renders a '|'-separated feature string for output: '_' when the string is
# empty, otherwise the same features sorted alphabetically.
def format_features(features)
  return '_' if features == ''

  features.split('|').sort.join('|')
end
genitive?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 248
# Truthy (0) when the seventh character (index 6) of the positional
# morphology tag is 'g'; nil otherwise, including when there is no
# morphology.
#
# The pattern is anchored at the start of the tag so that a 'g' in a later
# slot cannot be mistaken for the seventh one; the previous unanchored
# pattern matched a 'g' at any index >= 6.
# NOTE(review): index 6 is presumably the case slot - confirm against
# MORPHOLOGY_POSITIONAL_TAG_SEQUENCE.
def genitive?
  @morphology =~ /\A.{6}g/
end
has_content?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 311
# True when the token has no empty-token sort (nil or the empty string).
def has_content?
  @empty_token_sort.nil? || @empty_token_sort == ''
end
has_preposition?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 581
# True when some dependent is a preposition attached with the "case"
# relation.
def has_preposition?
  dependents.any? do |child|
    child.preposition? && child.relation == "case"
  end
end
head() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 448
# The first token of the sentence whose id equals this token's head_id,
# or nil when there is none.
def head
  @sentence.tokens.find { |token| token.id == @head_id }
end
interjection?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 315
# True when the part-of-speech tag is exactly 'I-'.
def interjection?
  'I-' == @part_of_speech
end
invert!(new_dependent_relation = nil, new_head_relation = nil) click to toggle source

Inverts the direction of a dependency relation. By default the labels are also swapped, but new relations can be specified for both the new dependent and the new head.

# File lib/proiel/cli/converters/conll-u.rb, line 633
# Reverses the dependency between this token and its head: the former head
# becomes a dependent of this token, and this token attaches where the
# former head was attached.
#
# new_dependent_relation - relation given to the former head
#                          (default: this token's current relation)
# new_head_relation      - relation given to this token
#                          (default: the former head's relation)
#
# This token's relation is assigned through relation=.
# Returns the relation assigned to this token.
# Raises RuntimeError when this token hangs directly under the root.
def invert!(new_dependent_relation = nil, new_head_relation = nil)
  raise "Cannot promote a token under root!" if @head_id == 0

  former_head = head
  new_dependent_relation ||= @relation
  new_head_relation ||= former_head.relation
  grandparent_id = former_head.head_id

  former_head.head_id = @id
  former_head.relation = new_dependent_relation
  @head_id = grandparent_id
  self.relation = new_head_relation
end
is_empty?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 319
# True when the token is an empty token, i.e. has_content? is falsy.
def is_empty?
  return false if has_content?

  true
end
left_corner() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 390
# The token with the lowest id among this token and its direct dependents.
def left_corner
  candidates = [self, *dependents]
  candidates.sort_by { |token| token.id }.first
end
map_morphology(morph) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 240
# Translates a positional morphology tag into a '|'-separated feature
# string. For each position, the field name is looked up in
# MORPHOLOGY_POSITIONAL_TAG_SEQUENCE and the character at that position is
# looked up in MORPHOLOGY_MAP[field]; characters without a mapping are
# skipped.
def map_morphology(morph)
  mapped = (0...morph.length).map do |position|
    field = MORPHOLOGY_POSITIONAL_TAG_SEQUENCE[position]
    MORPHOLOGY_MAP[field][morph[position]]
  end
  mapped.compact.join('|')
end
map_part_of_speech!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 488
# Assigns @upos for the whole subgraph, dependents first. The candidates for
# this token come from POS_MAP (a copy is handed to find_postag, which
# consumes its argument). Afterwards a 'DET' whose relation is not 'det' is
# retagged as 'ADJ'.
def map_part_of_speech!
  dependents.each { |child| child.map_part_of_speech! }
  candidates = POS_MAP[@part_of_speech].dup
  find_postag(candidates)
  # ugly, but the ugliness comes from UDEP
  return unless @upos == 'DET' && @relation != 'det'

  @upos = 'ADJ'
end
map_relation() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 503
# Looks up the target relation for the current relation in RELATION_MAPPING.
# * String entry - returned as is.
# * Array entry  - a copy is resolved through find_relation.
# * No entry     - the current relation is returned unchanged (the token has
#                  already changed its relation).
# Raises RuntimeError for any other kind of entry.
def map_relation
  candidates = RELATION_MAPPING[@relation]
  case candidates
  when nil    then @relation
  when String then candidates
  when Array  then find_relation(candidates.dup)
  else raise "Unknown value #{candidates.inspect} for #{@relation}"
  end
end
mediopassive?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 327
# False for deponents and for tokens without morphology. Otherwise truthy
# (0) when the fifth character (index 4) of the morphology tag is 'm', 'p'
# or 'e', and nil when it is not.
def mediopassive?
  return false if deponent? || !@morphology

  @morphology[4] =~ /[mpe]/
end
negation?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 335
# True when the token's "lemma,part_of_speech,language" key is listed in
# NEGATION_LEMMATA.
def negation?
  signature = [lemma, part_of_speech, language].join(',')
  NEGATION_LEMMATA.include?(signature)
end
nominal?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 339
# Truthy (0) when the part-of-speech tag starts with 'N', 'P' or 'M';
# otherwise the result of nominalized?.
def nominal?
  noun_like = @part_of_speech =~ /\A[NPM]/
  return noun_like if noun_like

  nominalized?
end
nominalized?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 343
# True when some dependent is a determiner attached with the 'atr', 'aux'
# or 'det' relation.
def nominalized?
  dependents.any? { |d| d.determiner? && %w[atr aux det].include?(d.relation) }
end
particle?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 353
# True when the token bears the 'aux' relation and its
# "lemma,part_of_speech,language" key is listed in PARTICLE_LEMMATA.
def particle?
  return false unless @relation == 'aux'

  PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
end
passive?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 331
# False for deponents and for tokens without morphology; otherwise true
# exactly when the fifth character (index 4) of the morphology tag is 'p'.
def passive?
  return false if deponent? || !@morphology

  @morphology[4] == 'p'
end
pid() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 403
# The token targeted by this token's first 'pid' slash, or nil when there is
# no such slash or no token with the target id.
def pid
  pid_slash = @slashes.find { |_target, label| label == 'pid' }
  return nil unless pid_slash

  @sentence.tokens.find { |token| pid_slash.first == token.id }
end
preposition?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 361
# True when the part-of-speech tag is exactly 'R-'.
def preposition?
  'R-' == @part_of_speech
end
process_coordination!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 607
# Restructures a coordination. Does nothing when every dependent is 'aux'.
# Otherwise shared modifiers are distributed first, and then the non-aux
# dependent whose left corner has the lowest id is promoted, with
# "conj" for its new siblings and "cc" for the former head.
#
# Raises RuntimeError when the node is not a conjunction.
def process_coordination!
  raise "Only coordinations can be processed this way!" unless conjunction?
  return if dependents.all? { |d| d.relation == 'aux' }

  distribute_shared_modifiers!
  conjuncts = dependents.reject { |d| d.relation == 'aux' }
  leftmost = conjuncts.sort_by { |d| d.left_corner.id }.first
  leftmost.promote!("conj", "cc")
end
process_copula!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 575
# Promotes the single 'xobj' dependent into this token's place; this token
# becomes its 'cop' dependent (see promote!).
# Raises RuntimeError unless there is exactly one 'xobj' dependent.
def process_copula!
  predicates = dependents.select { |d| d.relation == 'xobj' }
  unless predicates.size == 1
    raise "#{predicates.size} predicates under #{to_n}\n#{to_graph}"
  end

  predicates.first.promote!(nil, 'cop')
end
process_ellipsis!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 538
        # Resolves an elided (empty) node.
        #
        # If one of the dependents is an auxiliary, the first such dependent
        # is promoted with promote!'s defaults and nil is returned.
        # Otherwise the dependent chosen by find_highest_daughter is promoted
        # with 'orphan' as the relation for its new siblings, after which
        # this token is removed from the sentence and the result of
        # remove_token! is returned.
        def process_ellipsis!
          auxiliary = dependents.find(&:auxiliary?)
          unless auxiliary
            find_highest_daughter.promote!('orphan')

            # Disabled alternative strategy (kept for reference):
            #
            #   dependents.each do |d|
            #     # check if there's a partner with the same relation under the overt node.
            #     # TODO: this isn't really very convincing when it comes to ADVs
            #     if partner = overt.dependents.select { |p| p != self and p.relation == d.relation }.first #inserted p != self
            #       partner = partner.find_remnant
            #       d.head_id = partner.id
            #       d.relation = 'remnant'
            #     # if there's no partner, just attach under the overt node, preserving the relation
            #     else
            #       d.head_id = overt.id
            #     end
            #   end
            return @sentence.remove_token!(self)
          end

          auxiliary.promote!
          nil
        end
process_preposition!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 585
# Inverts the dependency between a preposition and its single 'obl'
# dependent, so that the preposition ends up attached to it with the "case"
# relation. Returns nil without changes when there is no 'obl' dependent.
#
# Raises RuntimeError when the part-of-speech tag is not 'R-' or when there
# is more than one 'obl' dependent.
def process_preposition!
  raise "Only prepositions can be processed this way!" unless part_of_speech == 'R-'

  obliques = dependents.select { |d| d.relation == 'obl' }
  raise "#{obliques.size} oblique dependents under #{to_n}\n#{to_graph}" if obliques.size > 1

  # a preposition without an oblique shouldn't really happen, but does in practice
  oblique = obliques.first
  oblique.invert!("case") if oblique
end
process_subjunction!() click to toggle source

attach subjunctions with 'mark' under their verbs and promote the verb to take over the subjunction's relation. If the verb is empty, the subjunction stays as head.

# File lib/proiel/cli/converters/conll-u.rb, line 521
# Re-attaches a subjunction relative to its 'pred' dependent.
#
# Nothing happens when the subjunction has no dependents other than 'conj'
# and 'cc' ones. NB: this requires that the method is called *after*
# conjunctions have been processed.
#
# If the 'pred' dependent is empty, its dependents are attached to the
# subjunction and the empty token is removed from the sentence. Otherwise
# the dependency is inverted, with the subjunction taking the 'mark'
# relation.
#
# Raises RuntimeError unless there is exactly one 'pred' dependent.
def process_subjunction!
  return if dependents.all? { |d| ['conj', 'cc'].include?(d.relation) }

  preds = dependents.select { |d| d.relation == 'pred' }
  raise "#{preds.size} PREDs under the subjunction #{to_n}:\n#{@sentence.to_graph}" unless preds.one?

  verb = preds.first
  if verb.is_empty?
    # the verb is empty: the subjunction stays as head
    verb.dependents.each { |d| d.head_id = id }
    @sentence.remove_token!(verb)
  else
    # the verb is overt: demote the subjunction
    verb.invert!('mark')
  end
end
promote!(new_sibling_relation = nil, new_dependent_relation = 'aux') click to toggle source

promotes a node to its head's place. The node takes over its former head's relation and all dependents. The new relation for these dependents can be specified; if it is not, they will keep their former relation. The former head is made a dependent of the node (with a specified relation) or, if it is an empty node, destroyed.

# File lib/proiel/cli/converters/conll-u.rb, line 652
# Promotes this token into its head's position.
#
# All siblings are re-attached under this token. When new_sibling_relation
# is given, every sibling except those with the 'aux' relation is relabelled
# with it. The former head is removed from the sentence if it is empty;
# otherwise it becomes a dependent of this token with
# new_dependent_relation. Finally this token takes over the former head's
# head and relation.
#
# Returns the relation taken over from the former head.
# Raises RuntimeError when this token hangs directly under the root.
def promote!(new_sibling_relation = nil, new_dependent_relation = 'aux')
  raise "Cannot promote a token under root!" if @head_id == 0

  former_head = head
  inherited_relation = former_head.relation
  inherited_head_id = former_head.head_id

  # move all dependents of the former head to the new one
  siblings.each do |sibling|
    sibling.head_id = @id
    # ugly hack: aux siblings aren't really siblings, so their relation is kept
    next unless new_sibling_relation && sibling.relation != 'aux'

    sibling.relation = new_sibling_relation
  end

  if former_head.is_empty?
    # an empty former head is dropped altogether
    @sentence.remove_token!(former_head)
  else
    # an overt former head becomes a dependent of the new head
    former_head.head_id = @id
    former_head.relation = new_dependent_relation
  end

  @head_id = inherited_head_id
  # the instance variable is assigned directly rather than through relation=,
  # so that the relation is not copied down a tree of conjunctions
  @relation = inherited_relation
end
pronominal?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 357
# Truthy (0) when the part-of-speech tag starts with 'P' followed by any
# character other than 's' or 't'; nil otherwise.
def pronominal?
  # no evidence that possessives are pronoun/determiner-like
  /\AP[^st]/ =~ @part_of_speech
end
proper_noun?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 365
# True when the part-of-speech tag is exactly 'Ne'.
def proper_noun?
  'Ne' == @part_of_speech
end
relabel_graph!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 496
# Replaces the relation of every token in the subgraph (dependents first)
# with the result of map_relation.
# Raises RuntimeError when map_relation yields nil/false for this token.
def relabel_graph!
  dependents.each { |child| child.relabel_graph! }
  # TODO: if there are iobjs without an obj among the dependents, one of them should be promoted to obj
  mapped = map_relation
  @relation = mapped
  raise "No relation for #{form}" unless mapped
end
relation=(rel) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 373
# Sets the relation. On a conjunction, the new relation is first passed on
# (through relation=) to every dependent that currently shares the
# conjunction's relation.
def relation=(rel)
  sharing = conjunction? ? dependents.select { |d| d.relation == @relation } : []
  sharing.each { |conjunct| conjunct.relation = rel }
  @relation = rel
end
remove_empties!() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 593
# Removes empty tokens from the subgraph, dependents first. When this token
# is itself empty, its dependents are re-attached to this token's head with
# the 'remnant' relation and this token is removed from the sentence.
def remove_empties!
  dependents.each { |child| child.remove_empties! }
  return unless is_empty?

  dependents.each do |orphan|
    orphan.head_id = head_id
    orphan.relation = 'remnant'
  end
  @sentence.remove_token!(self)
end
root?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 369
# True when the token's head_id is 0, i.e. it has no head token.
def root?
  0 == @head_id
end
siblings() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 444
# All other tokens of the sentence that share this token's head_id.
def siblings
  same_head = @sentence.tokens.select { |token| token.head_id == @head_id }
  same_head - [self]
end
subgraph_set() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 386
# Flat array holding this token followed by the subgraph sets of all of its
# dependents.
def subgraph_set
  descendants = dependents.map { |child| child.subgraph_set }.flatten
  [self].concat(descendants)
end
subjunction?() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 257
# True when the part-of-speech tag is exactly 'G-'.
def subjunction?
  'G-' == @part_of_speech
end
to_conll() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 419
# Renders the token as one tab-separated output line with ten columns:
# id, form, lemma, upos, part of speech, formatted features, head id,
# relation, '_' (placeholder where slashes would go) and citation part.
def to_conll
  # override non-root relations on root until we've found out how to handle
  # unembedded reports etc
  deprel = @head_id == 0 ? 'root' : @relation
  columns = [
    @id, @form, @lemma, @upos, @part_of_speech,
    format_features(@features),
    @head_id, deprel,
    '_', # slashes here
    @citation_part
  ]
  columns.join("\t")
end
to_graph(indents = 0) click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 440
# Renders the subgraph as an indented outline: this token's to_n label
# prefixed by +indents+ tabs, followed by one line per descendant at
# increasing indentation.
def to_graph(indents = 0)
  own_line = ("\t" * indents) + to_n
  child_lines = dependents.map { |d| d.to_graph(indents + 1) }
  [own_line, *child_lines].join("\n")
end
to_n() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 436
# Short '-'-joined node label: relation, id, form (or the empty-token sort
# when there is no form) and upos (or the part-of-speech tag when upos is
# not set).
def to_n
  label = @form || @empty_token_sort
  tag = @upos || @part_of_speech
  [@relation, @id, label, tag].join('-')
end
to_s() click to toggle source
# File lib/proiel/cli/converters/conll-u.rb, line 432
def to_s
  [@id, @form, @head_id, @relation].join("\t")
end