class PROIEL::Converter::Tiger2

Constants

SCHEMA_FILE

Public Class Methods

declare_annotation(builder, features, annotation_schema) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 52
# Emits the annotation declaration section through +builder+.
#
# Inside a single +annotation+ element this writes, in order:
# * one +feature+ element per (name, domain) pair in +features+,
# * an +edgelabel+ element holding a '--' value followed by one value per
#   entry of +annotation_schema.primary_relations+,
# * a +secedgelabel+ element holding one value per entry of
#   +annotation_schema.secondary_relations+.
#
# builder           - XML builder receiving the element calls.
# features          - enumerable of [name, domain] pairs.
# annotation_schema - object responding to +primary_relations+ and
#                     +secondary_relations+, each yielding [tag, relation]
#                     pairs whose relation responds to +summary+.
def declare_annotation(builder, features, annotation_schema)
  builder.annotation do
    # FIXME: we may want to list possible values for some of these
    features.each do |feature_name, feature_domain|
      builder.feature(name: feature_name, domain: feature_domain)
    end

    builder.edgelabel do
      # The '--' label is declared ahead of the schema's own relation tags.
      builder.value(name: '--')

      primary = annotation_schema.primary_relations
      primary.each { |tag, relation| builder.value({ name: tag }, relation.summary) }
    end

    builder.secedgelabel do
      secondary = annotation_schema.secondary_relations
      secondary.each { |tag, relation| builder.value({ name: tag }, relation.summary) }
    end
  end
end
declare_edgelabels(builder) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 75
# Declares the three "label" edge features (types prim, sec and coref) on
# +builder+.
#
# The value lists for the prim and sec features are delegated to
# +declare_primary_edges+ and +declare_secedges+; the coref feature always
# gets the two values "antecedent" and "inference", in that order.
#
# builder - XML builder receiving the element calls.
def declare_edgelabels(builder)
  builder.feature(name: 'label', type: 'prim', domain: 'edge') { declare_primary_edges(builder) }

  builder.feature(name: 'label', type: 'sec', domain: 'edge') { declare_secedges(builder) }

  builder.feature(name: 'label', type: 'coref', domain: 'edge') do
    %w[antecedent inference].each { |coref_label| builder.value(name: coref_label) }
  end
end
process(tb, options) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 7
# Converts every source of the treebank +tb+ and writes the resulting XML to
# STDOUT.
#
# Sets @features (currently always empty, see the TODO) and stores the
# treebank's annotation schema in @hack once per source before that source is
# written.
#
# tb      - treebank responding to +sources+ and +annotation_schema+.
# options - not read by this method.
#
# Returns whatever +tb.sources.each+ returns.
def process(tb, options)
  selected_features = [] # TODO
  @features = selected_features.map { |feature| [feature, 'FREC'] }

  builder = Builder::XmlMarkup.new(target: STDOUT, indent: 2)
  builder.instruct!(:xml, version: '1.0', encoding: 'UTF-8')

  tb.sources.each do |source|
    @hack = tb.annotation_schema

    # The block is invoked by write_source when it yields; each sentence of
    # each div is written in document order.
    write_source(builder, source, tb) do
      source.divs.each do |div|
        div.sentences.each { |sentence| write_sentence(builder, sentence) }
      end
    end
  end
end
token_attrs(s, t, type) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 107
# Builds the attribute hash for token +t+ from the features in @features.
#
# A feature is included when its domain is 'FREC' or equals +type+. The value
# is chosen by feature name:
# * :word / :cat  - "PRO-<RELATION>" for pro tokens, otherwise the form
# * a name listed in @semantic_features - looked up in +t.sem_tags_to_hash+
# * :lemma        - the lemma
# * :pos          - the empty token sort followed by "-", or the POS tag
# * a name listed in MORPHOLOGICAL_FEATURES - the name is split on "_" and
#   each part is looked up in +t.morphology_hash+ ('-' when missing), then
#   the parts are concatenated
# * anything else - the result of calling the method of that name on +t+
# A nil/false value is replaced by "--".
#
# s    - the sentence; not read by this method (kept for the call signature).
# t    - the token.
# type - domain string compared against each feature's domain.
#
# Returns a Hash mapping feature name to value.
# Raises RuntimeError if a feature matches none of the cases above and +t+
# does not respond to it.
def token_attrs(s, t, type)
  @features.each_with_object({}) do |(name, domain), attrs|
    next unless domain == 'FREC' || domain == type

    value =
      case name
      when :word, :cat
        t.pro? ? "PRO-#{t.relation.upcase}" : t.form
      when *@semantic_features
        # Previously indexed with `attr`, which is not defined in this method
        # and raised NameError; the feature name is the key in scope.
        # NOTE(review): assumes sem_tags_to_hash is keyed by the feature name
        # exactly as it appears in @features - confirm.
        t.sem_tags_to_hash[name]
      when :lemma
        t.lemma
      when :pos
        sort = t.empty_token_sort
        sort ? sort + '-' : t.pos
      when *MORPHOLOGICAL_FEATURES
        name.to_s.split('_').map { |part| t.morphology_hash[part.to_sym] || '-' }.join
      else
        raise "Do not know how to get required attribute #{name}" unless t.respond_to?(name)

        t.send(name)
      end

    attrs[name] = value || '--'
  end
end
write_edges(t, builder) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 175
# Writes the outgoing edges of token +t+ as +edge+ elements on +builder+.
#
# In order: an edge to the token's own terminal node (only for
# morphtaggable tokens or tokens whose empty token sort is 'P'), one "prim"
# edge per dependent, one "sec" edge per slash returned by +get_slashes+, and
# finally a "coref" edge to the antecedent when the token has one.
#
# t       - the token.
# builder - XML builder receiving the element calls.
def write_edges(t, builder)
  # Add an edge between this node and the corresponding terminal node unless
  # this is not a morphtaggable node.
  if t.is_morphtaggable? || t.empty_token_sort == 'P'
    builder.edge('tiger2:type' => 'prim', 'tiger2:target' => "w#{t.id}", :label => '--')
  end

  # Add primary dependency edges including empty pro tokens if we are exporting info structure as well
  t.dependents.each do |d|
    builder.edge('tiger2:type' => 'prim', 'tiger2:target' => "p#{d.id}", :label => d.relation.tag)
  end

  # Add secondary dependency edges
  get_slashes(t).each do |se|
    builder.edge('tiger2:type' => 'sec', 'tiger2:target' => "p#{se.slashee_id}", :label => se.relation.tag)
  end

  # Add the coreference edge. Tokens without an antecedent are skipped;
  # previously they produced a coref edge with an empty target.
  antecedent_id = t.antecedent_id
  return unless antecedent_id

  coref_label = t.information_status_tag == 'acc_inf' ? 'inference' : 'antecedent'
  builder.edge('tiger2:type' => 'coref', 'tiger2:target' => antecedent_id, :label => coref_label)
end
write_nonterminals(builder, s) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 141
# Writes the +nonterminals+ element for sentence +s+.
#
# The first +nt+ is an artificial root node with id "s<id>_root"; every
# feature in @features whose domain is 'FREC' or 'NT' is set to '--' on it,
# and it gets one edge per token that has no head and is not a pro token.
# After that, one +nt+ with id "p<id>" is written per token, holding an edge
# to the token's terminal "w<id>", one edge per child and one +secedge+ per
# slash.
#
# builder - XML builder receiving the element calls.
# s       - sentence responding to +id+ and +tokens+.
def write_nonterminals(builder, s)
  builder.nonterminals do
    # Add an empty root node
    root_attrs = {}
    @features.each do |name, domain|
      root_attrs[name] = '--' if %w[FREC NT].include?(domain)
    end
    root_attrs['xml:id'] = "s#{s.id}_root"

    builder.nt(root_attrs) do
      root_tokens = s.tokens.reject { |t| t.head || t.pro? }
      root_tokens.each { |t| builder.edge(idref: "p#{t.id}", label: t.relation) }
    end

    # Add other NTs
    s.tokens.each do |t|
      nt_attrs = token_attrs(s, t, 'NT').merge('xml:id' => "p#{t.id}")

      builder.nt(nt_attrs) do
        # Add an edge to the corresponding terminal node
        builder.edge(idref: "w#{t.id}", label: '--')

        # Add primary dependency edges
        t.children.each do |child|
          builder.edge(idref: "p#{child.id}", label: child.relation)
        end

        # Add secondary dependency edges
        t.slashes.each { |relation, target_id| builder.secedge(idref: "p#{target_id}", label: relation) }
      end
    end
  end
end
write_root_edge(t, builder) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 171
# Writes a single "prim" edge pointing at the nonterminal "p<id>" of token
# +t+, labelled with the tag of the token's relation.
#
# t       - token responding to +id+ and +relation+ (whose result responds
#           to +tag+).
# builder - XML builder receiving the +edge+ call.
def write_root_edge(t, builder)
  target = "p#{t.id}"
  label = t.relation.tag

  builder.edge('tiger2:type' => 'prim', 'tiger2:target' => target, :label => label)
end
write_sentence(builder, s) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 90
# Writes sentence +s+ as an +s+ element with id "s<id>" wrapping a +graph+
# element whose root is "s<id>_root"; the graph holds the terminals followed
# by the nonterminals.
#
# builder - XML builder receiving the element calls.
# s       - sentence responding to +id+.
def write_sentence(builder, s)
  sentence_id = "s#{s.id}"

  builder.s('xml:id' => sentence_id) do
    builder.graph(root: "#{sentence_id}_root") do
      write_terminals(builder, s)
      write_nonterminals(builder, s)
    end
  end
end
write_source(builder, s, tb) { |builder| ... } click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 26
# Writes source +s+ as a +corpus+ element carrying the TIGER2 namespace
# declarations.
#
# The +head+ contains a +meta+ block (name, author, export date and empty
# description/format/history elements) followed by the annotation
# declaration for @features. The +body+ is filled by the caller's block,
# which is yielded the builder.
#
# builder - XML builder receiving the element calls.
# s       - source responding to +id+, +title+ and +export_time+.
# tb      - treebank responding to +annotation_schema+.
#
# Yields the builder from inside the +body+ element.
def write_source(builder, s, tb)
  tiger2_ns = 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/'
  schema_location = 'http://korpling.german.hu-berlin.de/tiger2/V2.0.5/ http://korpling.german.hu-berlin.de/tiger2/V2.0.5/Tiger2.xsd'

  # Attribute order is kept exactly as before: the builder receives the
  # pairs in this order.
  builder.corpus('xml:id' => s.id,
                 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
                 'xsi:schemaLocation' => schema_location,
                 'xmlns:tiger2' => tiger2_ns,
                 'xmlns' => tiger2_ns) do
    builder.head do
      builder.meta do
        builder.name(s.title)
        builder.author('The PROIEL project')
        exported_at = s.export_time.strftime('%F %T %z')
        builder.date(exported_at)
        builder.description
        builder.format
        builder.history
      end

      declare_annotation(builder, @features, tb.annotation_schema)
    end

    builder.body { yield builder }
  end
end
write_terminals(builder, s) click to toggle source
# File lib/proiel/cli/converters/tiger2.rb, line 99
# Writes the +terminals+ element for sentence +s+: one +t+ element per token,
# carrying the token's 'T' attributes from +token_attrs+ plus an 'xml:id' of
# "w<id>".
#
# builder - XML builder receiving the element calls.
# s       - sentence responding to +tokens+.
def write_terminals(builder, s)
  builder.terminals do
    s.tokens.each do |token|
      terminal_attrs = token_attrs(s, token, 'T')
      builder.t(terminal_attrs.merge('xml:id' => "w#{token.id}"))
    end
  end
end