class Opener::Tokenizer::CLI

CLI wrapper around {Opener::Tokenizer} using Slop.

@!attribute [r] parser

@return [Slop]

Attributes

parser[R]

Public Class Methods

new() click to toggle source
# File lib/opener/tokenizer/cli.rb, line 12
##
# Creates the CLI and stores the value returned by {#configure_slop} in
# `@parser`.
#
def initialize
  @parser = configure_slop
end

Public Instance Methods

configure_slop() click to toggle source

@return [Slop]

# File lib/opener/tokenizer/cli.rb, line 26
      ##
      # Builds the Slop parser describing the command-line interface: the
      # usage banner, the help text, the option flags and the run callback.
      #
      # The run callback creates a Tokenizer from the parsed options, reads
      # the input from STDIN (nil is used when STDIN is a TTY) and prints
      # whatever Tokenizer#run returns.
      #
      # @return [Slop]
      #
      def configure_slop
        # Help text placed between the banner and the option list. The body is
        # flush-left on purpose: <<- keeps leading whitespace of content lines.
        help_text = <<-EOF.chomp

About:

    Tokenizer for KAF/plain text documents with support for various languages
    such as Dutch and English. This command reads input from STDIN.

Examples:

    cat example.txt | tokenizer -l en # Manually specify the language
    cat example.kaf | tokenizer       # Uses the xml:lang attribute

Languages:

    * Dutch (nl)
    * English (en)
    * French (fr)
    * German (de)
    * Italian (it)
    * Spanish (es)

KAF Input:

    If you give a KAF file as an input (-k or --kaf) the language is taken from
    the xml:lang attribute inside the file. Else it expects that you give the
    language as an argument (-l or --language)

Example KAF:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <KAF version="v1.opener" xml:lang="en">
      <raw>This is some text.</raw>
    </KAF>
          EOF

        Slop.new(:strict => false, :indent => 2, :help => true) do
          banner 'Usage: tokenizer [OPTIONS]'

          separator help_text
          separator "\nOptions:\n"

          on :v, :version, 'Shows the current version' do
            abort "tokenizer v#{VERSION} on #{RUBY_DESCRIPTION}"
          end

          on :l=, :language=, 'A specific language to use',
            :as      => String,
            :default => DEFAULT_LANGUAGE

          on :k, :kaf, 'Treats the input as a KAF document'
          on :p, :plain, 'Treats the input as plain text'

          run do |opts, args|
            # KAF mode is on unless -p/--plain was given; the value of the
            # -k/--kaf flag itself is never read here.
            instance = Tokenizer.new(
              :args     => args,
              :kaf      => !opts[:plain],
              :language => opts[:language]
            )

            # Only read STDIN when something is piped in.
            text = STDIN.tty? ? nil : STDIN.read

            puts instance.run(text)
          end
        end
      end
run(argv = ARGV) click to toggle source

@param [Array] argv

# File lib/opener/tokenizer/cli.rb, line 19
##
# Passes the given arguments to the `parse` method of {#parser}.
#
# NOTE(review): presumably this is what triggers the `run` callback
# registered in {#configure_slop} -- confirm against the Slop documentation.
#
# @param [Array] argv The arguments to parse, defaults to ARGV.
#
def run(argv = ARGV)
  parser.parse(argv)
end