class ChupaText::Command::ChupaText

Constants

AVAILABLE_FORMATS
SIZE

Public Class Methods

new() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 42
# Sets every command setting to its default value. The callbacks registered
# in create_option_parser overwrite these while arguments are parsed.
def initialize
  # Nothing about the input is known until the command line is parsed.
  @input = @uri = @mime_type = nil
  # nil means the extracted body size is not limited.
  @max_body_size = nil

  @configuration = Configuration.load_default
  @enable_gems = true

  # Output defaults.
  @format = :json
  @mime_formatter_options = {}
  @need_screenshot = true
  @expected_screenshot_size = [200, 200]
end
run(*arguments) click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 23
# Class-level entry point: builds a fresh command object, forwards
# +arguments+ to its #run and returns whatever that call returns.
def run(*arguments)
  new.run(*arguments)
end

Public Instance Methods

run(*arguments) click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 55
# Performs one extraction run: parses +arguments+, loads the decomposers,
# then passes the input data through the extractor and hands every
# extracted item to the formatter.
#
# Returns false when the arguments cannot be parsed, true otherwise.
def run(*arguments)
  parsed = parse_arguments(arguments)
  return false unless parsed

  load_decomposers
  extractor = create_extractor
  input_data = create_data
  output = create_formatter

  output.format_start(input_data)
  extractor.extract(input_data) { |result| output.format_extracted(result) }
  output.format_finish(input_data)
  true
end

Private Instance Methods

available_log_levels() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 173
# Returns the level names that the --log-level option accepts.
def available_log_levels
  %w[debug info warn error fatal unknown]
end
create_data() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 204
# Builds the data object for the current input and copies the MIME type,
# screenshot and body size settings onto it.
#
# * No input argument: standard input wrapped in a VirtualFileData.
# * Input plus @uri: the input is opened in binary mode and wrapped in a
#   VirtualFileData that carries @uri.
# * Input only: an InputData for the input itself.
#
# An input that starts with "scheme://" is parsed as a URI; anything else
# is treated as a path.
def create_data
  data =
    if @input.nil?
      VirtualFileData.new(@uri, $stdin)
    else
      source =
        case @input
        when /\A[a-z]+:\/\//i then URI.parse(@input)
        else Pathname(@input)
        end
      if @uri
        virtual_data = nil
        # The data object is created while the input is still open.
        source.open("rb") { |io| virtual_data = VirtualFileData.new(@uri, io) }
        virtual_data
      else
        InputData.new(source)
      end
    end

  # An explicitly given MIME type is only applied when present.
  data.mime_type = @mime_type if @mime_type
  data.need_screenshot = @need_screenshot
  data.expected_screenshot_size = @expected_screenshot_size
  data.max_body_size = @max_body_size
  data
end
create_extractor() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 198
# Returns a new Extractor with the command's configuration applied to it.
def create_extractor
  Extractor.new.tap do |extractor|
    extractor.apply_configuration(@configuration)
  end
end
create_formatter() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 229
# Returns the formatter for the selected output format, writing to
# standard output. Only the MIME formatter also receives the MIME options.
# Evaluates to nil when @format is none of :json, :text or :mime.
def create_formatter
  output = $stdout
  case @format
  when :json then Formatters::JSON.new(output)
  when :text then Formatters::Text.new(output)
  when :mime then Formatters::MIME.new(output, @mime_formatter_options)
  end
end
create_option_parser() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 93
# Builds the OptionParser for the command line. Options are registered in
# four groups (generic, input, output, log), in that order, so the help
# text lists them in the same order. The option callbacks store their
# values in the command's instance variables.
#
# Returns the configured parser.
def create_option_parser
  parser = OptionParser.new
  parser.banner += " [FILE_OR_URI]"
  parser.version = VERSION

  add_generic_options(parser)
  add_input_options(parser)
  add_output_options(parser)
  add_log_options(parser)

  parser
end

# Registers the options for configuration files, gem-provided decomposers
# and the decomposer load path.
def add_generic_options(parser)
  parser.separator("")
  parser.separator("Generic options")
  parser.on("--configuration=FILE",
            "Reads configuration from FILE.") do |path|
    load_configuration(path)
  end
  parser.on("--disable-gems",
            "Disables decomposers installed by RubyGems.") do
    @enable_gems = false
  end
  parser.on("-I=PATH",
            "Appends PATH to decomposer load path.") do |path|
    $LOAD_PATH << path
  end
end

# Registers the options that describe the input data.
def add_input_options(parser)
  parser.separator("")
  parser.separator("Input related options")
  parser.on("--uri=URI",
            "Input data URI.") do |uri|
    @uri = URI.parse(uri)
  end
  parser.on("--mime-type=MIME_TYPE",
            "Input data MIME type.") do |mime_type|
    @mime_type = mime_type
  end
end

# Registers the options that control the output format, screenshots and
# the body size limit. The "(default: ...)" help lines show the values the
# instance variables hold when this method runs.
def add_output_options(parser)
  parser.separator("")
  parser.separator("Output related options")
  parser.on("--format=FORMAT", AVAILABLE_FORMATS,
            "Output FORMAT.",
            "[#{AVAILABLE_FORMATS.join(', ')}]",
            "(default: #{@format})") do |format|
    @format = format
  end
  parser.on("--mime-boundary=BOUNDARY",
            "Use BOUNDARY for MIME boundary.",
            "(default: Use SHA1 digest of URI)") do |boundary|
    @mime_formatter_options[:boundary] = boundary
  end
  parser.on("--[no-]need-screenshot",
            "Generate screenshot if available.",
            "(default: #{@need_screenshot})") do |boolean|
    @need_screenshot = boolean
  end
  parser.on("--expected-screenshot-size=WIDTHxHEIGHT", SIZE,
            "Expected screenshot size.",
            "(default: #{@expected_screenshot_size.join("x")})") do |size|
    @expected_screenshot_size = size
  end
  parser.on("--max-body-size=BYTE", Integer,
            "The max byte of extracted body.",
            "(default: no limit)") do |size|
    @max_body_size = size
  end
end

# Registers the logging options. Each one stores its value in an
# environment variable and clears ::ChupaText.logger.
def add_log_options(parser)
  parser.separator("")
  # The trailing colon differs from the other section titles; it is kept so
  # that the help output stays unchanged.
  parser.separator("Log related options:")
  parser.on("--log-output=OUTPUT",
            "Sets log output.",
            "[-(stdout), +(stderr), PATH]",
            "(default: +(stderr))") do |output|
    ENV["CHUPA_TEXT_LOG_OUTPUT"] = output
    ::ChupaText.logger = nil
  end
  parser.on("--log-level=LEVEL", available_log_levels,
            "Sets log level.",
            "[#{available_log_levels.join(', ')}]",
            "(default: #{current_log_level_name})") do |level|
    ENV["CHUPA_TEXT_LOG_LEVEL"] = level
    ::ChupaText.logger = nil
  end
end
current_log_level_name() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 184
# Returns the lower-case name of the Logger::Severity constant whose value
# equals the current ::ChupaText.logger level, or "info" when no constant
# matches.
def current_log_level_name
  severity = ::ChupaText.logger.level
  matched = Logger::Severity.constants.find do |constant|
    Logger::Severity.const_get(constant) == severity
  end
  matched ? matched.to_s.downcase : "info"
end
load_configuration(path) click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 71
# Loads the configuration file at +path+ through a ConfigurationLoader
# built around the command's current configuration, and returns the
# loader's result.
def load_configuration(path)
  ConfigurationLoader.new(@configuration).load(path)
end
load_decomposers() click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 193
# Loads the decomposers. Unless gems were disabled, all decomposer gems are
# enabled first.
def load_decomposers
  if @enable_gems
    Decomposers.enable_all_gems
  end
  Decomposers.load
end
parse_arguments(arguments) click to toggle source
# File lib/chupa-text/command/chupa-text.rb, line 76
# Parses the command line +arguments+ (options are removed from the array
# in place) and stores the single remaining argument, if any, in @input.
#
# Returns false after printing the parser's error message when an option
# is invalid, or after printing the help text when more than one
# non-option argument is left. Returns true otherwise.
def parse_arguments(arguments)
  parser = create_option_parser
  begin
    remaining = parser.parse!(arguments)
  rescue OptionParser::ParseError => error
    puts(error.message)
    return false
  end

  if remaining.size > 1
    puts(parser.help)
    return false
  end

  # nil when no input argument was given.
  @input = remaining.first
  true
end