class PROIEL::Commands::Grep
Public Class Methods
init_with_program(prog)
click to toggle source
# File lib/proiel/cli/commands/grep.rb, line 5
#
# Registers the +grep+ subcommand on the given program object.
#
# The block handed to +prog.command+ declares the command's syntax line,
# its description and its options, and wires the command action to
# +process+.
#
# @param prog the program object to register on; it must respond to
#   +command(name) { |c| ... }+ (presumably a Mercenary program - confirm)
# @return whatever +prog.command+ returns
def init_with_program(prog)
  prog.command(:grep) do |c|
    c.syntax 'grep [options] pattern filename(s)'
    c.description 'Search the text'

    c.option 'level', '--level LEVEL', 'Select level to match. LEVEL should be "token" or "sentence" (default)'
    # Help text corrected: stripping applies to the start and the *end* of a
    # string (it previously read "start and beginning").
    c.option 'nostrip', '--nostrip', 'Do not strip whitespace from start and end of strings'
    c.option 'nosubst', '--nosubst', 'Do not substitute whitespace sequences with a single space'
    c.option 'nocolour', '--nocolour', 'Do not colour code matches'
    c.option 'ignore-case', '-i', '--ignore-case', 'Ignore uppercase/lowercase'

    c.action { |args, options| process(args, options) }
  end
end
match(s, citation, object_id, pattern, options)
click to toggle source
# File lib/proiel/cli/commands/grep.rb, line 20
#
# Prints one result line to standard output if +s+ matches +pattern+.
#
# The text is normalised before matching: leading/trailing whitespace is
# stripped unless <tt>options['nostrip']</tt> is set, and runs of whitespace
# are collapsed to a single space unless <tt>options['nosubst']</tt> is set.
# All work is done on copies, so the caller's string is never modified and
# may be frozen.
#
# @param s [String] the text to search
# @param citation [String] citation label printed before the match
# @param object_id the identifier printed as "ID = ..."
# @param pattern [Regexp] the pattern to look for
# @param options [Hash] reads the keys 'nostrip', 'nosubst' and 'nocolour'
# @return [nil]
def match(s, citation, object_id, pattern, options)
  text = s
  text = text.strip unless options['nostrip']
  text = text.gsub(/\s+/, ' ') unless options['nosubst']

  return unless text[pattern]

  # String#yellow is not core Ruby; presumably provided by the colorize
  # gem loaded elsewhere - confirm.
  text = text.gsub(pattern) { |m| m.yellow } unless options['nocolour']

  puts "#{citation} (ID = #{object_id}) #{text}"
end
merge_citation_parts(citation_upper, citation_lower_start, citation_lower_end = nil)
click to toggle source
# File lib/proiel/cli/commands/grep.rb, line 31
#
# Builds a citation string from an upper part and a lower start/end pair.
#
# The lower part is the start alone when start and end are equal (or when
# only one of them is present), otherwise "start-end". Upper and lower parts
# are joined with a single space. Parts that are +nil+ or empty are treated
# as absent, so no stray separator (trailing space, leading or trailing
# dash) is produced.
#
# @param citation_upper [String, nil] upper citation part
# @param citation_lower_start [String, nil] lower citation part of the first item
# @param citation_lower_end [String, nil] lower citation part of the last item
# @return [String] the merged citation; empty when every part is absent
def merge_citation_parts(citation_upper, citation_lower_start, citation_lower_end = nil)
  # Normalise empty parts to nil so that +compact+ drops them as well.
  upper, lower_start, lower_end =
    [citation_upper, citation_lower_start, citation_lower_end].map do |part|
      part unless part.nil? || part.to_s.empty?
    end

  lower =
    if lower_start == lower_end
      lower_start
    else
      [lower_start, lower_end].compact.join('-')
    end

  [upper, lower].compact.join(' ')
end
process(args, options)
click to toggle source
# File lib/proiel/cli/commands/grep.rb, line 90
#
# Entry point of the +grep+ command: validates the command line, loads the
# given treebank files and searches every div of every source.
#
# Every usage error (missing pattern, invalid pattern, missing filenames,
# invalid level) prints a message to STDERR and exits with status 1.
#
# @param args [Array<String>] the pattern followed by one or more filenames;
#   the pattern is removed from the array with +shift+
# @param options [Hash] reads 'ignore-case', 'level' and 'verbose';
#   'level' is set to 'sentence' in place when missing
# @return the value of +tb.sources.each+ on success
def process(args, options)
  if args.empty?
    STDERR.puts 'Missing pattern. Use --help for more information.'
    exit 1
  end

  pattern_string = args.shift
  regexp_flags = options['ignore-case'] ? Regexp::IGNORECASE : 0

  # A malformed pattern is a usage error like the others below, so report
  # it the same way instead of letting RegexpError escape as a stack trace.
  pattern =
    begin
      Regexp.new(pattern_string, regexp_flags)
    rescue RegexpError => e
      STDERR.puts "Invalid pattern: #{e.message}. Use --help for more information."
      exit 1
    end

  if args.empty?
    STDERR.puts 'Missing filename(s). Use --help for more information.'
    exit 1
  end

  options['level'] ||= 'sentence'

  unless %w(token sentence).include?(options['level'])
    STDERR.puts 'Invalid matching level. Use --help for more information.'
    exit 1
  end

  tb = PROIEL::Treebank.new

  args.each do |filename|
    # String#green is not core Ruby; presumably provided by the colorize
    # gem loaded elsewhere - confirm.
    STDERR.puts "Reading #{filename}...".green if options['verbose']

    tb.load_from_xml(filename)
  end

  tb.sources.each do |source|
    citation_upper = source.citation_part

    source.divs.each do |div|
      process_div(citation_upper, div, pattern, options)
    end
  end
end
process_div(citation_upper, div, pattern, options)
click to toggle source
# File lib/proiel/cli/commands/grep.rb, line 81
#
# Dispatches the search of one div according to <tt>options['level']</tt>.
#
# 'sentence' delegates to +process_div_for_sentences+ and 'token' to
# +process_div_for_tokens+; any other level does nothing.
#
# @param citation_upper upper citation part, passed through unchanged
# @param div the div to search, passed through unchanged
# @param pattern the pattern to match, passed through unchanged
# @param options [Hash] reads the key 'level'
# @return the delegate's return value, or +nil+ for any other level
def process_div(citation_upper, div, pattern, options)
  level = options['level']

  return process_div_for_sentences(citation_upper, div, pattern, options) if level == 'sentence'
  return process_div_for_tokens(citation_upper, div, pattern, options) if level == 'token'

  nil
end
process_div_for_sentences(citation_upper, div, pattern, options)
click to toggle source
# File lib/proiel/cli/commands/grep.rb, line 42
#
# Searches a div sentence by sentence.
#
# For each sentence the text is assembled from the sentence's
# +presentation_before+, then +presentation_before+, +form+ and
# +presentation_after+ of every token for which +is_empty?+ is false, then
# the sentence's +presentation_after+ (missing values count as empty
# strings). The citation covers the first to the last such token. The
# result is handed to +match+ together with the sentence id.
#
# @param citation_upper upper citation part for every sentence in the div
# @param div an object whose +sentences+ can be iterated with +each+
# @param pattern the pattern forwarded to +match+
# @param options [Hash] forwarded to +match+
# @return the value of +div.sentences.each+
def process_div_for_sentences(citation_upper, div, pattern, options)
  div.sentences.each do |sentence|
    # Append to one private buffer instead of allocating a new string per
    # `+=`; the dup keeps the sentence's own string untouched.
    text = (sentence.presentation_before || '').dup

    citation_lower_start = nil
    citation_lower_end = nil

    sentence.tokens.each do |token|
      next if token.is_empty?

      text << (token.presentation_before || '')
      text << (token.form || '')
      text << (token.presentation_after || '')

      # The first counted token fixes the start; the end follows the latest.
      citation_lower_start ||= token.citation_part
      citation_lower_end = token.citation_part
    end

    text << (sentence.presentation_after || '')

    citation = merge_citation_parts(citation_upper, citation_lower_start, citation_lower_end)

    match(text, citation, sentence.id, pattern, options)
  end
end
process_div_for_tokens(citation_upper, div, pattern, options)
click to toggle source
# File lib/proiel/cli/commands/grep.rb, line 66
#
# Searches a div token by token.
#
# Tokens for which +is_empty?+ is true are skipped. For every other token
# the text is +presentation_before+ + +form+ + +presentation_after+ (missing
# values count as empty strings); it is handed to +match+ together with the
# token's citation and id.
#
# @param citation_upper upper citation part for every token in the div
# @param div an object whose +sentences+ can be iterated with +each+
# @param pattern the pattern forwarded to +match+
# @param options [Hash] forwarded to +match+
# @return the value of +div.sentences.each+
def process_div_for_tokens(citation_upper, div, pattern, options)
  div.sentences.each do |sentence|
    sentence.tokens.each do |token|
      next if token.is_empty?

      text = (token.presentation_before || '') + (token.form || '') + (token.presentation_after || '')
      citation = merge_citation_parts(citation_upper, token.citation_part)

      match(text, citation, token.id, pattern, options)
    end
  end
end