class KeywordFilter

Public Class Methods

new(s) click to toggle source
# File lib/keyword-filter.rb, line 10
# Loads the keyword phrases that #filter later reads from @keywords.
#
# s - String handed to #open; whatever #open returns is parsed as an XML
#     document.
#
# Stores in @keywords the nodes matched by the XPath
# 'records/keyword/phrase/text()', evaluated against the document root.
def initialize(s)
  xml  = open(s)
  root = Document.new(xml).root
  @keywords = XPath.match(root, 'records/keyword/phrase/text()')
end

Public Instance Methods

filter(raw_s) click to toggle source
# File lib/keyword-filter.rb, line 14
# Wraps every whole-word, case-insensitive occurrence of a keyword found in
# raw_s in an <f> element.
#
# raw_s - String to scan. It is embedded unescaped inside a <root> element and
#         parsed with Document.new, so it must be well-formed XML content.
#
# Returns raw_s unchanged when the parsed <root> has no child elements;
# otherwise returns the <root> element containing the marked-up text.
def filter(raw_s)
  # Escape each phrase so regexp metacharacters (".", "(", "+", ...) are
  # matched literally instead of changing or breaking the pattern. Blank
  # phrases are dropped: an empty alternative matches at every word boundary.
  alternatives = @keywords.map { |keyword| Regexp.escape(keyword.to_s) }
                          .reject(&:empty?)

  # With nothing to look for, skip the substitution entirely; an empty
  # alternation would otherwise insert empty <f></f> pairs around every word.
  s = if alternatives.empty?
        raw_s
      else
        raw_s.gsub(/\b(?:#{alternatives.join('|')})\b/i, '<f>\0</f>')
      end

  doc = Document.new("<root>#{s}</root>")
  XPath.match(doc.root, '*').empty? ? raw_s : doc.root
end

Private Instance Methods

open(s) click to toggle source
# File lib/keyword-filter.rb, line 25
# Resolves s to a string of content.
#
# s - String that is one of:
#     * a URL starting with http:// or https:// - fetched over the network;
#     * any other string containing "<" - treated as inline XML and returned
#       as is;
#     * anything else - treated as a local file path and read.
#
# Returns the content as a String.
# Raises whatever the underlying fetch or File.read raises (for example
# Errno::ENOENT for a missing file).
def open(s)
  # \A anchors at the start of the whole string; "^" would also match at the
  # start of any later line, misclassifying multi-line XML that happens to
  # contain a line beginning with a URL.
  if s =~ %r{\Ahttps?://}
    # 'User-Agent' is the standard HTTP header name.
    headers = { 'User-Agent' => 'KeywordsFilter-Reader' }
    # Kernel.open stopped handling URLs in Ruby 3.0; prefer URI.open when
    # open-uri provides it, and fall back for older Rubies. The block form
    # closes the stream once it has been read.
    if defined?(URI) && URI.respond_to?(:open)
      URI.open(s, headers, &:read)
    else
      Kernel.open(s, headers, &:read)
    end
  elsif s.include?('<') # inline XML string
    s
  else # local file; File.read closes the handle after reading
    File.read(s)
  end
end