class Boilerpipe::SAX::Preprocessor
Public Class Methods
strip(text)
click to toggle source
# File lib/boilerpipe/sax/preprocessor.rb, line 3
#
# Cleans raw HTML text before it is handed to the parser.
#
# Two substitutions are applied, in order:
# 1. Every <script ...>...</script> element is deleted (case-insensitive,
#    non-greedy, and allowed to span multiple lines).
# 2. A semicolon is appended to any "&nbsp" entity that is directly followed
#    by a space, turning "&nbsp " into "&nbsp; ".
#
# @param text [String] the HTML source to clean
# @return [String] a new string; the argument itself is not modified
def self.strip(text)
  # script bug - delete script tags
  # /m lets "." match newlines so multi-line script bodies are removed too.
  without_scripts = text.gsub(%r{<script.+?</script>}im, '')

  # nokogiri uses libxml for mri and nekohtml for jruby
  # mri doesn't remove &nbsp; when missing the semicolon
  # NOTE(review): the literal "&nbsp" in this pattern was restored from the
  # comment above; the rendered docs had decoded it to whitespace — confirm
  # against lib/boilerpipe/sax/preprocessor.rb.
  without_scripts.gsub(/(&nbsp) /, '\1; ')
end