class Object
Public Instance Methods
detect_type(x)
click to toggle source
# File lib/textminer/mine_utils.rb, line 4
# Map the 'content-type' header of a response to a short type name.
#
# Only the bare media type is compared: parameters after ';' (such as
# "charset=UTF-8"), surrounding whitespace and letter case are ignored, so
# "text/xml; charset=UTF-8" is recognised exactly like "text/xml".
#
# @param x [#headers] object whose +headers+ can be indexed with 'content-type'
# @return [String, nil] 'xml', 'plain' or 'pdf'; nil when the header is
#   missing or names any other media type
def detect_type(x)
  ctype = x.headers['content-type']
  # to_s twice: the header may be nil, and splitting an empty string yields
  # an empty array whose first element is nil.
  media_type = ctype.to_s.split(';').first.to_s.strip.downcase
  case media_type
  when 'text/xml' then 'xml'
  when 'text/plain' then 'plain'
  when 'application/pdf' then 'pdf'
  end
end
is_elsevier_wiley(x)
click to toggle source
# File lib/textminer/mine_utils.rb, line 62
# Report whether x matches the pattern 'elsevier|wiley'.
#
# @param x [#match] value to test (the match is case-sensitive)
# @return [Boolean] true when x.match returns a non-nil result, false otherwise
def is_elsevier_wiley(x)
  hit = x.match('elsevier|wiley')
  hit.nil? ? false : true
end
make_ext(x)
click to toggle source
# File lib/textminer/mine_utils.rb, line 16
# Translate a short type name into a file extension.
#
# @param x [String] 'xml', 'plain' or 'pdf'
# @return [String, nil] 'xml', 'txt' or 'pdf' respectively; nil for anything else
def make_ext(x)
  extensions = { 'xml' => 'xml', 'plain' => 'txt', 'pdf' => 'pdf' }
  extensions[x]
end
make_path(type)
click to toggle source
# File lib/textminer/mine_utils.rb, line 27
# Build a file name of the form "<random uuid>.<extension>".
#
# The extension is whatever make_ext returns for +type+; the UUID comes from
# UUIDTools::UUID.random_create.
#
# @param type [String] short type name understood by make_ext
# @return [String] the generated file name
def make_path(type)
  # id = x.split('article/')[1].split('?')[0]
  # path = id + '.' + type
  # return path
  extension = make_ext(type)
  UUIDTools::UUID.random_create.to_s + '.' + extension
end
parse_links(x, just_urls)
click to toggle source
# File lib/textminer/link_methods_hash.rb, line 50
# Optionally reduce a collection of link records to their 'URL' values.
#
# @param x [Array, nil] either a flat list of link records or a list of
#   lists of link records (decided by the class of the first element)
# @param just_urls [Boolean] when truthy return only the 'URL' values,
#   otherwise return x untouched
# @return [Array, nil] nil when x is nil; x itself when it is empty or
#   just_urls is falsy; otherwise a flattened array of 'URL' values
def parse_links(x, just_urls)
  return nil if x.nil?
  return x if x.empty? || !just_urls

  if x[0].class != Array
    x.map { |link| link['URL'] }.flatten
  else
    x.map { |links| links.map { |link| link['URL'] } }.flatten
  end
end
parse_pdf(x)
click to toggle source
# File lib/textminer/mine_utils.rb, line 58
# Pass x to Textminer.extract and return its result unchanged.
#
# @param x the argument forwarded to Textminer.extract
#   (presumably a path to a PDF file - confirm against Textminer.extract)
# @return whatever Textminer.extract returns
def parse_pdf(x)
  Textminer.extract(x)
end
parse_plain(x)
click to toggle source
# File lib/textminer/mine_utils.rb, line 53
# Return the contents of the file at x, as read by read_disk.
#
# @param x [String] path handed to read_disk
# @return the value returned by read_disk
def parse_plain(x)
  read_disk(x)
end
parse_xml(x)
click to toggle source
# File lib/textminer/mine_utils.rb, line 47
# Read the file at x with read_disk and parse its contents with Nokogiri.parse.
#
# @param x [String] path handed to read_disk
# @return the document object produced by Nokogiri.parse
def parse_xml(x)
  contents = read_disk(x)
  Nokogiri.parse(contents)
end
pull_links(x, y)
click to toggle source
# File lib/textminer/link_methods_hash.rb, line 37
# Select the link records whose 'content-type' matches the pattern y.
#
# Two response shapes are handled:
# * x['message']['items'] present - one list of matching links is built per
#   item, and items with no matching link are dropped;
# * x['message']['items'] nil - the links are taken from x['message']['link'].
#
# @param x [Hash] response with a 'message' entry
# @param y [String] text interpolated into a regular expression and matched
#   against each link's 'content-type'
# @return [Array, nil] matching links (nested per item in the 'items' case);
#   nil when there are neither items nor a 'link' entry
def pull_links(x, y)
  items = x['message']['items']

  if items.nil?
    # Read from the x argument. This method is defined on Object, so +self+
    # is merely the receiver of the call, not the response being inspected.
    links = x['message']['link']
    return nil if links.nil?

    links.select { |link| link['content-type'].match(/#{y}/) }.reject { |c| c.empty? }
  else
    per_item = items.map do |item|
      item['link'].select { |link| link['content-type'].match(/#{y}/) }
    end
    per_item.reject { |c| c.empty? }
  end
end
read_disk(path)
click to toggle source
# File lib/textminer/mine_utils.rb, line 43
# Read a whole file via File.read.
#
# @param path [String] location of the file to read
# @return [String] the file's contents
def read_disk(path)
  File.read(path)
end
singlearray2hash(x)
click to toggle source
# File lib/textminer/tmutils.rb, line 1
# Unwrap a one-element Array; return every other value unchanged.
#
# The Array check runs before +length+ is called, so values that do not
# respond to +length+ (nil, Integers, ...) are passed through instead of
# raising NoMethodError.
#
# @param x [Object] any value
# @return [Object] x[0] when x is exactly an Array holding one element,
#   otherwise x itself
def singlearray2hash(x)
  if x.instance_of?(Array) && x.length == 1
    x[0]
  else
    x
  end
end
write_disk(res, path)
click to toggle source
# File lib/textminer/mine_utils.rb, line 37
# Write the body of a response to a file, replacing any existing content.
#
# The file is opened in binary write mode ("wb"). The block form of
# File.open closes the handle even when reading res.body or writing raises.
#
# @param res [#body] object whose +body+ is written to the file
# @param path [String] destination file
# @return [nil]
def write_disk(res, path)
  File.open(path, "wb") { |f| f.write(res.body) }
  nil
end