class Indexer101

Public Class Methods

new(filename='indexer.dat', debug: false) click to toggle source
# File lib/indexer101.rb, line 71
# Sets up a new indexer.
#
# filename - default path used by #read and #save.
# debug    - when true, other methods print extra diagnostic output.
#
# Prints a short banner and starts with a fresh Index object.
def initialize(filename='indexer.dat', debug: false)

  @filename = filename
  @debug = debug

  puts
  banner = 'Indexer101'.highlight + " ready to index".green
  puts banner
  puts

  @indexer = Index.new

end

Public Instance Methods

build(a=@indexer.index.keys) click to toggle source
# File lib/indexer101.rb, line 83
# Builds the lookup structure from a list of keywords.
#
# a - the keywords to build from; defaults to every key currently held in
#     the keyword index.
#
# Prints the number of keywords and the time the build took.
# Returns self so calls can be chained.
def build(a=@indexer.index.keys)

  started_at = Time.now
  @indexer.build(a)
  elapsed = Time.now - started_at

  word_count_message = "%d words indexed".info % a.length
  puts word_count_message

  formatted_duration = ("%.3f" % elapsed).brown
  puts(("index built in " + formatted_duration + " seconds").info)

  self
end
index() click to toggle source
# File lib/indexer101.rb, line 95
# Returns the keyword index held by the underlying Index object
# (the same object, not a copy).
def index
  @indexer.index
end
lookup(s, limit: 10) click to toggle source

Given the first few characters of a keyword, lookup suggests matching keywords from the index, which is useful for an auto-suggest feature.

# File lib/indexer101.rb, line 208
# Suggests indexed keywords for the starting characters in +s+.
#
# s     - the characters typed so far.
# limit - the maximum number of suggestions returned.
#
# The string is expanded into its prefixes (scan_path), the deepest prefix
# present in the nested lookup hash is located (scan_key), and everything
# stored at or below that point is collected.
#
# Returns an Array of at most +limit+ entries, shortest first. The same
# ordering and limit apply whether the deepest match is a leaf Array or a
# nested Hash; a leaf Array used to be returned whole and unsorted.
def lookup(s, limit: 10)

  t = Time.now
  a = scan_path s
  puts(('a: ' + a.inspect[0..100] + '...').debug) if @debug

  # index (within a) of the deepest prefix found in the lookup hash
  i = scan_key @indexer.h, a

  r = @indexer.h.dig(*a[0..i])
  puts(('r: ' + r.inspect[0..100] + '...').debug) if @debug

  # A leaf Array already is the candidate list; a Hash has to be flattened.
  candidates = r.is_a?(Array) ? r : scan_leaves(r)
  results = candidates.sort_by(&:length).take(limit)

  t2 = Time.now - t
  puts(("lookup took " + ("%.3f" % t2).brown + " seconds").info)

  results

end
read(filename=@filename) click to toggle source
# File lib/indexer101.rb, line 99
# Loads a previously saved index from disk, replacing the current one.
#
# filename - path of the file to load; defaults to the filename given to
#            the constructor.
#
# Prints the number of indexed words and the time the load took.
def read(filename=@filename)

  t = Time.now

  # Marshal data is binary, so the file is opened in binary mode to avoid
  # newline/encoding translation.
  # NOTE(review): Marshal.load can instantiate arbitrary objects; only read
  # index files from a trusted source.
  File.open(filename, 'rb') do |f|
    @indexer = Marshal.load(f)
  end

  t2 = Time.now - t

  puts "index contains %d words".info % @indexer.index.length
  # Parenthesised so that .info applies to the whole message rather than
  # only to the trailing " seconds" fragment.
  puts(("index read in " + ("%.2f" % t2).brown + " seconds").info)

end
save(filename=@filename) click to toggle source
# File lib/indexer101.rb, line 114
# Writes the current index to disk using Marshal.
#
# filename - path of the file to write; defaults to the filename given to
#            the constructor. An existing file is overwritten.
def save(filename=@filename)

  # Marshal output is binary: write-only binary mode prevents newline or
  # encoding translation from corrupting the dump on platforms that
  # distinguish text and binary files.
  File.open(filename, 'wb') do |f|
    Marshal.dump(@indexer, f)
  end

end
scan_dxindex(*locations, level: 0) click to toggle source

Scan levels: 0 = tags only; 1 = all words in the title (including tags).

# File lib/indexer101.rb, line 124
# Loads one or more DxIndex documents and adds their entries to the index.
#
# locations - any mix (nested arrays are flattened) of Dynarex / DxLite
#             objects and String paths ending in '.xml' (opened with
#             Dynarex) or '.json' (opened with DxLite).
# level     - 0 indexes only the #hashtags found in each title;
#             1 indexes every word of the title (hashtags included).
#             Any other value records the entries in the URI index but
#             adds no keywords.
#
# Entries are numbered from 1 across all documents, each document being read
# in reverse of the order returned by its #all method. Every entry is stored
# in the URI index as [created time, title, url] and its id is appended to
# the list of each keyword found in its title.
#
# NOTE(review): a location that is neither a supported object nor a
# '.xml' / '.json' path leaves nil in the document list, which then fails on
# dx.all below - confirm whether such input should be rejected earlier.
#
# Returns the Array of loaded documents.
def scan_dxindex(*locations, level: 0)

  t = Time.now

  # One thread per location so that document loads overlap; each thread
  # hands its document back through its thread-local :v.
  threads = locations.flatten.map do |location|

    Thread.new {

      if location.is_a?(Dynarex) or location.is_a?(DxLite) then

        Thread.current[:v] = location

      elsif location.is_a? String

        case File.extname(location)
        when '.xml'
          Thread.current[:v] = Dynarex.new location, debug: @debug
        when '.json'
          Thread.current[:v] = DxLite.new location, debug: @debug
        end

      end
    }
  end

  ThreadsWait.all_waits(*threads)

  a = threads.map {|x| x[:v]}
  puts '_a: ' + a.inspect if @debug
  t2 = Time.now - t
  puts(("dxindex documents loaded in " + ("%.2f" % t2).brown \
        + " seconds").info)

  # id of the first entry of the document currently being processed
  id = 1

  a.each do |dx|

    if @debug then
      puts 'dx: ' + dx.class.inspect
      puts 'dx.all: ' + dx.all.inspect
    end

    # reversed once and reused for both the URI index and the keyword index
    entries = dx.all.reverse

    @indexer.uri_index.merge!(entries.each_with_index.map do |x, i|
      [id + i, [Time.parse(x.created), x.title, x.url]]
    end.to_h)

    entries.each_with_index do |x, i|

      keywords = case level
      when 0
        x.title.scan(/(\#\w+)/).flatten(1)
      when 1
        # \u{A3} = £ <- represented as Unicode to avoid ASCII to UTF-8 error
        x.title.split(/[\s:"!\?\(\)\u{A3}]+(?=[\w#_'-]+)/)
      else
        []
      end

      keywords.each do |keyword|

        # split yields a leading "" when the title starts with a delimiter;
        # an empty keyword must not be indexed.
        next if keyword.empty?

        key = keyword.downcase.to_sym
        (@indexer.index[key] ||= []) << (id + i)

      end

    end

    id += entries.length

  end

end
uri_index() click to toggle source
# File lib/indexer101.rb, line 201
# Returns the URI index held by the underlying Index object
# (the same object, not a copy).
def uri_index
  @indexer.uri_index
end

Private Instance Methods

scan_key(h, keys, index=0) click to toggle source
# File lib/indexer101.rb, line 272
# Walks the nested hash +h+ along +keys+ and reports how deep the keys could
# be followed.
#
# h     - the Hash to search at the current depth.
# keys  - the list of keys, one per nesting level.
# index - position in +keys+ to look up in +h+ (used by the recursion).
#
# Returns the position in +keys+ of the deepest key that was found.
# Raises KeyError when keys[index] itself is not present in +h+.
def scan_key(h, keys, index=0)

  r = h.fetch keys[index]

  puts(('r: ' + r.inspect[0..100] + '...').debug) if @debug

  # fetch with a nil default: a missing next key simply ends the descent
  # here instead of raising KeyError.
  can_descend = r.is_a?(Hash) && index + 1 < keys.length &&
                r.fetch(keys[index + 1], nil)

  can_descend ? scan_key(r, keys, index + 1) : index

end
scan_leaves(h) click to toggle source
# File lib/indexer101.rb, line 286
# Collects every element of every Array stored anywhere in the nested
# structure +h+, descending recursively through values that are not Arrays.
#
# Returns a new flat Array in traversal order.
def scan_leaves(h)

  h.flat_map do |_key, value|
    value.is_a?(Array) ? value : scan_leaves(value)
  end

end
scan_path(s, length=0) click to toggle source
# File lib/indexer101.rb, line 301
# Expands +s+ into its successive prefixes, each converted to a Symbol,
# e.g. "abc" gives [:a, :ab, :abc].
#
# s      - the String to expand.
# length - index of the last character of the first prefix produced.
#
# Always yields at least one prefix, even when +length+ is already past the
# end of the string.
def scan_path(s, length=0)

  final_stop = [length, s.length - 1].max

  (length..final_stop).map do |stop|
    puts 'inside scan_path'.info if @debug
    s[0..stop].to_sym
  end

end