class Jekyll::LunrJsSearch::Indexer
Public Class Methods
new(config = {})
click to toggle source
Calls superclass method
# File lib/jekyll_lunr_js_search/indexer.rb, line 10
# Sets up the indexer from the site configuration.
#
# Defaults are merged with the optional 'lunr_search' section of +config+
# (user-supplied keys win). The bundled lunr.min.js is located, evaluated
# once through ExecJS to learn its version, and the resulting settings are
# stored in instance variables for #generate.
#
# @param config [Hash] site configuration; only config['lunr_search'] is read here
# @raise [RuntimeError] when lunr.min.js exists neither next to the gem build
#   nor inside the configured js_dir
def initialize(config = {})
  super(config)

  @lunr_config = {
    'excludes' => [],
    'strip_index_html' => false,
    'min_length' => 3,
    'stopwords' => 'stopwords.txt',
    # field name => boost passed to lunr's this.field(...)
    'fields' => {
      'title' => 10,
      'categories' => 20,
      'tags' => 20,
      'body' => 1
    },
    'js_dir' => 'js'
  }.merge!(config['lunr_search'] || {})

  @js_dir = @lunr_config['js_dir']

  # Prefer the copy shipped in the gem's build directory; otherwise fall
  # back to a file of the same name inside js_dir.
  gem_lunr = File.join(File.dirname(__FILE__), "../../build/lunr.min.js")
  @lunr_path = File.exist?(gem_lunr) ? gem_lunr : File.join(@js_dir, File.basename(gem_lunr))
  raise "Could not find #{@lunr_path}" unless File.exist?(@lunr_path)

  # File.read opens, reads and closes the file in one call; Kernel#open
  # followed by #read left the handle open until garbage collection.
  lunr_src = File.read(@lunr_path)
  ctx = ExecJS.compile(lunr_src)
  @lunr_version = ctx.eval('lunr.version')

  @docs = {}
  @excludes = @lunr_config['excludes']

  # if web host supports index.html as default doc, then optionally exclude it from the url
  @strip_index_html = @lunr_config['strip_index_html']

  # stop word exclusion configuration
  @min_length = @lunr_config['min_length']
  @stopwords_file = @lunr_config['stopwords']
end
Public Instance Methods
generate(site)
click to toggle source
Index all pages except those whose URL matches any pattern in the `excludes` setting of the `lunr_search` config, or whose page data sets `exclude_from_search`. The main content from each page is extracted and saved to disk as JSON.
# File lib/jekyll_lunr_js_search/indexer.rb, line 48
# Builds the lunr.js search index for +site+ and writes it, together with
# the per-document metadata, to <site.dest>/<js_dir>/index.json.
#
# The index is produced by assembling a JavaScript program (the lunr source
# followed by a script that declares the fields and adds every document),
# running it through ExecJS and serialising the resulting index.
#
# @param site [Object] the Jekyll site being generated; its +dest+ and
#   +static_files+ are used here, and it is passed on to pages_to_index
def generate(site)
  Jekyll.logger.info "Lunr:", 'Creating search index...'

  @site = site
  # gather pages and posts
  items = pages_to_index(site)
  content_renderer = PageRenderer.new(site)

  # File.read closes the handle immediately (Kernel#open + read did not).
  index_js = File.read(@lunr_path)
  index_js << "\nvar idx = lunr(function() {\n"
  index_js << "\tthis.ref('id');\n"
  @lunr_config['fields'].each_pair do |name, boost|
    index_js << "\tthis.field('#{name}', {'boost': #{boost}});\n"
  end

  # Stop words are only applied when the configured file is present.
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  filter_stopwords = File.exist?(@stopwords_file)

  items.each_with_index do |item, i|
    entry = SearchEntry.create(item, content_renderer)

    entry.strip_index_suffix_from_url! if @strip_index_html
    entry.strip_stopwords!(stopwords, @min_length) if filter_stopwords

    doc = {
      "id" => i,
      "title" => getTitle(entry),
      "url" => entry.url,
      "date" => entry.date,
      "categories" => entry.categories,
      "tags" => entry.tags,
      "is_post" => entry.is_post,
      "body" => entry.body
    }

    index_js << "\tthis.add(" << ::JSON.generate(doc, quirks_mode: true) << ");\n"

    # The body is indexed but not stored alongside the document metadata.
    doc.delete("body")
    @docs[i] = doc

    Jekyll.logger.debug "Lunr:", (entry.title ? "#{entry.title} (#{entry.url})" : entry.url)
  end
  index_js << "});"

  FileUtils.mkdir_p(File.join(site.dest, @js_dir))
  filename = File.join(@js_dir, 'index.json')

  ctx = ExecJS.compile(index_js)
  index = ctx.eval('JSON.stringify(idx)')

  total = {
    "docs" => @docs,
    "index" => ::JSON.parse(index)
  }

  filepath = File.join(site.dest, filename)
  File.open(filepath, "w") { |f| f.write(JSON.dump(total)) }
  Jekyll.logger.info "Lunr:", "Index ready (lunr.js v#{@lunr_version})"
  added_files = [filename]

  site_js = File.join(site.dest, @js_dir)
  # If we're using the gem, add the lunr and search JS files to the _site
  if File.expand_path(site_js) != File.dirname(@lunr_path)
    extras = Dir.glob(File.join(File.dirname(@lunr_path), "search.min.js"))
    FileUtils.cp(extras, site_js)
    extras.map! { |min| File.join(@js_dir, File.basename(min)) }
    Jekyll.logger.debug "Lunr:", "Added JavaScript to #{@js_dir}"
    added_files.push(*extras)
  end

  # Keep the written files from being cleaned by Jekyll
  added_files.each do |added|
    site.static_files << SearchIndexFile.new(site, site.dest, "/", added)
  end
end
Private Instance Methods
getTitle(entry)
click to toggle source
# File lib/jekyll_lunr_js_search/indexer.rb, line 124
# Chooses the label recorded as a document's "title": the entry's own
# title when it has one, otherwise its URL.
#
# @param entry [#title, #url] the search entry being indexed
# @return [Object] entry.title if truthy, else entry.url
def getTitle(entry)
  # A special label for "/index.html" ("Main page") exists only as
  # commented-out code in the original and is not applied.
  entry.title || entry.url
end
output_ext(doc)
click to toggle source
# File lib/jekyll_lunr_js_search/indexer.rb, line 139
# Returns the output extension for +doc+.
#
# A Jekyll::Document is asked through a Jekyll::Renderer built for the
# current site; any other object is asked directly via its own #output_ext.
#
# @param doc [Object] a Jekyll::Document, or anything responding to #output_ext
# @return [Object] whatever the renderer or the object reports as its output extension
def output_ext(doc)
  return doc.output_ext unless doc.is_a?(Jekyll::Document)

  renderer = Jekyll::Renderer.new(@site, doc)
  renderer.output_ext
end
pages_to_index(site)
click to toggle source
# File lib/jekyll_lunr_js_search/indexer.rb, line 147
# Collects the pages and documents of +site+ that should be indexed.
#
# An item is kept when it responds to #output_ext, its output extension
# (as resolved by #output_ext) is '.html', its URL matches none of the
# configured exclude patterns, and its data does not set
# 'exclude_from_search'.
#
# @param site [#pages, #documents] the site whose content is scanned
# @return [Array] copies of the items to index, pages first, then documents
def pages_to_index(site)
  # Compile every exclude pattern once instead of once per item per pattern.
  exclude_patterns = @excludes.map { |pattern| Regexp.new(pattern) }

  # Work on copies of pages and documents (all collections, including
  # posts). Note that #dup is a shallow copy, not a deep one.
  items = site.pages.map(&:dup) + site.documents.map(&:dup)

  # only process files that will be converted to .html and only non excluded files
  items.select! do |item|
    item.respond_to?(:output_ext) &&
      output_ext(item) == '.html' &&
      exclude_patterns.none? { |pattern| pattern =~ item.url }
  end
  items.reject! { |item| item.data['exclude_from_search'] }
  items
end
stopwords()
click to toggle source
Loads the stop words from the configured stopwords file, one per line, and caches them.
# File lib/jekyll_lunr_js_search/indexer.rb, line 135
# Returns the stop words read from @stopwords_file, one per line with
# surrounding whitespace stripped. The list is read once and memoized.
#
# @return [Array<String>] the stripped lines of the stopwords file
# @raise [SystemCallError] when the file cannot be read (e.g. Errno::ENOENT)
def stopwords
  # File.readlines always treats its argument as a file path, whereas
  # IO.readlines would run a path starting with "|" as a shell command.
  @stopwords ||= File.readlines(@stopwords_file).map(&:strip)
end