class Searchkick::IndexOptions

Attributes

options[R]

Public Class Methods

new(index) click to toggle source
# File lib/searchkick/index_options.rb, line 5
def initialize(index)
  @options = index.options
end

Public Instance Methods

add_search_synonyms(settings) click to toggle source
# File lib/searchkick/index_options.rb, line 490
def add_search_synonyms(settings)
  search_synonyms = options[:search_synonyms] || []
  search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
  if search_synonyms.is_a?(String) || search_synonyms.any?
    if search_synonyms.is_a?(String)
      synonym_graph = {
        type: "synonym_graph",
        synonyms_path: search_synonyms
      }
      synonym_graph[:updateable] = true unless below73?
    else
      synonym_graph = {
        type: "synonym_graph",
        # TODO confirm this is correct
        synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
      }
    end
    settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph

    if ["japanese", "japanese2"].include?(options[:language])
      [:searchkick_search, :searchkick_search2].each do |analyzer|
        settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
      end
    else
      [:searchkick_search2, :searchkick_word_search].each do |analyzer|
        unless settings[:analysis][:analyzer][analyzer].key?(:filter)
          raise Error, "Search synonyms are not supported yet for language"
        end

        settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
      end
    end
  end
end
add_synonyms(settings) click to toggle source
# File lib/searchkick/index_options.rb, line 464
def add_synonyms(settings)
  synonyms = options[:synonyms] || []
  synonyms = synonyms.call if synonyms.respond_to?(:call)
  if synonyms.any?
    settings[:analysis][:filter][:searchkick_synonym] = {
      type: "synonym",
      # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
      synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
    }
    # choosing a place for the synonym filter when stemming is not easy
    # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
    # TODO use a snowball stemmer on synonyms when creating the token filter

    # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
    # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
    # - Only apply the synonym expansion at index time
    # - Don't have the synonym filter applied search
    # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
    settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")

    %w(word_start word_middle word_end).each do |type|
      settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
    end
  end
end
below73?() click to toggle source
# File lib/searchkick/index_options.rb, line 548
def below73?
  Searchkick.server_below?("7.3.0")
end
default_analyzer() click to toggle source
# File lib/searchkick/index_options.rb, line 544
def default_analyzer
  :searchkick_index
end
default_type() click to toggle source
# File lib/searchkick/index_options.rb, line 540
def default_type
  "text"
end
generate_mappings() click to toggle source
# File lib/searchkick/index_options.rb, line 343
def generate_mappings
  mapping = {}

  keyword_mapping = {type: "keyword"}
  keyword_mapping[:ignore_above] = options[:ignore_above] || 30000

  # conversions
  Array(options[:conversions]).each do |conversions_field|
    mapping[conversions_field] = {
      type: "nested",
      properties: {
        query: {type: default_type, analyzer: "searchkick_keyword"},
        count: {type: "integer"}
      }
    }
  end

  mapping_options =
    [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
      .to_h { |type| [type, (options[type] || []).map(&:to_s)] }

  word = options[:word] != false && (!options[:match] || options[:match] == :word)

  mapping_options[:searchable].delete("_all")

  analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}

  mapping_options.values.flatten.uniq.each do |field|
    fields = {}

    if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
      fields[field] = {type: default_type, index: false}
    else
      fields[field] = keyword_mapping
    end

    if !options[:searchable] || mapping_options[:searchable].include?(field)
      if word
        fields[:analyzed] = analyzed_field_options

        if mapping_options[:highlight].include?(field)
          fields[:analyzed][:term_vector] = "with_positions_offsets"
        end
      end

      mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
        if options[:match] == type || f.include?(field)
          fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
        end
      end
    end

    mapping[field] = fields[field].merge(fields: fields.except(field))
  end

  (options[:locations] || []).map(&:to_s).each do |field|
    mapping[field] = {
      type: "geo_point"
    }
  end

  options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
  (options[:geo_shape] || {}).each do |field, shape_options|
    mapping[field] = shape_options.merge(type: "geo_shape")
  end

  if options[:inheritance]
    mapping[:type] = keyword_mapping
  end

  routing = {}
  if options[:routing]
    routing = {required: true}
    unless options[:routing] == true
      routing[:path] = options[:routing].to_s
    end
  end

  dynamic_fields = {
    # analyzed field must be the default field for include_in_all
    # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
    # however, we can include the not_analyzed field in _all
    # and the _all index analyzer will take care of it
    "{name}" => keyword_mapping
  }

  if options.key?(:filterable)
    dynamic_fields["{name}"] = {type: default_type, index: false}
  end

  unless options[:searchable]
    if options[:match] && options[:match] != :word
      dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
    end

    if word
      dynamic_fields[:analyzed] = analyzed_field_options
    end
  end

  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
  multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))

  mappings = {
    properties: mapping,
    _routing: routing,
    # https://gist.github.com/kimchy/2898285
    dynamic_templates: [
      {
        string_template: {
          match: "*",
          match_mapping_type: "string",
          mapping: multi_field
        }
      }
    ]
  }

  mappings
end
generate_settings() click to toggle source
# File lib/searchkick/index_options.rb, line 30
def generate_settings
  language = options[:language]
  language = language.call if language.respond_to?(:call)

  settings = {
    analysis: {
      analyzer: {
        searchkick_keyword: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
        },
        default_analyzer => {
          type: "custom",
          # character filters -> tokenizer -> token filters
          # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
          char_filter: ["ampersand"],
          tokenizer: "standard",
          # synonym should come last, after stemming and shingle
          # shingle must come before searchkick_stemmer
          filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
        },
        searchkick_search: {
          type: "custom",
          char_filter: ["ampersand"],
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
        },
        searchkick_search2: {
          type: "custom",
          char_filter: ["ampersand"],
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
        },
        # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
        searchkick_autocomplete_search: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding"]
        },
        searchkick_word_search: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding"]
        },
        searchkick_suggest_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
        },
        searchkick_text_start_index: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
        },
        searchkick_text_middle_index: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding", "searchkick_ngram"]
        },
        searchkick_text_end_index: {
          type: "custom",
          tokenizer: "keyword",
          filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
        },
        searchkick_word_start_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
        },
        searchkick_word_middle_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "searchkick_ngram"]
        },
        searchkick_word_end_index: {
          type: "custom",
          tokenizer: "standard",
          filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
        }
      },
      filter: {
        searchkick_index_shingle: {
          type: "shingle",
          token_separator: ""
        },
        # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
        searchkick_search_shingle: {
          type: "shingle",
          token_separator: "",
          output_unigrams: false,
          output_unigrams_if_no_shingles: true
        },
        searchkick_suggest_shingle: {
          type: "shingle",
          max_shingle_size: 5
        },
        searchkick_edge_ngram: {
          type: "edge_ngram",
          min_gram: 1,
          max_gram: 50
        },
        searchkick_ngram: {
          type: "ngram",
          min_gram: 1,
          max_gram: 50
        },
        searchkick_stemmer: {
          # use stemmer if language is lowercase, snowball otherwise
          type: language == language.to_s.downcase ? "stemmer" : "snowball",
          language: language || "English"
        }
      },
      char_filter: {
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
        # &_to_and
        ampersand: {
          type: "mapping",
          mappings: ["&=> and "]
        }
      }
    }
  }

  raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
  update_language(settings, language)
  update_stemming(settings)

  if Searchkick.env == "test"
    settings[:number_of_shards] = 1
    settings[:number_of_replicas] = 0
  end

  if options[:similarity]
    settings[:similarity] = {default: {type: options[:similarity]}}
  end

  settings[:index] = {
    max_ngram_diff: 49,
    max_shingle_diff: 4
  }

  if options[:case_sensitive]
    settings[:analysis][:analyzer].each do |_, analyzer|
      analyzer[:filter].delete("lowercase")
    end
  end

  add_synonyms(settings)
  add_search_synonyms(settings)

  if options[:special_characters] == false
    settings[:analysis][:analyzer].each_value do |analyzer_settings|
      analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
    end
  end

  settings
end
index_options() click to toggle source
# File lib/searchkick/index_options.rb, line 9
def index_options
  # mortal symbols are garbage collected in Ruby 2.2+
  custom_settings = (options[:settings] || {}).deep_symbolize_keys
  custom_mappings = (options[:mappings] || {}).deep_symbolize_keys

  if options[:mappings] && !options[:merge_mappings]
    settings = custom_settings
    mappings = custom_mappings
  else
    settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
    mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
  end

  set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]

  {
    settings: settings,
    mappings: mappings
  }
end
index_type() click to toggle source
# File lib/searchkick/index_options.rb, line 532
def index_type
  @index_type ||= begin
    index_type = options[:_type]
    index_type = index_type.call if index_type.respond_to?(:call)
    index_type
  end
end
set_deep_paging(settings) click to toggle source
# File lib/searchkick/index_options.rb, line 525
def set_deep_paging(settings)
  if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
    settings[:index] ||= {}
    settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
  end
end
update_language(settings, language) click to toggle source
# File lib/searchkick/index_options.rb, line 190
def update_language(settings, language)
  case language
  when "chinese"
    settings[:analysis][:analyzer].merge!(
      default_analyzer => {
        type: "ik_smart"
      },
      searchkick_search: {
        type: "ik_smart"
      },
      searchkick_search2: {
        type: "ik_max_word"
      }
    )
  when "chinese2", "smartcn"
    settings[:analysis][:analyzer].merge!(
      default_analyzer => {
        type: "smartcn"
      },
      searchkick_search: {
        type: "smartcn"
      },
      searchkick_search2: {
        type: "smartcn"
      }
    )
  when "japanese", "japanese2"
    analyzer = {
      type: "custom",
      tokenizer: "kuromoji_tokenizer",
      filter: [
        "kuromoji_baseform",
        "kuromoji_part_of_speech",
        "cjk_width",
        "ja_stop",
        "searchkick_stemmer",
        "lowercase"
      ]
    }
    settings[:analysis][:analyzer].merge!(
      default_analyzer => analyzer.deep_dup,
      searchkick_search: analyzer.deep_dup,
      searchkick_search2: analyzer.deep_dup
    )
    settings[:analysis][:filter][:searchkick_stemmer] = {
      type: "kuromoji_stemmer"
    }
  when "korean"
    settings[:analysis][:analyzer].merge!(
      default_analyzer => {
        type: "openkoreantext-analyzer"
      },
      searchkick_search: {
        type: "openkoreantext-analyzer"
      },
      searchkick_search2: {
        type: "openkoreantext-analyzer"
      }
    )
  when "korean2"
    settings[:analysis][:analyzer].merge!(
      default_analyzer => {
        type: "nori"
      },
      searchkick_search: {
        type: "nori"
      },
      searchkick_search2: {
        type: "nori"
      }
    )
  when "vietnamese"
    settings[:analysis][:analyzer].merge!(
      default_analyzer => {
        type: "vi_analyzer"
      },
      searchkick_search: {
        type: "vi_analyzer"
      },
      searchkick_search2: {
        type: "vi_analyzer"
      }
    )
  when "polish", "ukrainian"
    settings[:analysis][:analyzer].merge!(
      default_analyzer => {
        type: language
      },
      searchkick_search: {
        type: language
      },
      searchkick_search2: {
        type: language
      }
    )
  end
end
update_stemming(settings) click to toggle source
# File lib/searchkick/index_options.rb, line 288
def update_stemming(settings)
  if options[:stemmer]
    stemmer = options[:stemmer]
    # could also support snowball and stemmer
    case stemmer[:type]
    when "hunspell"
      # supports all token filter options
      settings[:analysis][:filter][:searchkick_stemmer] = stemmer
    else
      raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
    end
  end

  stem = options[:stem]

  # language analyzer used
  stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"

  if stem == false
    settings[:analysis][:filter].delete(:searchkick_stemmer)
    settings[:analysis][:analyzer].each do |_, analyzer|
      analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
    end
  end

  if options[:stemmer_override]
    stemmer_override = {
      type: "stemmer_override"
    }
    if options[:stemmer_override].is_a?(String)
      stemmer_override[:rules_path] = options[:stemmer_override]
    else
      stemmer_override[:rules] = options[:stemmer_override]
    end
    settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override

    settings[:analysis][:analyzer].each do |_, analyzer|
      stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
      analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
    end
  end

  if options[:stem_exclusion]
    settings[:analysis][:filter][:searchkick_stem_exclusion] = {
      type: "keyword_marker",
      keywords: options[:stem_exclusion]
    }

    settings[:analysis][:analyzer].each do |_, analyzer|
      stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
      analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
    end
  end
end