module TfIdfSimilarity::MatrixMethods
Private Instance Methods
@param [Integer] index the column index @return [GSL::Vector::View,NArray,NMatrix,Vector] a column
# File lib/tf-idf-similarity/matrix_methods.rb, line 67
# Fetches a single column of @matrix. With the :narray backend the first
# subscript selects the column; every other backend answers #column.
def column(index)
  return @matrix[index, true] if @library == :narray

  @matrix.column(index)
end
@return [Integer] the number of columns in the matrix
# File lib/tf-idf-similarity/matrix_methods.rb, line 89
# Reports how many columns @matrix has. GSL and NMatrix keep the column
# count in the second shape entry, NArray in the first; any other backend
# is asked for #column_size directly.
def column_size
  if [:gsl, :nmatrix].include?(@library)
    @matrix.shape[1]
  elsif @library == :narray
    @matrix.shape[0]
  else
    @matrix.column_size
  end
end
@param [Integer] i the row index @param [Integer] j the column index
# File lib/tf-idf-similarity/matrix_methods.rb, line 45
# Reads the element at row +i+, column +j+. The :narray backend is
# subscripted column-first, so the two indices are swapped for it.
def get(i, j)
  @library == :narray ? @matrix[j, i] : @matrix[i, j]
end
@param [Array<Array>] array matrix rows @return [GSL::Matrix,NArray,NMatrix,Matrix] a matrix
# File lib/tf-idf-similarity/matrix_methods.rb, line 122
# Wraps an array of rows in the matrix class that belongs to @library.
# Anything other than :gsl, :narray or :nmatrix yields a stdlib Matrix.
def initialize_matrix(array)
  if @library == :gsl
    GSL::Matrix[*array]
  elsif @library == :narray
    NArray[*array]
  elsif @library == :nmatrix
    # @see https://github.com/SciRuby/nmatrix/issues/91#issuecomment-18870619
    # NMatrix is handed an explicit [rows, columns] shape plus the flattened
    # values; an empty input gets zero columns.
    column_count = array.empty? ? 0 : array[0].size
    NMatrix.new(:dense, [array.size, column_count], array.flatten, :float64)
  else
    Matrix[*array]
  end
end
# File lib/tf-idf-similarity/matrix_methods.rb, line 146
# Natural logarithm of +number+, delegated to the math module that goes
# with @library: GSL::Sf for :gsl, NMath for :narray, Math otherwise.
def log(number)
  if @library == :gsl
    GSL::Sf.log(number)
  elsif @library == :narray
    NMath.log(number)
  else
    Math.log(number)
  end
end
@param [GSL::Matrix,NArray,NMatrix,Matrix] matrix a matrix @return [GSL::Matrix,NArray,NMatrix,Matrix] the product
# File lib/tf-idf-similarity/matrix_methods.rb, line 137
# Multiplies the transpose of +matrix+ by +matrix+ itself. The :nmatrix
# backend goes through #dot; every other backend uses the * operator.
def multiply_self(matrix)
  transposed = matrix.transpose
  @library == :nmatrix ? transposed.dot(matrix) : transposed * matrix
end
@return [GSL::Matrix,NArray,NMatrix,Matrix] all document vectors as unit vectors
@note Lucene normalizes document length differently.
# File lib/tf-idf-similarity/matrix_methods.rb, line 8 def normalize case @library when :gsl @matrix.clone.each_col do |column| unless column.isnull? column.normalize! end end when :narray # @see https://github.com/masa16/narray/issues/21 norm = NMath.sqrt((@matrix ** 2).sum(1).reshape(@matrix.shape[0], 1)) norm[norm.where2[1]] = 1.0 # avoid division by zero NMatrix.refer(@matrix / norm) # must be NMatrix for matrix multiplication when :nmatrix # @see https://github.com/SciRuby/nmatrix/issues/38 normal = NMatrix.new(:dense, @matrix.shape, 0, :float64) (0...@matrix.shape[1]).each do |j| column = @matrix.column(j) norm = Math.sqrt(column.transpose.dot(column)[0, 0]) (0...@matrix.shape[0]).each do |i| normal[i, j] = norm.zero? ? 0 : @matrix[i, j] / norm end end normal else Matrix.columns(@matrix.column_vectors.map do |column| if column.to_a.all?(&:zero?) column elsif column.respond_to?(:normalize) column.normalize else column / Math.sqrt(column.inner_product(column)) end end) end end
@param [Integer] index the row index @return [GSL::Vector::View,NArray,NMatrix,Vector] a row
# File lib/tf-idf-similarity/matrix_methods.rb, line 56
# Fetches a single row of @matrix. With the :narray backend the second
# subscript selects the row; every other backend answers #row.
def row(index)
  return @matrix[true, index] if @library == :narray

  @matrix.row(index)
end
@return [Integer] the number of rows in the matrix
# File lib/tf-idf-similarity/matrix_methods.rb, line 77
# Reports how many rows @matrix has. GSL and NMatrix keep the row count in
# the first shape entry, NArray in the second; any other backend is asked
# for #row_size directly.
def row_size
  if [:gsl, :nmatrix].include?(@library)
    @matrix.shape[0]
  elsif @library == :narray
    @matrix.shape[1]
  else
    @matrix.row_size
  end
end
# File lib/tf-idf-similarity/matrix_methods.rb, line 157
# Square root of +number+, taken with NMath for the :narray backend and
# with Math for every other backend.
def sqrt(number)
  math = @library == :narray ? NMath : Math
  math.sqrt(number)
end
@return [Float] the sum of all values in the matrix
# File lib/tf-idf-similarity/matrix_methods.rb, line 111
# Adds up every entry of the matrix. The :narray backend is asked for its
# own #sum; otherwise the flattened #values are folded with +, starting at 0.
def sum
  return @matrix.sum if @library == :narray

  values.inject(0) { |total, value| total + value }
end
@return [Array<Float>] the matrix's values
# File lib/tf-idf-similarity/matrix_methods.rb, line 101
# Lists the entries of @matrix as one flat array.
def values
  if @library == :nmatrix
    @matrix.each.to_a # faster than NMatrix's `to_a` and `to_flat_a`
  else
    @matrix.to_a.flatten
  end
end