class MusicDetector::FeatureVectorExtractor
Attributes
config[R]
Public Class Methods
new(config)
click to toggle source
@param [MusicDetector::Configuration] config configuration used to process the music file
# File lib/music_detector/feature_vector_extractor.rb, line 10
# Creates an extractor bound to the given configuration.
#
# @param [MusicDetector::Configuration] config configuration consulted by the extraction methods
def initialize(config)
  # kept as-is; the other methods read their tuning parameters from it
  @config = config
end
Public Instance Methods
extract_from(file:, seektime:, duration:)
click to toggle source
@param [String] file path of the input audio file @param [Float] seektime seek time in the audio file (in seconds) @param [Float] duration duration in the audio file (in seconds) @return [NArray<Float>] extracted feature vector
# File lib/music_detector/feature_vector_extractor.rb, line 18
# Extracts a feature vector from a slice of an audio file.
#
# For every pitch returned by equal_temperament, the spectrum elements that lie
# within half a bin of that pitch (on a log-frequency axis) are split into an
# "in-tune" and an "out-of-tune" group, and the ratio of the two group means
# is recorded. The ratios are returned sorted in descending order.
#
# @param [String] file path of the input audio file
# @param [Float] seektime seek time in the audio file (in seconds)
# @param [Float] duration duration in the audio file (in seconds)
# @return [NArray<Float>] extracted feature vector
def extract_from(file:, seektime:, duration:)
  # load the requested slice of the audio file
  samples, rate = read_audio_file(file: file, seektime: seektime, duration: duration)

  # FFT -> magnitude spectrum; only the first half of the output is kept
  bin_count = samples.length / 2
  magnitudes = NumRu::FFTW3.fft(samples, NumRu::FFTW3::FORWARD).abs[0...bin_count]
  # frequency assigned to each retained spectrum element
  bin_freqs = NArray.to_na(Array.new(bin_count) { |k| k.to_f / bin_count * rate / 2 })

  # band-pass filter (for faster computation)
  magnitudes, bin_freqs = band_path_filter(spectrum: magnitudes, frequencies: bin_freqs)

  # everything below is compared on a logarithmic frequency axis
  log_freqs = NMath.log(bin_freqs)
  log_pitches = NMath.log(equal_temperament)
  # half the log-distance between the first two pitches
  half_width = (log_pitches[1] - log_pitches[0]) / 2.0
  in_tune_limit = half_width * @config.in_tune_cents / 100.0
  out_of_tune_limit = half_width * @config.out_of_tune_cents / 100.0

  ratios = log_pitches.map do |log_center|
    # distance of every spectrum element from the centre of this bin
    offsets = (log_freqs - log_center).abs

    # restrict to the elements that belong to this bin
    in_bin = offsets < half_width
    bin_offsets = offsets[in_bin]
    bin_magnitudes = magnitudes[in_bin]

    # ratio between the in-tune mean and the out-of-tune mean
    in_tune_mean = bin_magnitudes[bin_offsets <= in_tune_limit].mean
    out_of_tune_mean = bin_magnitudes[bin_offsets > out_of_tune_limit].mean
    in_tune_mean / out_of_tune_mean
  end

  ratios.sort.reverse
end
Private Instance Methods
band_path_filter(spectrum:, frequencies:)
click to toggle source
# File lib/music_detector/feature_vector_extractor.rb, line 92
# Keeps only the spectrum elements whose frequency lies strictly between one
# step below the first and one step above the last entry of the configured
# temperament range (a step being a factor of 2 ** (1 / 12.0)).
#
# @param spectrum spectrum values, indexable by a mask
# @param frequencies frequency of each element in spectrum
# @return [Array] the filtered spectrum followed by the filtered frequencies
def band_path_filter(spectrum:, frequencies:)
  steps = @config.temperament_range
  lower_cutoff = @config.a * 2 ** ((steps.first - 1) / 12.0)
  upper_cutoff = @config.a * 2 ** ((steps.last + 1) / 12.0)

  # product of the two comparison masks: both conditions must hold
  pass_band = (lower_cutoff < frequencies) * (frequencies < upper_cutoff)

  [spectrum[pass_band], frequencies[pass_band]]
end
equal_temperament()
click to toggle source
# File lib/music_detector/feature_vector_extractor.rb, line 102
# Builds the pitch table: for every step i of the configured temperament range
# the frequency is @config.a * 2 ** (i / 12.0).
#
# @return [NArray] one frequency per entry of @config.temperament_range
def equal_temperament
  reference = @config.a
  pitches = @config.temperament_range.map { |step| reference * 2 ** (step / 12.0) }
  NArray.to_na(pitches)
end
read_audio_file(file:, seektime:, duration:)
click to toggle source
# File lib/music_detector/feature_vector_extractor.rb, line 59
# Reads a slice of a wave file and mixes it down to a single channel.
#
# @param [String] file path of the input audio file
# @param [Float] seektime offset of the slice from the start of the file (in seconds)
# @param [Float] duration length of the slice (in seconds)
# @return [Array] the mono samples (NArray of 16-bit integers) followed by the
#   sample rate reported by the file's format
# @raise [StandardError] if a sample is neither an Integer nor a Float
def read_audio_file(file:, seektime:, duration:)
  mono_wave = nil
  samplerate = nil
  # mono_wave is a signed 16-bit buffer, so float samples are scaled to that
  # range. NOTE(review): assumes float samples are normalized to [-1.0, 1.0] -
  # confirm against the wavefile gem documentation.
  float_scale = 2**15 - 1

  WaveFile::Reader.new(file) do |sound|
    samplerate = sound.format.sample_rate
    channels = sound.format.channels

    # seek: the leading samples are read and discarded
    sound.read((seektime * samplerate).round)

    # read
    sample_count = (duration * samplerate).round
    mono_wave = NArray.sint(sample_count)
    sound.read(sample_count).samples.each_with_index do |sample, i|
      # normalize to monaural first, in plain Ruby numbers, so that a float
      # average is not truncated by the integer buffer before it is scaled
      value = sample.is_a?(Array) ? sample.inject(:+) / channels : sample

      # NOTE(review): integer samples are stored unscaled; samples wider than
      # 16 bits would not fit into the buffer - confirm supported formats.
      mono_wave[i] =
        case value
        when Integer then value
        when Float then (value * float_scale).round
        else raise StandardError, "unsupported file: #{file}"
        end
    end
  end

  [mono_wave, samplerate]
end