class SRTParse
Public Instance Methods
duration(line)
click to toggle source
Takes a timing line, converts its end time (the "time to" part after "-->") to seconds, and returns it; the caller overwrites the stored duration with this value for every timing line.
# File lib/srtparser_library.rb, line 26
# Returns the end time of a subtitle timing line, expressed in seconds.
#
# line - a timing line such as "00:00:20,000 --> 00:00:24,400".
#
# Only the part after "-->" is used. Its comma decimal separator is replaced
# by a dot, the stamp is split on ":" into hours, minutes and seconds, each
# piece is converted with to_sec, and the sum is rounded to two decimals.
def duration(line)
  end_stamp = line.split("-->").last
  hours, minutes, seconds = end_stamp.gsub(/,/, ".").split(":")

  total = to_sec(hours, "hours") +
          to_sec(minutes, "minutes") +
          to_sec(seconds, "seconds")
  total.round(2)
end
find_avarages(results)
click to toggle source
Computes the averages from the existing results hash and stores them back into it.
# File lib/srtparser_library.rb, line 3
# Adds the derived averages to the statistics hash and returns that hash.
#
# results - Hash that is read for "number_of_symbols", "number_of_lines",
#           "number_of_sentences", "duration" and "number_of_subtitles".
#
# Writes "average_symbols_per_line", "average_symbols_per_sentence" and
# "average_duration" into results, each rounded to two decimals.
#
# A zero (or missing) denominator yields 0.0 instead of raising
# ZeroDivisionError or storing NaN/Infinity, so statistics gathered from an
# empty input can still be averaged.
def find_avarages(results)
  # Always divide as floats; an Integer/Integer division would truncate.
  ratio = lambda do |numerator_key, denominator_key|
    denominator = results[denominator_key].to_f
    next 0.0 if denominator.zero?

    (results[numerator_key].to_f / denominator).round(2)
  end

  results["average_symbols_per_line"] = ratio.call("number_of_symbols", "number_of_lines")
  results["average_symbols_per_sentence"] = ratio.call("number_of_symbols", "number_of_sentences")
  results["average_duration"] = ratio.call("duration", "number_of_subtitles")
  results
end
max_symbols_per_line(line, max_symbols)
click to toggle source
Finds the maximum symbols per line: counts the symbols in the current line and returns that count or the given max_symbols, whichever is larger.
# File lib/srtparser_library.rb, line 35
# Returns the running maximum of symbols found on a single line.
#
# line        - the text line to inspect.
# max_symbols - the largest symbol count seen so far.
#
# The characters matched by the pattern below are counted in line; that count
# is returned when it exceeds max_symbols, otherwise max_symbols is returned.
def max_symbols_per_line(line, max_symbols)
  symbols_in_line = line.scan(/[~!@\#$%^&*()\-{}\[\]|”:><?\/]/).count
  symbols_in_line > max_symbols ? symbols_in_line : max_symbols
end
parse_file(path_to_file)
click to toggle source
# File lib/srtparser_library.rb, line 44
# Reads a subtitle file line by line and gathers statistics about it.
#
# path_to_file - path of the file to read.
#
# Returns the statistics Hash (missing keys default to 0) after it has been
# passed through find_avarages. Keys written here: "number_of_subtitles",
# "duration", "number_of_words", "number_of_symbols", "number_of_lines",
# "max_symbols_per_line" and "number_of_sentences".
def parse_file(path_to_file)
  results = Hash.new(0)
  # Number of non-blank lines seen since the last blank line:
  # 0 => subtitle number line, 1 => timing line, anything else => text line.
  lines_after_blank = 0

  File.foreach(path_to_file) do |line|
    # A whitespace-only line separates subtitles and resets the counter.
    # \A...\z (rather than requiring a trailing "\n") also recognises a
    # whitespace-only last line that is not terminated by a newline.
    if line =~ /\A\s*\z/
      lines_after_blank = 0
      next
    end

    case lines_after_blank
    when 0
      # Subtitle number line: keep the most recent number.
      results["number_of_subtitles"] = line.to_i
    when 1
      # Timing line: overwrite so the value of the last timing line remains.
      results["duration"] = duration(line)
    else
      # Text line: update the counters.
      results["number_of_words"] += line.gsub(/[[:punct:]]/, '').split.length
      results["number_of_symbols"] += line.scan(/[~!@\#$%^&*()\-{}\[\]|”:><?\/]/).count
      results["number_of_lines"] += 1
      results["max_symbols_per_line"] = max_symbols_per_line(line, results["max_symbols_per_line"])
      results["number_of_sentences"] += line.scan(/[^\.!?]+[\.!?]/).count
    end

    # Bump the counter after every non-blank line.
    lines_after_blank += 1
  end

  # Add the average amounts and return the results.
  find_avarages(results)
end
to_sec(time, current_type)
click to toggle source
Converts hours, minutes, or seconds (including milliseconds) to seconds.
# File lib/srtparser_library.rb, line 11
# Converts one component of a timestamp to seconds.
#
# time         - the component value; converted with to_i for hours and
#                minutes and with to_f for seconds.
# current_type - "hours", "minutes" or "seconds".
#
# Returns an Integer for hours and minutes and a Float for seconds.
# Raises ArgumentError for any other current_type, so an invalid unit is
# reported at the call site rather than as a value that breaks later
# arithmetic.
def to_sec(time, current_type)
  case current_type
  when "hours"
    time.to_i * 3600
  when "minutes"
    time.to_i * 60
  when "seconds"
    time.to_f
  else
    raise ArgumentError, "unknown time unit: #{current_type.inspect}"
  end
end