class VTT
Library to handle VTT files.
Uses the translator available to do the necessary language operations as defined by the AllFather
Constants
- SUPPORTED_TRANSFORMATIONS
Public Class Methods
# File lib/vtt.rb, line 19
# Remembers the caption file path and rejects it immediately when it does
# not pass the VTT checks performed by is_valid?.
#
# cc_file - path of the caption file to operate on.
#
# Raises RuntimeError ("Invalid VTT file provided") when is_valid? is false.
def initialize(cc_file)
  @cc_file = cc_file
  return if is_valid?

  raise "Invalid VTT file provided"
end
Public Instance Methods
Returns the inferred language in an array
# File lib/vtt.rb, line 72
# Asks the translator to infer the language of a 100-character text sample
# taken from the caption file via get_text.
#
# Returns a one-element Array holding whatever the translator reported.
# Any StandardError raised while sampling or detecting is printed to stdout
# and the returned array then holds nil.
def infer_languages
  detected =
    begin
      @translator.infer_language(get_text(@cc_file, 100))
    rescue StandardError => e
      puts "Error while detecting the language due to #{e.message}"
      nil
    end
  [detected]
end
Method to add required set of validations specific to caption type
# File lib/vtt.rb, line 86
# Caption-type specific validation: the file name must end with the
# lower-case ".vtt" extension.
#
# The check looks at the end of the WHOLE string. A per-line regex anchor
# (^ / $) would also accept a multi-line value such as "a.vtt\nb.exe",
# because the first line alone ends in ".vtt".
#
# Returns true or false; a nil @cc_file yields false.
def is_valid?
  # Do any VTT specific validations here
  # TODO: Check if it's required to do a File read to see if the 1st line is WEBVTT
  # to handle cases where invalid file is named with vtt extension
  @cc_file.to_s.end_with?(".vtt")
end
AllFather#set_translator
# File lib/vtt.rb, line 24
# Forwards the translator to the parent implementation and keeps a
# reference in @translator for the language operations of this class.
#
# translator - the engine object used for inference and translation.
def set_translator(translator)
  # Bare super forwards the same argument to the parent implementation.
  super
  @translator = translator
end
# File lib/vtt.rb, line 96
# Returns the SUPPORTED_TRANSFORMATIONS constant of this class.
def supported_transformations
  SUPPORTED_TRANSFORMATIONS
end
AllFather#transform_to
# File lib/vtt.rb, line 100
# Converts this VTT file into one output file per requested type, optionally
# translating every cue message on the way.
#
# types       - collection of target caption types; one output file is
#               prepared for each.
# src_lang    - source language; when nil or empty it is inferred inside
#               translated_msg.
# target_lang - language to translate into; nil or empty disables
#               translation.
# output_dir  - directory receiving the output files (a trailing file
#               separator is appended when missing).
#
# Raises StandardError when translation is requested but no translator has
# been set, or when create_file reports failure for a type. Whatever the
# parent validation (super) raises also propagates.
def transform_to(types, src_lang, target_lang, output_dir)
  # Let's start off with some validations
  super(types, src_lang, target_lang, output_dir)
  # Suffix output dir with File separator
  output_dir = "#{output_dir}#{File::Separator}" unless output_dir.end_with?(File::Separator)
  # Translation is only attempted when a non-empty target language is given,
  # and in that case a translator must already be configured.
  translate = false
  if target_lang && !target_lang.empty?
    translate = true
    if @translator.nil?
      raise StandardError.new("Cannot infer language as engine options are not provided")
    end
  end
  # Prepare the output files for each type
  # Output name: <input basename>[_<target_lang>]<extension for type>
  file_map = {}
  types.each do |type|
    output_file = File.basename(@cc_file, File.extname(@cc_file))
    output_file << "_#{target_lang}" if translate
    output_file << extension_from_type(type)
    out_file = "#{output_dir}#{output_file}"
    if create_file(TYPE_VTT, type, out_file, target_lang)
      file_map[type] = out_file
    else
      raise StandardError.new("Failed to create output file for type #{type}")
    end
  end
  # Read the file and prepare the cue model
  cue_info = nil
  # The block form of File.open returns the file contents as a String.
  ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
  message = ""
  collect_msg = false
  cue_index = 1
  ccfile.each_line do | line |
    # A blank line ends the text of the current cue.
    if line.strip.empty?
      collect_msg = false
      next
    end
    time_points = line.scan(/^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)/)
    if time_points.empty?
      # Not a timing line: it is cue text only while a cue is being collected;
      # anything else (header lines, text after a blank line) is ignored.
      if collect_msg
        message << line
      end
    else
      collect_msg = false
      # A new timing line flushes the previous cue, but only when that cue
      # gathered some text; a cue without text is dropped and does not
      # consume an index.
      unless message.empty?
        cue_info.message = translated_msg(translate, message, src_lang, target_lang)
        write_cue(cue_info, file_map)
        message = ""
        cue_index += 1
      end
      # This is a cue point. Fetch timestamps
      cue_info = CueInfo.new(AllFather::TYPE_VTT)
      cue_info.index = cue_index
      # Capture groups 0 and 2 are the full start and end timestamps.
      cue_info.start = time_points[0][0]
      cue_info.end = time_points[0][2]
      start_units = time_details(cue_info.start, TYPE_VTT)
      end_units = time_details(cue_info.end, TYPE_VTT)
      cue_info.start_time_units = start_units
      cue_info.end_time_units = end_units
      collect_msg = true
    end
  end
  # Flush the final cue.
  # NOTE(review): cue_info is still nil here when the file contained no timing
  # line, so write_cue receives nil — confirm write_cue tolerates that.
  # NOTE(review): the third argument presumably marks the last cue — verify
  # against write_cue.
  cue_info.message = translated_msg(translate, message, src_lang, target_lang) unless message.empty?
  write_cue(cue_info, file_map, true)
end
AllFather#translate
# File lib/vtt.rb, line 29
# Writes a copy of the caption file to out_file with every cue's text
# replaced by its translation.
#
# Timing lines and all lines outside a cue are copied through unchanged.
# Cue text that parses as JSON (metadata cues) is copied untranslated; the
# same rule applies to a final cue that is not followed by a blank line.
#
# src_lang  - source language handed to the translator.
# dest_lang - target language handed to the translator.
# out_file  - path of the file to create or overwrite.
#
# Returns nil. Errors from reading the input, opening the output or from
# the translator propagate unchanged; the output file is always closed.
def translate(src_lang, dest_lang, out_file)
  super(src_lang, dest_lang, out_file)
  timing_line = /^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)/
  # Read the input first: if it is missing or unreadable the real error
  # surfaces and out_file is never created or truncated.
  source = File.open(@cc_file, 'r:UTF-8', &:read)
  # Block form guarantees the output handle is closed even on failure.
  File.open(out_file, "w") do |outfile|
    pending = +""
    collecting = false
    # Emits the text gathered for the current cue and resets the collector.
    flush_cue = lambda do
      is_json =
        begin
          !JSON.parse(pending).nil?
        rescue StandardError
          # Best effort: anything that does not parse is treated as plain text.
          false
        end
      outfile.puts(is_json ? pending : @translator.translate(pending, src_lang, dest_lang))
      outfile.puts
      pending = +""
      collecting = false
    end
    source.each_line do |line|
      if line.match?(timing_line)
        collecting = true
        outfile.puts line
      elsif line.strip.empty? && !pending.empty?
        flush_cue.call
      elsif collecting
        pending << line
      else
        outfile.puts line
      end
    end
    # The last cue may not be terminated by a blank line.
    flush_cue.call unless pending.empty?
  end
  nil
end
Private Instance Methods
Method to get a minimal amount of key text that excludes any tags or control information for the engine to meaningfully and correctly infer the language being referred to in this VTT
# File lib/vtt.rb, line 205
# Collects a short sample of cue text (no header, timing lines or blank
# lines) for language detection.
#
# Text is gathered only from lines that follow a timing line, up to the
# next blank line. The file is streamed line by line and reading stops as
# soon as enough characters have been collected.
#
# vtt_file  - path of the VTT file to sample.
# num_chars - maximum number of characters to return.
#
# Returns a String of at most num_chars characters (line breaks inside the
# sampled text are kept). Errors from opening or reading the file propagate.
def get_text(vtt_file, num_chars)
  timing_line = /^((\d\d:)+\d\d[.,]\d\d\d)\s-->\s((\d\d:)+\d\d[.,]\d\d\d)/
  collecting = false
  sample = +""
  # File.foreach closes the file itself, also when the loop is left early.
  File.foreach(vtt_file, encoding: 'UTF-8') do |line|
    if line.match?(timing_line)
      collecting = true
    elsif line.strip.empty?
      collecting = false
    elsif collecting
      sample << line
    end
    break if sample.length >= num_chars
  end
  sample[0, num_chars]
end
Method to translate a given text message based on following conditions
- If translate is false, the message is returned as is.
- If src_lang and target_lang are the same, the message is returned as is.
- If src_lang is nil or empty, this caption file is inspected to infer the language; if the inferred language is the same as target_lang, the message is again returned as is.
- Otherwise, the translated text is returned.
Raises
- LangDetectionFailureException - if the language could not be inferred
# File lib/vtt.rb, line 182
# Returns the message to write for a cue: either the original text or its
# translation.
#
# translate   - when falsy the message is returned untouched.
# message     - cue text.
# src_lang    - source language; when nil or empty the language is inferred
#               from the caption file and remembered in @inferred_src_lang.
# target_lang - language to translate into.
#
# The message is also returned untouched when the effective source language
# equals target_lang.
#
# Raises LangDetectionFailureException when infer_languages raises a
# StandardError.
def translated_msg(translate, message, src_lang, target_lang)
  return message unless translate

  source_lang = src_lang
  if source_lang.nil? || source_lang.empty?
    # We don't need to infer again and again.
    # ||= only keeps a truthy result, so a nil inference is retried on the
    # next call.
    begin
      @inferred_src_lang ||= infer_languages.first
    rescue StandardError => e
      raise LangDetectionFailureException.new("Failed to infer language due to #{e.message}")
    end
    source_lang = @inferred_src_lang
  end
  return message if source_lang.eql?(target_lang)

  @translator.translate(message, source_lang, target_lang)
end