module MasterSplitter
This module holds everything. It includes 6 methods and 4 constants.
Constants
- FILE_NAME_FINDER
For capturing only name of a file.
- MAX_CHUNK_SIZE
Max size of each read/write from/to files.
- STANDARD_SLICE_NAMING_FORMAT
Naming format of a sliced file.
- VERSION
Holds the gem's version.
Public Instance Methods
Use this method to join several files whose names do not follow the standard format. You can pass it a name for the output file and a directory to store it in. Example:
>> custom_joiner(["first.pdf", "second.pdf"], output_file_name: "book.pdf", output_dir: "Desktop/")
Arguments:
slice_names: (Array) options: (Hash)
# File lib/master_splitter/joiner.rb, line 14
#
# Joins slices whose names do not follow the standard naming format.
#
# slice_names:: (Array) paths of the slices, in the order they are joined.
# options::     (Hash) optional settings:
#               +:output_file_name+ - name of the joined file; when omitted
#               the name of the first slice is used.
#               +:output_dir+ - directory that is prepended to the output
#               file name.
#
# Raises Exception when one of the slices does not exist.
# Returns the result of #join.
def custom_joiner(slice_names, options={})
  output_dir = options[:output_dir]
  output_file_name = options[:output_file_name]

  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  missing = slice_names.find { |slice_name| !File.exist?(slice_name) }
  raise Exception, "file '#{missing}' does not exist." if missing

  # NOTE(review): without :output_file_name the output path defaults to the
  # first slice's own name - confirm this is intended, since #join opens the
  # output file for writing.
  output_file_name ||= slice_names[0]
  output_file_name = File.join(output_dir, output_file_name) if output_dir
  join(output_file_name, slice_names)
end
Use this method to split a file into slices whose sizes you specify. The sum of all slice sizes must equal the size of the original file. Sizes must be given in bytes. Example:
>> custom_splitter("file.pdf", [1232, 5432], output_dir: "Desktop/")
Arguments:
source_file_name: (String) slice_sizes: (Array) options: (Hash)
# File lib/master_splitter/splitter.rb, line 17
#
# Splits a file into slices of caller-specified sizes. Slices are named
# "<name>.001", "<name>.002", ... and handed to #split.
#
# source_file_name:: (String) path of the file to split.
# slice_sizes::      (Array) size of each slice in bytes; the sizes must add
#                    up to the size of the source file.
# options::          (Hash) optional settings:
#                    +:output_dir+ - directory in which the slices are
#                    created; when omitted the slices are placed next to the
#                    source file.
#
# Raises Exception when the sizes do not add up to the source file size.
# Returns the result of #split.
def custom_splitter(source_file_name, slice_sizes, options={})
  output_dir = options[:output_dir]

  # The previous slice_sizes.each(&:+) called Integer#+ without an argument
  # and raised ArgumentError; inject actually adds the sizes up.
  sum_of_sizes = slice_sizes.inject(0, :+)
  if sum_of_sizes != File.size(source_file_name)
    raise Exception, "sum of slice sizes does not equal size of source file."
  end

  # Inside output_dir only the bare file name is used, not the source path.
  slice_name = source_file_name
  if output_dir && source_file_name.include?("/")
    slice_name = FILE_NAME_FINDER.match(source_file_name)[1]
  end

  slice_names = (1..slice_sizes.count).map do |number|
    numbered = [slice_name, format("%03d", number)].join('.')
    output_dir ? File.join(output_dir, numbered) : numbered
  end
  split(source_file_name, slice_names, slice_sizes)
end
This method does the actual joining of slices. It gets an Array of slice names and name of the output file.
Example:
>> join("book.pdf", ["book.pdf.001", "book.pdf.002"])
Arguments:
output_file_name: (String) slice_names: (Array)
# File lib/master_splitter/joiner.rb, line 87
#
# Does the actual joining: writes the content of every slice, in the given
# order, to the output file. Data is copied in pieces of at most
# MAX_CHUNK_SIZE bytes.
#
# output_file_name:: (String) path of the file to create (opened in 'wb' mode).
# slice_names::      (Array) paths of the slices to concatenate.
#
# Returns nil.
def join(output_file_name, slice_names)
  # Block-form File.open closes every handle even when an error occurs
  # part-way through (for example when a slice cannot be opened).
  File.open(output_file_name, 'wb') do |output_file|
    slice_names.each do |slice_name|
      File.open(slice_name, 'rb') do |slice|
        # IO#read(length) returns nil at end of file, which ends the loop.
        while (chunk = slice.read(MAX_CHUNK_SIZE))
          output_file.write(chunk)
        end
      end
    end
  end
  nil
end
This method does the actual splitting of a file. It takes the name of the source file and two arrays: one contains the names of the slices and the other their sizes. Example:
>> split("book.pdf", ["book.pdf.001", "book.pdf.002"], [6456, 6456])
Arguments:
source_file_name: (String) slice_names: (Array) slice_sizes: (Array)
# File lib/master_splitter/splitter.rb, line 94
#
# Does the actual splitting: reads the source file sequentially and writes
# slice_sizes[i] bytes of it to the file named slice_names[i]. Data is copied
# in pieces of at most MAX_CHUNK_SIZE bytes.
#
# source_file_name:: (String) path of the file to split.
# slice_names::      (Array) paths of the slices to create ('wb' mode).
# slice_sizes::      (Array) size in bytes of each slice, parallel to
#                    slice_names.
#
# Returns nil.
def split(source_file_name, slice_names, slice_sizes)
  # Block-form File.open closes every handle even when an error occurs
  # part-way through (for example when a slice cannot be created).
  File.open(source_file_name, 'rb') do |source|
    slice_names.each_with_index do |slice_name, index|
      File.open(slice_name, 'wb') do |slice|
        bytes_to_write = slice_sizes[index]
        while bytes_to_write > 0
          chunk = [bytes_to_write, MAX_CHUNK_SIZE].min
          slice.write(source.read(chunk))
          bytes_to_write -= chunk
        end
      end
    end
  end
  nil
end
This method joins the slices of a split file whose names follow the standard format. You only have to pass it the name of the first slice. Remember that all slices must be in the same directory. Example:
>> standard_joiner("path/to/first_slice.pdf.001", output_file_name: "book.pdf", output_dir: "Desktop/")
Arguments:
first_slice_name: (String) options: (Hash)
# File lib/master_splitter/joiner.rb, line 42
#
# Joins the slices of a file whose names follow the standard naming format.
# Starting from the number of the first slice, consecutive slices
# ("<name>.001", "<name>.002", ...) are collected until one is missing, then
# handed to #join.
#
# first_slice_name:: (String) path of the first slice; it must match
#                    STANDARD_SLICE_NAMING_FORMAT.
# options::          (Hash) optional settings:
#                    +:output_file_name+ - name of the joined file; when
#                    omitted it is derived from the slice name.
#                    +:output_dir+ - directory that is prepended to the
#                    output file name.
#
# Raises Exception when first_slice_name does not match the naming format.
# Returns the result of #join.
def standard_joiner(first_slice_name, options={})
  output_dir = options[:output_dir]
  output_file_name = options[:output_file_name]

  match_result = STANDARD_SLICE_NAMING_FORMAT.match(first_slice_name)
  unless match_result
    raise Exception, %q{Wrong naming format for the first slice!}
  end

  # Capture 1 is the slice name without its number, capture 2 the number.
  base_name = match_result[1]
  if base_name.include?("/") && output_dir
    # Inside output_dir only the bare file name is used, not the slice path.
    output_file_name ||= FILE_NAME_FINDER.match(base_name)[1]
  else
    output_file_name ||= base_name
  end

  slice_names = []
  slice_number = match_result[2].to_i
  loop do
    slice_name = [base_name, format("%03d", slice_number)].join('.')
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    break unless File.exist?(slice_name)
    slice_names << slice_name
    slice_number += 1
  end

  output_file_name = File.join(output_dir, output_file_name) if output_dir
  join(output_file_name, slice_names)
end
This method splits a given file to a specified number of slices, equally. Example:
>> standard_splitter("file.pdf", 5, output_dir: "Desktop/")
Arguments:
source_file_name: (String) number_of_slices: (Integer) options: (Hash)
# File lib/master_splitter/splitter.rb, line 55
#
# Splits a file into the given number of slices of equal size; the last
# slice additionally receives the bytes left over by the integer division.
# Slices are named "<name>.001", "<name>.002", ... and handed to #split.
#
# source_file_name:: (String) path of the file to split.
# number_of_slices:: (Integer) how many slices to create; must be positive.
# options::          (Hash) optional settings:
#                    +:output_dir+ - directory in which the slices are
#                    created; when omitted the slices are placed next to the
#                    source file.
#
# Raises ArgumentError when number_of_slices is not a positive Integer.
# Returns the result of #split.
def standard_splitter(source_file_name, number_of_slices, options={})
  # Rejected up front: zero would divide by zero below and a negative count
  # would leave no slice to receive the remaining bytes.
  unless number_of_slices.is_a?(Integer) && number_of_slices > 0
    raise ArgumentError, "number_of_slices must be a positive Integer."
  end

  output_dir = options[:output_dir]
  # File.size reads the size without keeping a file handle open.
  source_size = File.size(source_file_name)
  slice_size = source_size / number_of_slices

  slice_sizes = Array.new(number_of_slices, slice_size)
  slice_sizes[-1] += source_size - (slice_size * number_of_slices)

  # Inside output_dir only the bare file name is used, not the source path.
  slice_name = source_file_name
  if output_dir && source_file_name.include?('/')
    slice_name = FILE_NAME_FINDER.match(source_file_name)[1]
  end

  slice_names = (1..number_of_slices).map do |number|
    numbered = [slice_name, format("%03d", number)].join('.')
    output_dir ? File.join(output_dir, numbered) : numbered
  end
  split(source_file_name, slice_names, slice_sizes)
end