class ExcelToCsv::ExcelFile
Constants
- FORCE_WIN_OLE
Public Class Methods
new()
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 28 def initialize() @date_RE = Regexp.new(/\d{4,4}\/\d{2,2}\/\d{2,2}/) @date_with_dashes_RE = Regexp.new(/\d{4,4}-\d{2,2}-\d{2,2}/) @date_with_time_RE = Regexp.new(/\d{2,2}:\d{2,2}:\d{2,2}/) end
Public Instance Methods
clean_csv(filename)
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 137 def clean_csv(filename) max_row_length = 0 CSV.foreach(filename) do |row| row_len = 0 i = 0 row.each do |item| row_len = i if !item.nil? && !item.empty? i += 1 end max_row_length = row_len if row_len > max_row_length end puts "Max row length: #{max_row_length.to_s}" if verbose? tmp_file = filename.to_s + ".tmp.csv" CSV.open(tmp_file, "wb") do |tmp_csv| # Used to track empty lines empty_found = false CSV.foreach(filename) do |row| i = 0 clean_row = [] while(i <= max_row_length) do clean_row << row[i] i += 1 end # We need to stop output on 2nd empty row break if empty_row?(clean_row) && empty_found empty_found = empty_row?(clean_row) tmp_csv << clean_row end # CSV read end # CSV write # Replace original file with tmpfile. FileUtils.rm filename FileUtils.mv tmp_file, filename end
clean_int_value(a_cell)
click to toggle source
If the result is n.000… Remove the unecessary zeros.
# File lib/exceltocsv/excel_file.rb, line 225 def clean_int_value(a_cell) if(a_cell.match(/\.[0]+$/)) cary = a_cell.split(".") a_cell = cary[0] end a_cell end
empty_row?(row)
click to toggle source
Return true if row contains no data
# File lib/exceltocsv/excel_file.rb, line 176 def empty_row?(row) is_empty = true row.each do |item| is_empty = false if item && !item.empty? end is_empty end
format_date(a_cell)
click to toggle source
If the cell is a date, format it to MM/DD/YYYY, stripping time.
# File lib/exceltocsv/excel_file.rb, line 234 def format_date(a_cell) isdate = true if(nil != (dt = a_cell.match(@date_RE))) isdate = true if(isdate || (nil != (dt = a_cell.match(@date_with_dashes_RE))) ) isdate = true if(isdate || (nil != (dt = a_cell.match(@date_with_time_RE))) ) if isdate begin mod_dt = DateTime.parse(a_cell) cary = "#{mod_dt.month}/#{mod_dt.day}/#{mod_dt.year}" if(true == verbose?) puts "" puts "*** Converted date to #{cary} ***" puts "" end a_cell = cary rescue ArgumentError => e # Either this is not a date, or the date format is unrecognized, # nothing to see here, moving on. end end a_cell end
prepare_outdir(outdir)
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 256 def prepare_outdir(outdir) if( !File.directory?(outdir) ) FileUtils.makedirs("#{outdir}") end end
process_cell_value(a_cell)
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 184 def process_cell_value(a_cell) # Truncate the number to 3 decimal places if numeric. a_cell = truncate_decimal(a_cell) # Remove leading and trailing spaces. a_cell = a_cell.to_s.strip # If the result is n.000... Remove the unecessary zeros. a_cell = clean_int_value(a_cell) # If the result is a date, remove time. a_cell = format_date(a_cell) # Surround the cell value with quotes when it contains a comma. a_cell = '"' + a_cell + '"' if a_cell.include?(',') a_cell end
set_flag(flg)
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 47 def set_flag(flg) if (flg == "-v") @verbose = true end end
truncate_decimal(a_cell)
click to toggle source
Truncates a decimal to 3 decimal places if numeric and remove trailing zeros, if more than one decimal place. returns a string
# File lib/exceltocsv/excel_file.rb, line 206 def truncate_decimal(a_cell) if(a_cell.is_a?(Numeric)) a_cell = truncate_decimal_to_string(a_cell, 3) # Truncate zeros (unless there is only 1 decimal place) # eg. 12.10 => 12.1 # 12.0 => 12.0 a_cell = BigDecimal.new(a_cell).to_s("F") end a_cell end
truncate_decimal_to_string(num, places)
click to toggle source
Truncates a decimal and converts it to a string. num: decimal to truncate places: number of decimal places to truncate at
# File lib/exceltocsv/excel_file.rb, line 220 def truncate_decimal_to_string(num, places) "%.#{places}f" % num end
verbose?()
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 53 def verbose?() @verbose ||= false end
winPath(filepath)
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 262 def winPath(filepath) parts = filepath.split("/") mspath = nil for part in parts if(mspath == nil) mspath = [] mspath << part else mspath << "\\" << part end end mspath end
xl_app()
click to toggle source
# File lib/exceltocsv/excel_file.rb, line 34 def xl_app return @xl_app unless @xl_app.nil? if OS.windows? and defined?(FORCE_WIN_OLE) require_relative 'win_excel' @xl_app = WinExcel.new else # CrossPlatformExcel is faster (like, by 30x). require_relative 'cross_platform_excel' @xl_app = CrossPlatformExcel.new end @xl_app end
xl_to_csv(infile, outfile)
click to toggle source
Convert the 1st sheet in an xls(x) file to a csv file.
# File lib/exceltocsv/excel_file.rb, line 58 def xl_to_csv(infile, outfile) filepath = File.expand_path(infile) puts "xl_to_csv: #{infile} => #{outfile}" if verbose? unless File.exists?(filepath) puts "Unable to find file." puts " #{filepath}" return end # Open an Excel file xl_app.open_workbook filepath # Build a list of work sheets to dump to file. sheets_in_file = [] sheet_saved_count = 0 xl_app.worksheet_names.each do |sheetname| if( sheetname.match(/CQDS/) || sheetname.match(/PLK/) ) sheets_in_file << sheetname puts "Converting sheet #{sheetname}" if verbose? sheet_saved_count += 1 end end if (1 > sheet_saved_count) puts "*** No sheets labeled 'PLK' or 'CQDS' ***" puts "Verify #{infile} is formatted correctly." # Close Excel xl_app.close_workbook return end # Write sheet data to file. File.open(outfile, "w") do |f| data = xl_app.worksheet_data(sheets_in_file[0]) for row in data row_data = [] for a_cell in row row_data << process_cell_value(a_cell) end contains_data = false # Determine if the row contains any data. for cell in row_data if(cell.match(/[^,\r\n]+/)) contains_data = true end end # Insert an empty line if the row contains no data. if(true == contains_data) f << row_data.join(",") f << "\n" if(true == verbose?) puts "#{row_data}" end else f << "\n" if(true == verbose?) puts "\n" end end end end # Strip empty data from end of lines clean_csv(outfile) # Close Excel xl_app.close_workbook end