class Muzzy::FiletypeDetector

Reads the file content and detects whether the file type is CSV, TSV, or something else.

Attributes

filepath[R]
filetype[R]
first_row[R]
second_row[R]

Public Class Methods

new(filepath) click to toggle source
# File lib/muzzy/filetype_detector.rb, line 8
# Builds a detector for the file at +filepath+.
#
# No file access happens here; detection is deferred until one of the
# predicates (csv?, tsv?, unknown?) is called.
#
# @param filepath [String] path of the file whose type should be detected
def initialize(filepath)
  @filepath = filepath
  # Detection results start out as nil so the "not detected yet" state is
  # explicit rather than relying on reading never-assigned instance variables.
  @filetype = nil
  @first_row = nil
  @second_row = nil
end

Public Instance Methods

csv?() click to toggle source
# File lib/muzzy/filetype_detector.rb, line 16
# Reports whether the file was detected as CSV.
#
# Runs detection first (a no-op once @filetype has been set), then compares
# the cached type. The result depends only on @filetype, never on whatever
# value detect happens to return.
#
# @return [Boolean] true when the detected file type is 'csv'
def csv?
  detect
  @filetype == 'csv'
end
tsv?() click to toggle source
# File lib/muzzy/filetype_detector.rb, line 12
# Reports whether the file was detected as TSV.
#
# Runs detection first (a no-op once @filetype has been set), then compares
# the cached type. The result depends only on @filetype, never on whatever
# value detect happens to return.
#
# @return [Boolean] true when the detected file type is 'tsv'
def tsv?
  detect
  @filetype == 'tsv'
end
unknown?() click to toggle source
# File lib/muzzy/filetype_detector.rb, line 20
# Reports whether the file type could not be determined.
#
# Runs detection first (a no-op once @filetype has been set), then compares
# the cached type. The result depends only on @filetype, never on whatever
# value detect happens to return.
#
# @return [Boolean] true when the detected file type is 'unknown'
def unknown?
  detect
  @filetype == 'unknown'
end

Private Instance Methods

detect() click to toggle source
# File lib/muzzy/filetype_detector.rb, line 30
# Detects the file type once and caches the outcome.
#
# Sets @filetype to 'csv', 'tsv' or 'unknown', and stores the two rows
# obtained from Muzzy::Util.fetch_header_and_first_row in @first_row and
# @second_row (both are -1 when the type is 'unknown'). Later calls return
# immediately because @filetype is already set.
#
# Decision order for files without a .tsv extension:
# 1. neither delimiter parses  -> 'unknown'
# 2. only one delimiter parses -> that type
# 3. both parse                -> the delimiter yielding more header columns
# 4. tie with a single column  -> 'csv'
# 5. any other tie             -> 'unknown'
#
# @return [nil] always nil, so callers may write `detect || ...`
def detect
  return unless @filetype.nil?

  # A .tsv extension is trusted outright: parse with tabs and stop.
  if tsv_ext?
    @first_row, @second_row = Muzzy::Util.fetch_header_and_first_row(@filepath, "\t")
    @filetype = 'tsv'
    return
  end

  # Any other extension may still hold comma- or tab-separated data, so
  # parse with both delimiters and compare the results.
  csv_header_row, csv_first_row = Muzzy::Util.fetch_header_and_first_row(@filepath, ",")
  tsv_header_row, tsv_first_row = Muzzy::Util.fetch_header_and_first_row(@filepath, "\t")

  # NOTE(review): -1 is presumably the helper's "could not parse" sentinel;
  # confirm against Muzzy::Util.
  csv_failed = csv_header_row == -1
  tsv_failed = tsv_header_row == -1

  csv_result = [csv_header_row, csv_first_row, 'csv']
  tsv_result = [tsv_header_row, tsv_first_row, 'tsv']
  unknown_result = [-1, -1, 'unknown']

  @first_row, @second_row, @filetype =
    if csv_failed && tsv_failed
      unknown_result
    elsif csv_failed
      tsv_result
    elsif tsv_failed
      csv_result
    elsif csv_header_row.length > tsv_header_row.length
      csv_result
    elsif tsv_header_row.length > csv_header_row.length
      tsv_result
    elsif csv_header_row.length == 1
      # Header widths are equal here, so both parses produced one column:
      # a single-column file is treated as csv.
      csv_result
    else
      # Same multi-column width under both delimiters: ambiguous.
      unknown_result
    end

  # The assignment above evaluates to an array; return nil explicitly so the
  # return value never looks truthy to callers.
  nil
end
tsv_ext?() click to toggle source
# File lib/muzzy/filetype_detector.rb, line 26
# Checks whether the base name of @filepath ends with the ".tsv" extension.
#
# @return [Integer, nil] index of the match within the base name (truthy),
#   or nil when the name does not end in ".tsv"
def tsv_ext?
  name = File.basename(@filepath)
  /\.tsv\z/ =~ name
end