class LineReader
Public Class Methods
from_file( path )
click to toggle source
# File lib/textutils/reader/line_reader.rb, line 58
# Builds a reader from the file at +path+.
#
# The file content is loaded with File.read_utf8 and passed on to from_string.
# NOTE(review): per the original note, File.read_utf8 lives in
# textutils/utils.rb and assumes/enforces UTF-8 (with or without a BOM -
# byte order mark) - confirm against that file.
def self.from_file( path )
  from_string( File.read_utf8( path ) )
end
from_string( text )
click to toggle source
# File lib/textutils/reader/line_reader.rb, line 65
# Builds a reader directly from an in-memory string.
#
# The text is wrapped in an options hash under the :text key, which is the
# non-deprecated argument form accepted by LineReader.new.
def self.from_string( text )
  options = { text: text }
  LineReader.new( options )
end
from_zip( zip_file, entry_path )
click to toggle source
# File lib/textutils/reader/line_reader.rb, line 31
# Builds a reader from a single entry of an already opened zip archive.
#
# zip_file   - archive object responding to find_entry
#              (NOTE(review): presumably a rubyzip Zip::File - confirm with callers)
# entry_path - path of the entry inside the archive
#
# Returns whatever from_string returns for the entry's text.
# Raises ArgumentError if find_entry returns nil for entry_path.
def self.from_zip( zip_file, entry_path )
  entry = zip_file.find_entry( entry_path )
  ## fail early with a readable message instead of a NoMethodError on nil
  raise ArgumentError, "zip entry not found: #{entry_path}" unless entry

  ## todo/fix: add force encoding to utf-8 ??
  ##  check!!!
  ##  clean/preprocess lines
  ##  e.g. CR/LF (\r\n) to LF (e.g. \n)
  text = entry.get_input_stream.read

  ## NOTE: needs logger ref; only available in instance methods; use global logger for now
  logger = LogUtils::Logger.root
  logger.debug "text.encoding.name (before): #{text.encoding.name}"

  #####
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  ## NB:
  #  for now "hardcoded" to utf8 - what else can we do?
  #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
  text = text.force_encoding( Encoding::UTF_8 )
  logger.debug "text.encoding.name (after): #{text.encoding.name}"

  ## todo:
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
  ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

  from_string( text )
end
new( arg )
click to toggle source
# File lib/textutils/reader/line_reader.rb, line 70
# Sets up the reader's text from +arg+.
#
# arg - either a String, which is treated as a file path (deprecated style:
#       an info message is logged and the file is read with File.read_utf8),
#       or any other object, which is assumed to be an options hash whose
#       :text entry holds the text.
def initialize( arg )
  @text = if arg.is_a?( String )
            ## old style (deprecated) - the string is a file path
            logger.info "LineReader.new - deprecated API - use LineReader.from_file() instead"
            File.read_utf8( arg )
          else
            ## anything else is assumed to be an options hash
            arg[:text]
          end
end
Public Instance Methods
each_line() { |line| ... }
click to toggle source
# File lib/textutils/reader/line_reader.rb, line 81
# Yields each data line of the text, cleaned up for parsing.
#
# Skipped entirely (never yielded):
#   - comment lines, i.e. lines whose first non-whitespace characters are
#       1) #   (shell/ruby style)
#       2) --  (haskell style)
#       3) %   (tex/latex style)
#   - blank / whitespace-only lines
#
# For every other line a trailing end-of-line comment (whitespace followed
# by # up to the end of the line) is removed and the result is stripped of
# leading and trailing whitespace before it is yielded.
#
# Returns an Enumerator over the cleaned lines when called without a block.
def each_line
  return enum_for( :each_line ) unless block_given?

  @text.each_line do |line|
    if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
      # skip comments and do NOT copy to result (keep comments secret!)
      logger.debug 'skipping comment line'
      next
    end

    if line =~ /^\s*$/
      # skip blank line
      logger.debug 'skipping blank line'
      next
    end

    # pass 1) remove possible trailing eol comment
    ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
    ## becomes -> nyc, New York
    ## note: .* (not .+) so that a bare trailing # with nothing after it is removed too
    line = line.sub( /\s+#.*$/, '' )

    # pass 2) remove leading and trailing whitespace
    yield( line.strip )
  end
end