class HashReader

FIXME: move this class into the TextUtils namespace/module.

Public Class Methods

from_file( path ) click to toggle source
# File lib/textutils/reader/hash_reader.rb, line 37
def self.from_file( path )
  ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark);
  ##   File.read_utf8 is a project helper - see textutils/utils.rb
  ## the text read from path is handed straight on to from_string
  from_string( File.read_utf8( path ) )
end
from_string( text ) click to toggle source
# File lib/textutils/reader/hash_reader.rb, line 44
def self.from_string( text )
  ## new-style constructor call: the text travels inside an options hash
  ##   under the :text key (a plain string arg would be taken as a filepath)
  opts = { text: text }
  HashReader.new( opts )
end
from_zip( zip_file, entry_path ) click to toggle source
# File lib/textutils/reader/hash_reader.rb, line 10
  def self.from_zip( zip_file, entry_path )
    ## Reads the entry at entry_path from zip_file, relabels the bytes
    ##  as utf-8 and hands the text on to from_string.

    ## todo/fix: check if more clean-up/preprocessing of lines is needed
    ##  e.g. convert CR/LF (\r\n) to LF (\n)
    raw = zip_file.find_entry( entry_path ).get_input_stream.read

    ## NOTE: no logger ref here (only available in instance methods); use the global root logger for now
    log = LogUtils::Logger.root
    log.debug "text.encoding.name (before): #{raw.encoding.name}"

    ## NB: the bytes read may be tagged ASCII-8BIT (== BINARY == encoding unknown; raw bytes)
    ##  for now "hardcoded" to utf-8 - what else can we do?
    ##  - note: force_encoding does NOT translate any chars; it only changes
    ##    the assumed encoding (in place) and returns the very same string
    utf8_text = raw.force_encoding( Encoding::UTF_8 )
    log.debug "text.encoding.name (after): #{utf8_text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    from_string( utf8_text )
  end
new( arg ) click to toggle source
# File lib/textutils/reader/hash_reader.rb, line 48
def initialize( arg )
  ## Builds the reader and parses the yaml text into @hash.
  ##
  ## arg is either
  ##  - a String: old style (deprecated) - taken as a filepath and read via File.read_utf8
  ##  - a Hash:   new style - the yaml text is passed in under the :text key
  ##
  ## Before parsing, the text gets patched up to work around some yaml gotchas
  ##  (see below); every patch-up gets logged as a warning.
  ## An empty (or comments-only) document results in an empty hash,
  ##  so that each/each_typed simply yield nothing.

  path = nil   ## nb: stays nil if built from text; only used in log messages

  if arg.is_a?( String )  ## old style (deprecated) - pass in filepath as string
    path = arg
    logger.info "HashReader.new - deprecated API - use HashReader.from_file() instead"
    text = File.read_utf8( path )
  else   ## assume it's a hash
    text = arg[:text]
  end

  ### hack for syck yaml parser (e.g. ruby 1.9.2) (cannot handle !!null)
  ##   change it to !null to get plain nil
  ##   w/ both syck and psych/libyaml

  text = text.gsub( '!!null', '!null' )

  ### hacks for yaml

  ### see yaml gotchas
  ##  - http://www.perlmonks.org/?node_id=738671

  ## replace all tabs w/ two spaces and issue a warning (one per tab found)
  ## nb: yaml does NOT support tabs see why here -> yaml.org/faq.html

  text = text.gsub( "\t" ) do
    logger.warn "hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html (path=#{path})"
    '  '  # replace w/ two spaces
  end

  ## quote implicit boolean types on,no,n,y

  ## nb: escape only if key e.g. no: or "free standing" value on its own line e.g.
  ##   no: no

  text = text.gsub( /^([ ]*)(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*:/ ) do
    logger.warn "hash reader - found implicit bool (#{$1}#{$2}) for key; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
    # nb: preserve leading spaces for structure - might be significant
    "#{$1}'#{$2}':"  # add quotes to turn it into a string (not bool e.g. true|false)
  end

  ## nb: value must be freestanding (only allow optional eol comment)
  ##  do not escape if part of string sequence e.g.
  ##  key: nb,nn,no,se   => nb,nn,'no',se  -- avoid!!
  ##  note: the whole match gets replaced, that is, an eol comment gets dropped
  #
  #  check: need we add true|false too???

  text = text.gsub( /:[ ]+(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*($| #.*$)/ ) do
    logger.warn "hash reader - found implicit bool (#{$1}) for value; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
    ": '#{$1}'"  # add quotes to turn it into a string (not bool e.g. true|false)
  end

  ## NOTE(review): depending on the yaml library version YAML.load may
  ##  instantiate arbitrary objects; consider YAML.safe_load if the text
  ##  may come from untrusted sources - TODO confirm
  ##
  ## nb: YAML.load returns nil or false (not a hash) for an empty document;
  ##  fall back to an empty hash to keep each/each_typed from blowing up
  @hash = YAML.load( text ) || {}
end

Public Instance Methods

each() { |key, value| ... } click to toggle source

Note: yields every key and value as a string with leading and trailing whitespace removed.

# File lib/textutils/reader/hash_reader.rb, line 109
def each
  ## Walks all entries of the parsed hash and yields a normalized pair:
  ##  - key and value both get converted to strings via to_s
  ##    (whatever they were before e.g. symbol, bool, int, array)
  ##  - leading and trailing whitespace gets removed
  @hash.each do |raw_key, raw_value|
    key, value = [raw_key, raw_value].map { |item| item.to_s.strip }

    logger.debug "yaml key:#{raw_key.class.name} >>#{key}<<, value:#{raw_value.class.name} >>#{value}<<"

    yield( key, value )
  end
end
each_typed() { |key, value| ... } click to toggle source

TODO: decide on a name for this method: each_object or each_typed?

Alternatively, introduce a new TypedHashReader class (or similar).
# File lib/textutils/reader/hash_reader.rb, line 127
def each_typed
  ## Walks all entries of the parsed hash and yields key and value:
  ##  - the key gets converted to a string (via to_s) and stripped
  ##  - the value keeps its type; only string values get stripped
  ##    of leading and trailing whitespace
  @hash.each do |raw_key, raw_value|
    key   = raw_key.to_s.strip
    value = raw_value.is_a?( String ) ? raw_value.strip : raw_value

    logger.debug "yaml key:#{raw_key.class.name} >>#{key}<<, value:#{raw_value.class.name} >>#{value}<<"

    yield( key, value )
  end
end