class Alexandria::PseudoMarcParser

A really simple regex-based parser to grab data out of marc text records.

Constants

BNF_FR_MAPPINGS
USMARC_MAPPINGS

Public Class Methods

get_fields(data, type, stripping, mappings = USMARC_MAPPINGS) click to toggle source
# File lib/alexandria/pseudo_marc_parser.rb, line 44
def self.get_fields(data, type, stripping, mappings = USMARC_MAPPINGS)
  field = ""
  mappings[type][1..mappings[type].length - 1].each do |part|
    if data.first[part]
      part_data = data.first[part].strip
      if part_data =~ stripping
        part_data = Regexp.last_match[1]
        part_data = part_data.strip
      end
      field += ": " if field != ""
      field += part_data
    end
  end
  field = nil if field == ""
  field
end
marc_text_to_book(marc, mappings = USMARC_MAPPINGS) click to toggle source
# File lib/alexandria/pseudo_marc_parser.rb, line 61
def self.marc_text_to_book(marc, mappings = USMARC_MAPPINGS)
  details = marc_text_to_details(marc)
  return if details.empty?

  title = nil
  title_data = details[mappings[:title][0]]
  if title_data
    title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, mappings)
    title = title_data_all if title_data_all
  end

  authors = []
  author_data = details[mappings[:authors][0]]
  author_data&.each do |ad|
    author = ad[mappings[:authors][1]]
    if author
      author = author.strip
      author = Regexp.last_match[1] if author =~ /(.*),$/
      authors << author
    end
  end

  isbn = nil
  binding = nil
  isbn_data = details[mappings[:isbn][0]]
  if isbn_data && isbn_data.first[mappings[:isbn][1]] =~ /([-0-9xX]+)/
    isbn = Regexp.last_match[1]
  end

  binding_data = details[mappings[:binding][0]]
  if binding_data &&
      binding_data.first[mappings[:binding][1]] =~ /([a-zA-Z][a-z\s]+[a-z])/
    binding = Regexp.last_match[1]
  end

  publisher = nil
  publisher_data = details[mappings[:publisher][0]]
  publisher = publisher_data.first[mappings[:publisher][1]] if publisher_data

  year = nil
  publication_data = details[mappings[:year][0]]
  if publication_data
    year = publication_data.first[mappings[:year][1]]
    year = Regexp.last_match[1].to_i if year =~ /(\d+)/
  end

  notes = ""
  notes_data = details[mappings[:notes][0]]
  notes_data&.each do |note|
    txt = note[mappings[:notes][1]]
    notes += txt if txt
  end

  if title.nil? && isbn.nil?
    # probably didn't undertand the MARC dialect
    return nil
  end

  book = Alexandria::Book.new(title, authors, isbn,
                              publisher, year, binding)
  book.notes = notes unless notes.empty?
  book
end
marc_text_to_details(marc) click to toggle source
# File lib/alexandria/pseudo_marc_parser.rb, line 125
def self.marc_text_to_details(marc)
  details = {}
  marc&.each_line do |line|
    if line =~ /(\d+)\s*(.+)/
      code = Regexp.last_match[1]
      data = Regexp.last_match[2]

      this_line_data = {}

      d_idx = 0
      while d_idx < data.size
        d_str = data[d_idx..]
        idx = d_str =~ /\$([a-z]) ([^$]+)/
        break unless idx

        sub_code = Regexp.last_match[1]
        sub_data = Regexp.last_match[2]
        this_line_data[sub_code] = sub_data
        d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
      end

      unless this_line_data.empty?
        details[code] = [] unless details.key?(code)
        details[code] << this_line_data
      end
    end
  end
  details
end