class Oddb2xml::SwissmedicInfoExtractor

Public Instance Methods

to_hash() click to toggle source
# File lib/oddb2xml/extractor.rb, line 377
# Builds a per-language index of the medical information entries parsed
# from @xml.
#
# Only entries whose language string matches /de|fr/ are kept. Each kept
# entry is scanned for five-digit numbers and one record is emitted per
# number found, with that number stored under :monid. Entries without any
# five-digit number produce no record.
#
# Returns a Hash mapping the language string to an Array of record Hashes
# with the keys :refdata, :data_origin, :name, :owner, :style, :paragraph
# and :monid. Looking up a missing key yields (and stores) an empty Array.
# The Hash is empty when @xml is nil or empty.
def to_hash
  data = Hash.new { |h, k| h[k] = [] }
  return data if @xml.nil? || @xml.empty?

  result = MedicalInformationsContent.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
  result.medicalInformation.each do |pac|
    lang = pac.lang.to_s
    next unless /de|fr/.match?(lang)

    style = Nokogiri::HTML.fragment(pac.style).to_html(encoding: "UTF-8")
    html = Nokogiri::HTML.fragment(pac.content.force_encoding("UTF-8"))
    item = {
      refdata: true,
      data_origin: "swissmedic_info",
      name: pac.title || "",
      owner: pac.authHolder || "",
      style: style,
      paragraph: html
    }

    # The fragment itself is handed to Regexp#match, so this relies on the
    # fragment's implicit string conversion — presumably its text content;
    # verify against Nokogiri.
    # NOTE(review): the first alternative matches whenever any five-digit
    # run exists, so the second alternative never takes part in a match and
    # capture 3 is always nil; at most two numbers are extracted per entry.
    # The pattern is kept unchanged until the intended behaviour is confirmed.
    numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
    next unless numbers

    numbers.captures.first(3).compact.each do |n|
      # Append an independent copy per number: pushing the same Hash and
      # overwriting :monid would leave every record with the last number.
      data[lang] << item.merge(monid: n)
    end
  end
  data
end