module Metanorma::Standoc::Cleanup
Constants
- ABBRnoSYM
- ELEMS_ALLOW_NOTES
- IDREF
- IGNORE_DUMBQUOTES
- ISO_PUBLISHER_XPATH
- LOCALITY_RE
- LOCALITY_REGEX_STR
extending localities to cover ISO referencing
- MATHML_NS
- NORM_REF
- NO_SYMABBR
- REQRECPER
- SECTION_CONTAINERS
- SYMABBR
- SYMnoABBR
- TERMDEF_BLOCKS
- TERM_CLAUSE
- TEXT_ELEMS
- UNITSML_NS
Public Instance Methods
# File lib/metanorma/standoc/cleanup_maths.rb, line 83
# Returns the document's <misc-container> element, creating it when absent.
# A new container is inserted immediately after <termdocsource> if one
# exists, otherwise immediately after <bibdata>.
def add_misc_container(xmldoc)
  existing = xmldoc.at("//misc-container")
  return existing if existing

  anchor = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
  anchor.next = "<misc-container/>"
  xmldoc.at("//misc-container")
end
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 74
# Stores val in bib under the dot-separated key path, delegating to
# Metanorma::Utils.set_nested_value.
def add_to_hash(bib, key, val)
  path = key.split(".")
  Metanorma::Utils.set_nested_value(bib, path, val)
end
# File lib/metanorma/standoc/cleanup_block.rb, line 110
# For every <sourcecode>, links its callouts to its annotations, but only
# when the two occur in equal numbers.
def align_callouts_to_annotations(xmldoc)
  xmldoc.xpath("//sourcecode").each do |code|
    callouts, annotations = %w(callout annotation).map do |tag|
      code.elements.select { |e| e.name == tag }
    end
    next unless callouts.size == annotations.size

    link_callouts_to_annotations(callouts, annotations)
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 56
# Merges adjacent <definition> pairs within a term: when a definition holds
# only a verbal definition and the next one holds only a non-verbal
# representation, the representation is moved after the verbal definition
# and the second <definition> is removed.
def alternate_termdefinitions(xmldoc)
  xmldoc.xpath("//term").each do |term|
    term.xpath("./definition").each do |defn|
      following = defn.next_element or next
      verbal = defn.at("./verbal-definition") or next
      next if defn.at("./non-verbal-representation")
      next if following.at("./verbal-definition")

      nonverbal = following.at("./non-verbal-representation") or next
      verbal.next = nonverbal.remove
      following.remove
    end
  end
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 126
# Runs the three anchor-related passes in order: identifier normalisation,
# cross-reference target cleanup, then content-hash identifier rewriting.
def anchor_cleanup(elem)
  anchor_cleanup1(elem)
  xreftarget_cleanup(elem)
  contenthash_id_cleanup(elem)
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 132
# Normalises every attribute matched by IDREF through
# Metanorma::Utils.to_ncname, logging each value that had to change.
def anchor_cleanup1(elem)
  elem.xpath(IDREF).each do |attr|
    original = attr.value
    normalised = Metanorma::Utils.to_ncname(original)
    next if normalised == original

    attr.value = normalised
    # Log a childless copy of the owning element so the message stays short.
    shell = attr.parent.dup
    shell.children.remove
    @log.add("Anchors", attr.parent,
             "normalised identifier in #{shell} from #{original}")
  end
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 19
# Converts AsciiMath <stem> elements in the XML string to MathML and returns
# the re-serialised XML. Any <math> element not already inside a <stem> is
# wrapped in <stem type='MathML'>.
def asciimath2mathml(text)
  marked = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
    decoded = HTMLEntities.new.decode(Regexp.last_match(1))
    "<amathstem>#{decoded}</amathstem>"
  end
  converted = Html2Doc.asciimath_to_mathml(marked,
                                           ["<amathstem>", "</amathstem>"])
  doc = Nokogiri::XML(converted)
  doc.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |math|
    math.wrap("<stem type='MathML'></stem>")
  end
  doc.to_xml
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 113
# Options hash passed to Asciimath2UnitsML::Conv.new by mathml_cleanup.
def asciimath2unitsml_options
  {
    multiplier: :space,
  }
end
# File lib/metanorma/standoc/cleanup_section.rb, line 45
# Returns the first node found among the candidate paths, tried in order,
# or nil if none matches. make_abstract inserts the bibdata copy of the
# abstract after the returned node.
def bibabstract_location(xml)
  candidates = [
    "//bibdata/script",
    "//bibdata/language",
    "//bibdata/contributor[not(following-sibling::contributor)]",
    "//bibdata/date[not(following-sibling::date)]",
    "//docnumber",
    "//bibdata/docidentifier[not(following-sibling::docidentifier)]",
    "//bibdata/uri[not(following-sibling::uri)]",
    "//bibdata/title[not(following-sibling::title)]",
  ]
  candidates.each do |path|
    found = xml.at(path)
    return found if found
  end
  nil
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 139
# Strips the id attribute from every <bibitem> and <note> nested in <bibdata>.
def bibdata_anchor_cleanup(xmldoc)
  nested = xmldoc.xpath("//bibdata//bibitem | //bibdata//note")
  nested.each { |node| node.delete("id") }
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 133
# Runs the bibdata passes: anchor removal, docidentifier regrouping, and
# indirect eref handling for the de-duplicated @internal_eref_namespaces.
def bibdata_cleanup(xmldoc)
  bibdata_anchor_cleanup(xmldoc)
  bibdata_docidentifier_cleanup(xmldoc)
  prefixes = @internal_eref_namespaces&.uniq
  biblio_indirect_erefs(xmldoc, prefixes)
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 145
# Moves every <docidentifier> under <bibdata> so that they all sit
# contiguously after the first one, preserving their relative order.
def bibdata_docidentifier_cleanup(xmldoc)
  cursor = xmldoc.at("//bibdata/docidentifier")
  xmldoc.xpath("//bibdata/docidentifier").each_with_index do |docid, idx|
    unless idx.zero?
      cursor.next = docid.remove
      cursor = cursor.next
    end
  end
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 163
# Runs the bibitem passes in order: nested id removal, reference definition
# list cleanup, then local bibitem fetching.
def bibitem_cleanup(xmldoc)
  bibitem_nested_id(xmldoc)
  ref_dl_cleanup(xmldoc)
  fetch_local_bibitem(xmldoc)
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 157
# Removes the id attribute from any <bibitem> nested inside another <bibitem>.
def bibitem_nested_id(xmldoc)
  xmldoc.xpath("//bibitem//bibitem").each { |nested| nested.delete("id") }
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 83
# Runs the bibliography passes in order: reorder, nest, renumber, then
# strip <ext> from bibitems.
def biblio_cleanup(xmldoc)
  biblio_reorder(xmldoc)
  biblio_nested(xmldoc)
  biblio_renumber(xmldoc)
  biblio_no_ext(xmldoc)
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 203
# For each prefix (if any), gathers the indirect erefs of that type, passes
# them through resolve_local_indirect_erefs, and adds a hidden bibliography
# for whatever that call returns, unless it is empty.
def biblio_indirect_erefs(xmldoc, prefixes)
  prefixes&.each do |prefix|
    gathered = gather_indirect_erefs(xmldoc, prefix)
    remaining = resolve_local_indirect_erefs(xmldoc, gathered, prefix)
    insert_indirect_biblio(xmldoc, remaining, prefix) unless remaining.empty?
  end
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 94
# Turns a <references> that contains child <references> into a <clause>,
# pushing its normative attribute down onto each direct child.
def biblio_nested(xmldoc)
  xmldoc.xpath("//references[references]").each do |outer|
    outer.name = "clause"
    outer.xpath("./references").each do |inner|
      inner["normative"] = outer["normative"]
    end
    outer.delete("normative")
  end
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 90
# Removes the <ext> child of every <bibitem>.
def biblio_no_ext(xmldoc)
  xmldoc.xpath("//bibitem/ext").each { |ext| ext.remove }
end
By default, presuppose that all citations in the bibliography are numbered consecutively, but that standards codes are preserved as is: only numeric references are renumbered.
# File lib/metanorma/standoc/cleanup_ref.rb, line 51
# Renumbers bracketed numeric metanorma docidentifiers ("[n]") across all
# non-normative <references>. The counter advances for every non-hidden
# bibitem, including those whose identifier is left untouched.
def biblio_renumber(xmldoc)
  counter = 0
  containers = "//bibliography//references | //clause//references | " \
               "//annex//references"
  xmldoc.xpath(containers).each do |refs|
    next if refs["normative"] == "true"

    refs.xpath("./bibitem[not(@hidden = 'true')]").each do |item|
      counter += 1
      docid = item.at("./docidentifier[@type = 'metanorma']") or next
      /^\[\d+\]$/.match?(docid.text) or next
      docid.children = "[#{counter}]"
    end
  end
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 7
# Applies biblio_reorder1 to every <references> marked normative = 'false'.
def biblio_reorder(xmldoc)
  informative = xmldoc.xpath("//references[@normative = 'false']")
  informative.each { |refs| biblio_reorder1(refs) }
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 13
# Re-inserts the bibitems of refs in the order given by sort_biblio, then
# recurses into nested <references>. Notes following a bibitem are folded
# into it beforehand and extracted again afterwards so they travel with it.
def biblio_reorder1(refs)
  fold_notes_into_biblio(refs)
  sorted = sort_biblio(refs.xpath("./bibitem"))
  # Element preceding the first bibitem: the sorted items go back after it.
  anchor = refs.at("./bibitem")&.previous_element
  refs.xpath("./bibitem").each(&:remove)
  # Inserting in reverse at a fixed position yields the sorted order.
  sorted.reverse.each do |item|
    if anchor
      anchor.next = item.to_xml
    else
      refs.children.first.add_previous_sibling(item.to_xml)
    end
  end
  extract_notes_from_biblio(refs)
  refs.xpath("./references").each { |nested| biblio_reorder1(nested) }
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 123
# Reads the boilerplate file and returns it populated through
# conv.populate_template. The file is the one named by boilerplate_file,
# unless @boilerplateauthority is set, in which case that path (relative to
# @localdir) is used instead. Returns nil when there is no such file.
def boilerplate(xml, conv)
  file = boilerplate_file(xml)
  file = File.join(@localdir, @boilerplateauthority) if @boilerplateauthority
  # File.exists? was removed in Ruby 3.2; File.exist? is the supported form.
  return unless file && File.exist?(file)

  conv.populate_template(File.read(file, encoding: "UTF-8"), nil)
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 101
# Runs the boilerplate passes: term-definition boilerplate cleanup and
# insertion, unwrapping of boilerplate clauses, the normative references
# preface (when the document has such a section), and initial boilerplate.
def boilerplate_cleanup(xmldoc)
  isodoc = boilerplate_isodoc(xmldoc)
  termdef_boilerplate_cleanup(xmldoc)
  termdef_boilerplate_insert(xmldoc, isodoc)
  unwrap_boilerplate_clauses(xmldoc, self.class::TERM_CLAUSE)
  norm_refs = xmldoc.at(self.class::NORM_REF)
  norm_ref_preface(norm_refs) if norm_refs
  initial_boilerplate(xmldoc, isodoc)
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 119
# Path of the default boilerplate file: boilerplate.xml under @libdir.
# The document argument is not used here.
def boilerplate_file(_xmldoc)
  filename = "boilerplate.xml"
  File.join(@libdir, filename)
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 63
# Returns the memoised @isodoc converter after feeding it a namespaced,
# re-parsed copy of the document via #info. The original document is not
# modified.
def boilerplate_isodoc(xmldoc)
  copy = xmldoc.dup
  copy.root.add_namespace(nil, self.class::XML_NAMESPACE)
  reparsed = Nokogiri::XML(copy.to_xml)
  @isodoc ||= isodoc(@lang, @script)
  @isodoc.info(reparsed, nil)
  @isodoc
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 25
# Converts leading bookmarks into ids, first for list items, then for
# definition terms.
def bookmark_cleanup(xmldoc)
  li_bookmark_cleanup(xmldoc)
  dt_bookmark_cleanup(xmldoc)
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 30
# Removes the bookmark from the tree, transfers its id to elem, and strips
# initial space from the bookmark's former parent.
def bookmark_to_id(elem, bookmark)
  # Capture the parent before removal detaches the bookmark from it.
  former_parent = bookmark.parent
  removed = bookmark.remove
  elem["id"] = removed["id"]
  strip_initial_space(former_parent)
end
Allows us to deal with document relation localities, which are temporarily stashed in “bpart” elements.
# File lib/metanorma/standoc/cleanup.rb, line 106
# For each <bpart> under <relation>, extracts its localities and then
# replaces the <bpart> wrapper with its children.
def bpart_cleanup(xmldoc)
  xmldoc.xpath("//relation/bpart").each do |bpart|
    extract_localities(bpart)
    bpart.replace(bpart.children)
  end
end
# File lib/metanorma/standoc/cleanup_block.rb, line 127
# Merges annotations into their sourcecode block, then aligns callouts to
# those annotations.
def callout_cleanup(xmldoc)
  merge_annotations_into_sourcecode(xmldoc)
  align_callouts_to_annotations(xmldoc)
end
# File lib/metanorma/standoc/cleanup_amend.rb, line 4
# Builds an <amend> element (via create_amend) for every clause that carries
# a change attribute.
def change_clauses(x)
  x.xpath("//clause[@change]").each { |clause| create_amend(clause) }
end
# File lib/metanorma/standoc/cleanup_section.rb, line 153
# Runs the clause-before cleanup for the preface, then for the sections.
def clausebefore_cleanup(xmldoc)
  preface_clausebefore_cleanup(xmldoc)
  sections_clausebefore_cleanup(xmldoc)
end
# File lib/metanorma/standoc/cleanup.rb, line 26
# Runs every cleanup pass over xmldoc in a fixed order and returns xmldoc.
# The order is significant; element_name_cleanup is deliberately run three
# times. The IEV cleanup sits between the two lists because it takes the
# bibliographic database as an extra argument.
def cleanup(xmldoc)
  before_iev = %i[
    element_name_cleanup sections_cleanup obligations_cleanup table_cleanup
    formula_cleanup form_cleanup sourcecode_cleanup figure_cleanup
    element_name_cleanup ref_cleanup note_cleanup clausebefore_cleanup
    floatingtitle_cleanup bibitem_cleanup normref_cleanup biblio_cleanup
    reference_names symbols_cleanup xref_cleanup concept_cleanup
    related_cleanup origin_cleanup bookmark_cleanup termdef_cleanup
  ]
  after_iev = %i[
    element_name_cleanup index_cleanup bpart_cleanup quotesource_cleanup
    callout_cleanup footnote_cleanup mathml_cleanup script_cleanup
    docidentifier_cleanup requirement_cleanup bibdata_cleanup svgmap_cleanup
    boilerplate_cleanup toc_cleanup metadata_cleanup smartquotes_cleanup
    variant_cleanup para_cleanup empty_element_cleanup img_cleanup
    anchor_cleanup
  ]
  before_iev.each { |pass| send(pass, xmldoc) }
  RelatonIev.iev_cleanup(xmldoc, @bibdb)
  after_iev.each { |pass| send(pass, xmldoc) }
  xmldoc
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 59
# For every <concept> without a <termxref>: drops an empty <refterm>, then
# resolves the concept through concept_cleanup1.
def concept_cleanup(xmldoc)
  xmldoc.xpath("//concept[not(termxref)]").each do |concept|
    refterm = concept.at("./refterm")
    refterm&.text&.empty? and refterm.remove
    concept_cleanup1(concept)
  end
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 67
# Resolves one concept according to its key attribute: a key containing ":"
# becomes a termbase reference, a key accepted by refid? becomes an eref,
# anything else becomes an xref. The key attribute is then removed.
def concept_cleanup1(elem)
  # Whitespace-only content is discarded.
  elem.children.remove if elem&.children&.text&.strip&.empty?
  key_extract_locality(elem)
  if /:/.match?(elem["key"])
    concept_termbase_cleanup(elem)
  elsif refid?(elem["key"])
    concept_eref_cleanup(elem)
  else
    concept_xref_cleanup(elem)
  end
  elem.delete("key")
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 105
# Appends an <eref> pointing at the concept's key. Any <locality> content is
# moved into the eref and parsed into localities; any <xrefrender> content
# is then appended as the eref's rendering.
def concept_eref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children&.to_xml
  locality = elem&.at("./locality")&.remove&.children&.to_xml
  elem.add_child("<eref bibitemid='#{elem['key']}'>#{locality}</eref>")
  extract_localities(elem.elements[-1])
  elem.elements[-1].add_child(rendering) if rendering
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 93
# Appends a <termref> built from a "base:target" key, carrying over the
# content of any <xrefrender> child as its rendering.
def concept_termbase_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  termbase, target = elem["key"].split(/:/, 2)
  opening = %(<termref base="#{termbase}" target="#{target}">)
  elem.add_child("#{opening}#{rendering&.to_xml}</termref>")
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 100
# Appends an <xref> targeting the concept's key, carrying over the content
# of any <xrefrender> child as its rendering.
def concept_xref_cleanup(elem)
  rendering = elem&.at("./xrefrender")&.remove&.children
  target = elem["key"]
  elem.add_child(%(<xref target="#{target}">#{rendering&.to_xml}</xref>))
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 184
# Derives a GUID-shaped identifier ("_" followed by 8-4-4-4-12 hex digits)
# from the MD5 digest of the element's path and text.
def contenthash(elem)
  hex = Digest::MD5.hexdigest("#{elem.path}////#{elem.text}")
  # An MD5 hex digest is always 32 characters, so the slices cover it fully.
  groups = [hex[0, 8], hex[8, 4], hex[12, 4], hex[16, 4], hex[20, 12]]
  "_#{groups.join('-')}"
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 161
# Replaces GUID ids with content hashes, then repoints references to them.
def contenthash_id_cleanup(doc)
  mapping = contenthash_id_make(doc)
  contenthash_id_update_refs(doc, mapping)
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 166
# Rewrites every id accepted by guid? to the element's content hash.
# Returns a hash mapping each old id to its replacement.
def contenthash_id_make(doc)
  doc.xpath("//*[@id]").each_with_object({}) do |node, mapping|
    old_id = node["id"]
    next unless guid?(old_id)

    new_id = contenthash(node)
    mapping[old_id] = new_id
    node["id"] = new_id
  end
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 175
# Repoints referencing attributes to the new ids. ids maps old id to new id;
# attributes whose value is not a key of ids are left unchanged.
def contenthash_id_update_refs(doc, ids)
  [%w(review from), %w(review to), %w(callout target),
   %w(eref bibitemid), %w(citation bibitemid), %w(xref target),
   %w(xref to)].each do |tag, attr|
    doc.xpath("//#{tag}").each do |node|
      replacement = ids[node[attr]]
      node[attr] = replacement if replacement
    end
  end
end
# File lib/metanorma/standoc/cleanup_amend.rb, line 10
# Adds an <amend> child with a fresh UUID-based id to clause c, moves every
# child element of c other than <title> and <amend> into it, and finishes
# the element through create_amend1.
def create_amend(c)
  amend = c.add_child("<amend id='_#{UUIDTools::UUID.random_create}'/>").first
  c.elements.each do |child|
    next if %w(amend title).include?(child.name)

    child.parent = amend
  end
  create_amend1(c, amend)
end
# File lib/metanorma/standoc/cleanup_amend.rb, line 18
# Structures the content of amend element a (see create_amend2), then tidies
# its first <description>: <autonumber> elements are moved in front of it
# and empty paragraphs are removed. Finally transfers attributes from
# clause c to a. Returns a.
def create_amend1(c, a)
  create_amend2(c, a)
  # create_amend2 emits no <description> when the amend holds only a quote,
  # so the description may legitimately be absent.
  if (desc = a.at("./description"))
    desc.xpath(".//autonumber").each { |e| desc.previous = e }
    desc.xpath(".//p[normalize-space(.)='']").each(&:remove)
  end
  move_attrs_to_amend(c, a)
  a
end
# File lib/metanorma/standoc/cleanup_amend.rb, line 27
# Structures the content of amend element a. Without a <quote> child, all
# content is wrapped in one <description>. With one, the quote is renamed
# <newcontent>, and the siblings before and after it are each wrapped in
# their own <description> (omitted when empty). The clause c is not used.
def create_amend2(c, a)
  quote = a.at("./quote")
  if quote
    quote.name = "newcontent"
    before = quote.xpath("./preceding-sibling::*").remove
    after = quote.xpath("./following-sibling::*").remove
    before.empty? or a << "<description>#{before.to_xml}</description>"
    a << quote.remove
    after.empty? or a << "<description>#{after.to_xml}</description>"
  else
    a.children = "<description>#{a.children.to_xml}</description>"
  end
end
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 59
# Extracts the value held by a <dd>: nil when it is empty, a nested hash
# when it contains a <dl>, an array of unwrapped list items when its only
# element is an <ol> or <ul>, and its unwrapped content otherwise.
def dd_bib_extract(dtd)
  return nil if dtd.children.empty?
  return dl_bib_extract(dtd) if dtd.at("./dl")

  elems = dtd.remove.elements
  single_list = elems.size == 1 && %w(ol ul).include?(elems[0].name)
  return p_unwrap(dtd) unless single_list

  elems[0].xpath("./li").map { |li| p_unwrap(li) }
end
# File lib/metanorma/standoc/cleanup_table.rb, line 4
# Moves a <dl key="true"> that immediately follows a table into its
# preceding element.
def dl1_table_cleanup(xmldoc)
  xmldoc.xpath("//table/following-sibling::*[1][self::dl]").each do |dl|
    next unless dl["key"] == "true"

    dl.previous_element << dl.remove
  end
end
move Key dl after table footer
# File lib/metanorma/standoc/cleanup_table.rb, line 12
# When the paragraph right after a table reads just "key" (any case,
# optionally followed by non-letters) and is followed by a <dl>, marks that
# <dl> as a key, moves it into the element preceding the paragraph, and
# removes the paragraph.
def dl2_table_cleanup(xmldoc)
  xmldoc.xpath("//table/following-sibling::*[1][self::p]").each do |para|
    next unless /^\s*key[^a-z]*$/i.match?(para.text)
    next unless para&.next_element&.name == "dl"

    para.next_element["key"] = "true"
    para.previous_element << para.next_element.remove
    para.remove
  end
end
definition list, with at most one level of unordered lists
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 79
# Builds a hash from the <dl> directly under c; returns nil if there is
# none. Each <dt> (minus trailing colons) names the key for the <dd>s that
# follow it. Subclauses titled contributor, relation or series are extracted
# recursively. At the top level only, the title of c is appended to
# bib["title"].
def dl_bib_extract(c, nested = false)
  dl = c.at("./dl") or return
  bib = {}
  key = ""
  dl.xpath("./dt | ./dd").each do |node|
    if node.name == "dt"
      key = node.text.sub(/:+$/, "")
    else
      add_to_hash(bib, key, dd_bib_extract(node))
    end
  end
  c.xpath("./clause").each do |subclause|
    heading = subclause&.at("./title")&.text&.downcase&.strip
    next unless %w(contributor relation series).include? heading

    add_to_hash(bib, heading, dl_bib_extract(subclause, true))
  end
  if !nested && c.at("./title")
    title = c.at("./title").remove.children.to_xml
    # Normalise any existing title value to an array before appending.
    case bib["title"]
    when Hash, String then bib["title"] = [bib["title"]]
    when nil, false then bib["title"] = []
    end
    bib["title"] << title unless title.empty?
  end
  bib
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 136
# Returns the element preceding dlist when it is a term designation
# (preferred, admitted, deprecates or related); otherwise logs an error and
# returns nil.
def dl_to_designation(dlist)
  prev = dlist.previous_element
  return prev if %w(preferred admitted deprecates related).include?(prev&.name)

  @log.add("AsciiDoc Input", dlist,
           "Metadata definition list does not follow a term designation")
  nil
end
# File lib/metanorma/standoc/cleanup.rb, line 76
# Deliberately empty here; cleanup calls it as one of its passes.
def docidentifier_cleanup(xmldoc)
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 46
# Converts a bookmark that opens a <dt> into the id of that <dt>. The
# bookmark may be the first element of a leading <p> or of the <dt> itself,
# provided only whitespace precedes it.
def dt_bookmark_cleanup(xmldoc)
  in_para = "./*[1][local-name() = 'p']/*[1][local-name() = 'bookmark']"
  direct = "./*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//dt[descendant::bookmark]").each do |dt|
    if dt.at(in_para) && empty_text_before_first_element(dt.elements[0])
      bookmark_to_id(dt, dt.elements[0].elements[0])
    elsif dt.at(direct) && empty_text_before_first_element(dt)
      bookmark_to_id(dt, dt.elements[0])
    end
  end
end
# File lib/metanorma/standoc/cleanup_text.rb, line 52
# Applies Metanorma::Utils.smartformat to the text nodes of every element
# that has text children, skipping elements matched by IGNORE_DUMBQUOTES.
# Only text containing a character the regex below looks for is rewritten.
def dumb2smart_quotes(xmldoc)
  candidates = xmldoc.xpath("//*[child::text()]") -
    xmldoc.xpath(IGNORE_DUMBQUOTES)
  candidates.each do |elem|
    elem.children.each do |node|
      next unless node.text?
      next unless /[-'"(<>]|\.\.|\dx/.match(node)

      node.replace(Metanorma::Utils.smartformat(node.text))
    end
  end
end
# File lib/metanorma/standoc/cleanup_text.rb, line 65
# In every text node, turns a right single quotation mark (U+2019) that
# sits between an alphanumeric and a letter back into a straight apostrophe.
def dumbquote_cleanup(xmldoc)
  xmldoc.traverse do |node|
    next unless node.text?

    straightened = node.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'")
    node.replace(straightened)
  end
end
# File lib/metanorma/standoc/cleanup.rb, line 100
# Replaces underscores with hyphens in the name of every node in the tree.
def element_name_cleanup(xmldoc)
  xmldoc.traverse do |node|
    node.name = node.name.tr("_", "-")
  end
end
# File lib/metanorma/standoc/cleanup.rb, line 92
# Removes childless elements whose name is listed in TEXT_ELEMS, except a
# <name> directly under <expression>.
def empty_element_cleanup(xmldoc)
  query = "//#{TEXT_ELEMS.join(' | //')}"
  xmldoc.xpath(query).each do |node|
    next if node.name == "name" && node.parent.name == "expression"

    node.remove if node.children.empty?
  end
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 7
# True when no non-whitespace text occurs before the first child element of
# elem (also true when elem has no element children and no such text).
def empty_text_before_first_element(elem)
  elem.children.each do |child|
    if child.text?
      return false if /\S/.match(child.text)
    end
    return true if child.element?
  end
  true
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 4
# Fills the "%" placeholder of the i18n external-terms boilerplate with
# sources ("???" when nil) and localises the result for @lang and @script.
def external_terms_boilerplate(sources)
  template = @i18n.external_terms_boilerplate
  filled = template.gsub(/%/, sources || "???")
  @i18n.l10n(filled, @lang, @script)
end
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 34
# Wraps the children of the first <p> found in bib[tag] in a <key> element,
# returned as a string. Returns nil when bib has no value for tag.
def extract_from_p(tag, bib, key)
  source = bib[tag]
  return unless source

  "<#{key}>#{source.at('p').children}</#{key}>"
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 21
# When elem starts with a text node, parses that text into localities
# (extract_localities1) and re-appends the remaining children after them.
# Does nothing when elem is empty or starts with a non-text node.
def extract_localities(elem)
  first = elem&.children&.first or return
  return unless first.text?

  head = first.remove.text
  rest = elem&.children&.remove
  extract_localities1(elem, head)
  rest and elem << rest
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 30
# Repeatedly matches LOCALITY_RE against text, adding one <locality> per
# match to the current <localityStack>; a ";" separator starts a new stack.
# Whatever text is left over is appended to elem.
def extract_localities1(elem, text)
  stack = elem.add_child("<localityStack/>").first if LOCALITY_RE.match text
  while (m = LOCALITY_RE.match text)
    from = m[:ref] ? "<referenceFrom>#{tq m[:ref]}</referenceFrom>" : ""
    to = m[:to] ? "<referenceTo>#{tq m[:to]}</referenceTo>" : ""
    stack.add_child(
      "<locality type='#{locality_label(m)}'>#{from}#{to}</locality>"
    )
    text = m[:text]
    stack = elem.add_child("<localityStack/>").first if m[:punct] == ";"
  end
  elem.add_child(text) if text
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 35
# Moves notes flagged "appended" back out of each bibitem, placing them
# after it in their original order, and clears the flag.
def extract_notes_from_biblio(refs)
  refs.xpath("./bibitem").each do |item|
    # Reverse iteration, since each note is inserted directly after the item.
    item.xpath("./note[@appended]").reverse.each do |note|
      note.delete("appended")
      item.next = note
    end
  end
end
# File lib/metanorma/standoc/cleanup_symbols.rb, line 26
# Detaches the <dt>/<dd> children of dlist and returns them as an array of
# hashes, one per <dt>, with keys :dt, :key (from symbol_key) and :dd.
def extract_symbols_list(dlist)
  dlist.xpath("./dt | ./dd").each_with_object([]) do |node, entries|
    if node.name == "dt"
      entries << { dt: node.remove, key: symbol_key(node) }
    else
      entries.last[:dd] = node.remove
    end
  end
end
if citation uri points to local file, get bibitem from it
# File lib/metanorma/standoc/cleanup_ref.rb, line 147
# For each bibitem with a formattedref and a citation URI, replaces it with
# the bibitem that read_local_bibitem returns for that URI (keeping the
# original id). Bibitems for which nothing is returned are left alone.
def fetch_local_bibitem(xmldoc)
  query = "//bibitem[formattedref][uri[@type = 'citation']]"
  xmldoc.xpath(query).each do |item|
    uri = item&.at("./uri[@type = 'citation']")&.text
    local = read_local_bibitem(uri) or next
    local["id"] = item["id"]
    item.replace(local)
  end
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 125
# Stub: ignores both arguments and returns an empty string.
def fetch_termbase(_termbase, _id)
  String.new("")
end
# File lib/metanorma/standoc/cleanup_block.rb, line 76
# Runs the figure passes in order: footnotes, the two key definition list
# passes, then subfigures.
def figure_cleanup(xmldoc)
  figure_footnote_cleanup(xmldoc)
  figure_dl_cleanup1(xmldoc)
  figure_dl_cleanup2(xmldoc)
  subfigure_cleanup(xmldoc)
end
# File lib/metanorma/standoc/cleanup_block.rb, line 46
# Moves each <dl key="true"> that is a following sibling of a figure into
# its own preceding element.
def figure_dl_cleanup1(xmldoc)
  xmldoc.xpath("//figure/following-sibling::*[self::dl]").each do |dl|
    next unless dl["key"] == "true"

    dl.previous_element << dl.remove
  end
end
include key definition list inside figure
# File lib/metanorma/standoc/cleanup_block.rb, line 54
# When a paragraph following a figure reads just "key" (any case, optionally
# followed by non-letters) and is followed by a <dl>, marks that <dl> as a
# key, moves it into the element preceding the paragraph, and removes the
# paragraph.
def figure_dl_cleanup2(xmldoc)
  xmldoc.xpath("//figure/following-sibling::*[self::p]").each do |para|
    next unless /^\s*key[^a-z]*$/i.match?(para.text)
    next unless para&.next_element&.name == "dl"

    para.next_element["key"] = "true"
    para.previous_element << para.next_element.remove
    para.remove
  end
end
Include footnotes inside the figure if they are the only content of the paragraphs that follow it.
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 15
# Moves a leading footnote from the paragraph right after a figure into the
# preceding element and drops the paragraph, provided the paragraph has no
# alphabetic text of its own. Repeats until a pass moves nothing, so that
# runs of such paragraphs are all absorbed.
def figure_footnote_cleanup(xmldoc)
  query = "//figure/following-sibling::*[1][self::p and *[1][self::fn]]"
  pending = true
  while pending
    pending = false
    xmldoc.xpath(query).each do |para|
      has_text = para.children.any? do |c|
        c.text? && /[[:alpha:]]/.match(c.text)
      end
      next if has_text

      para.previous_element << para.first_element_child.remove
      para.remove
      pending = true
    end
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 203
# Takes the first floating title directly under <preface>; if no element
# follows it there, moves it to the very start of <sections>.
def floating_title_preface2sections(xmldoc)
  title = xmldoc.at("//preface/floating-title") or return
  sections = xmldoc.at("//sections")
  return if title.next_element

  sections.children.first.previous = title.remove
end
# File lib/metanorma/standoc/cleanup_section.rb, line 184
# Runs the floating title passes: pop_floating_title, then the move from
# the end of the preface to the start of the sections.
def floatingtitle_cleanup(xmldoc)
  pop_floating_title(xmldoc)
  floating_title_preface2sections(xmldoc)
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 26
# Moves each run of <note> elements that directly follows a bibitem into
# that bibitem, flagging them "appended" so they can be extracted later.
def fold_notes_into_biblio(refs)
  refs.xpath("./bibitem").each do |item|
    while item&.next_element&.name == "note"
      note = item.next_element
      note["appended"] = true
      item << note.remove
    end
  end
end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 85
# Turns each <footnoteblock> into an <fn> whose content is taken from the
# element whose id equals the block's text (that element is removed). An
# unresolvable id is logged and the footnote is filled with "[ERROR]".
def footnote_block_cleanup(xmldoc)
  xmldoc.xpath("//footnoteblock").each do |block|
    block.name = "fn"
    target = xmldoc.at("//*[@id = '#{block.text}']")
    if target
      block.children = target.remove.children
    else
      @log.add("Crossreferences", block,
               "Could not resolve footnoteblock:[#{block.text}]")
      block.children = "[ERROR]"
    end
  end
end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 98
# Runs the footnote passes (block resolution, title footnotes, table and
# other renumbering), then strips the table attribute from every <fn>.
def footnote_cleanup(xmldoc)
  footnote_block_cleanup(xmldoc)
  title_footnote_move(xmldoc)
  table_footnote_renumber(xmldoc)
  other_footnote_renumber(xmldoc)
  xmldoc.xpath("//fn").each { |fn| fn.delete("table") }
end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 8
# Returns the footnote's children as a string with all id attributes
# stripped. Children are serialised with to_xml when they support it, and
# used as they are otherwise.
def footnote_content(fn)
  kids = fn.children
  markup = kids.respond_to?(:to_xml) ? kids.to_xml : kids
  markup.gsub(/ id="[^"]+"/, "")
end
# File lib/metanorma/standoc/cleanup_block.rb, line 164
# Moves each run of <option> elements that directly follows a <select> into
# that <select>.
def form_cleanup(xmldoc)
  xmldoc.xpath("//select").each do |select|
    select << select.next_element while select&.next_element&.name == "option"
  end
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 102
# Formats a reference identifier. Types other than "metanorma" are delegated
# to @isodoc.docid_prefix. A purely numeric metanorma identifier is wrapped
# in square brackets; anything else is returned unchanged.
def format_ref(ref, type)
  unless type == "metanorma"
    return @isodoc.docid_prefix(type, ref)
  end

  numeric = /^\d+$/.match?(ref)
  bracketed = /^\[.*\]$/.match?(ref)
  numeric && !bracketed ? "[#{ref}]" : ref
end
include where definition list inside stem block
# File lib/metanorma/standoc/cleanup_block.rb, line 23
# Runs both "where" definition list passes for formulas.
def formula_cleanup(formula)
  formula_cleanup_where1(formula)
  formula_cleanup_where2(formula)
end
# File lib/metanorma/standoc/cleanup_block.rb, line 28
# Moves a <dl key="true"> that immediately follows a formula into its
# preceding element.
def formula_cleanup_where1(formula)
  formula.xpath("//formula/following-sibling::*[1][self::dl]").each do |dl|
    next unless dl["key"] == "true"

    dl.previous_element << dl.remove
  end
end
# File lib/metanorma/standoc/cleanup_block.rb, line 35
# When the paragraph right after a formula reads just "where" (any case,
# optionally followed by non-letters) and is followed by a <dl>, marks that
# <dl> as a key, moves it into the element preceding the paragraph, and
# removes the paragraph.
def formula_cleanup_where2(formula)
  formula.xpath("//formula/following-sibling::*[1][self::p]").each do |para|
    next unless /^\s*where[^a-z]*$/i.match?(para.text)
    next unless para&.next_element&.name == "dl"

    para.next_element["key"] = "true"
    para.previous_element << para.next_element.remove
    para.remove
  end
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 155
# Strips the type attribute from every <eref> of the given type and returns
# the distinct bibitemid values, in order of first appearance.
def gather_indirect_erefs(xmldoc, prefix)
  erefs = xmldoc.xpath("//eref[@type = '#{prefix}']")
  targets = erefs.map do |eref|
    eref.delete("type")
    eref["bibitemid"]
  end
  targets.uniq
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 102
# Detaches every UnitsML <tag> element from the document and collects them,
# one per id, under a new <tagSet> child of unitsml. Does nothing further
# when no such element exists.
def gather_unitsml(unitsml, xmldoc, tag)
  by_id = {}
  xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS).each do |node|
    # Later elements with the same id replace earlier ones.
    by_id[node["id"]] = node.remove
  end
  return if by_id.empty?

  set = unitsml.add_child("<#{tag}Set/>").first
  by_id.each_value { |node| set << node }
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 33
# For each term lacking a <definition>, creates one at the position of the
# first node matching TERMDEF_BLOCKS and moves all such nodes into it.
# Terms without any matching node are skipped.
def generate_termdefinitions(xmldoc)
  xmldoc.xpath("//term[not(definition)]").each do |term|
    first_block = term.at(TERMDEF_BLOCKS) or next
    definition = Nokogiri::XML::Element.new("definition", xmldoc)
    first_block.replace(definition)
    definition << first_block.remove
    term.xpath(TERMDEF_BLOCKS).each { |block| definition << block.remove }
  end
end
# File lib/metanorma/standoc/cleanup_section_names.rb, line 4
# Returns the <title> child of node, first inserting an empty one as the
# first child when the node has none.
def get_or_make_title(node)
  if node.at("./title").nil?
    if node.children.empty?
      node << "<title/>"
    else
      node.children.first.previous = "<title/>"
    end
  end
  node.at("./title")
end
# File lib/metanorma/standoc/cleanup_symbols.rb, line 20
# Rewrites each whole-word Greek letter name in text (matched
# case-insensitively) as an entity reference, e.g. "alpha" -> "&alpha;".
# The original casing of the name is kept.
def grkletters(text)
  names = %w(alpha beta gamma delta epsilon zeta eta theta iota kappa
             lambda mu nu xi omicron pi rho sigma tau upsilon phi chi
             psi omega)
  text.gsub(/\b(#{names.join('|')})\b/i, "&\\1;")
end
# File lib/metanorma/standoc/cleanup_image.rb, line 10
# True when str is exactly an underscore followed by a UUID in 8-4-4-4-12
# hexadecimal form (either case). Returns false for nil.
def guid?(str)
  # \A and \z anchor the whole string; ^ and $ would accept a GUID that
  # merely occupies one line of a multi-line value.
  /\A_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}\z/i
    .match?(str)
end
# File lib/metanorma/standoc/cleanup_table.rb, line 34
# For each table with a headerrows attribute, moves leading body rows into
# <thead> until it holds that many rows, converts every <td> in the header
# to <th>, and removes the attribute.
def header_rows_cleanup(xmldoc)
  xmldoc.xpath("//table[@headerrows]").each do |table|
    thead = insert_thead(table)
    wanted = table["headerrows"].to_i
    missing = wanted - thead.xpath("./tr").size
    missing.times do
      row = table.at("./tbody/tr")
      row.parent = thead
    end
    thead.xpath(".//td").each { |cell| cell.name = "th" }
    table.delete("headerrows")
  end
end
# File lib/metanorma/standoc/cleanup_image.rb, line 61
# When @datauriimage is set, replaces the src of every <image> with the
# result of Metanorma::Utils.datauri for that src and @localdir. Otherwise
# returns the document untouched.
def img_cleanup(xmldoc)
  return xmldoc unless @datauriimage

  xmldoc.xpath("//image").each do |image|
    image["src"] = Metanorma::Utils.datauri(image["src"], @localdir)
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 118
# When @index_terms is set, adds index entries for every preferred term
# name (qualified by its field of application and usage info, joined with
# ", ") and for every <dt> of a definitions list.
def index_cleanup(xmldoc)
  return unless @index_terms

  xmldoc.xpath("//preferred").each do |preferred|
    name = preferred.at("./expression/name | ./letter-symbol/name")
    qualifiers = preferred.xpath("./field-of-application | ./usage-info")
      &.map(&:text)&.join(", ")
    index_cleanup1(name, qualifiers)
  end
  xmldoc.xpath("//definitions/dl/dt").each do |dt|
    index_cleanup1(dt, "")
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 131
# Appends an <index><primary> entry to term, built from the term's current
# content. A non-empty fieldofappl is added after the content as
# ", <fieldofappl>", with the angle brackets escaped. Does nothing when
# term is nil.
def index_cleanup1(term, fieldofappl)
  return unless term

  # Work on the serialised markup: a plain String cannot be pushed onto a
  # Nokogiri NodeSet, and raw angle brackets would be parsed as a tag.
  primary = term.children.to_xml
  primary += ", &lt;#{fieldofappl}&gt;" unless fieldofappl.empty?
  term << "<index><primary>#{primary}</primary></index>"
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 176
# Converts an <eref> into an <xref>. The target is the text of its anchor
# locality (which is removed) or, failing that, ident. If the document has
# no element with that id, the xref is given a "Missing target" message and
# is pointed at ident instead.
def indirect_eref_to_xref(eref, ident)
  stacked = eref&.at("./localityStack[locality[@type = 'anchor']]")
  target = stacked&.remove&.text ||
    eref&.at("./locality[@type = 'anchor']")&.remove&.text ||
    ident
  eref.name = "xref"
  %w(bibitemid citeas).each { |attr| eref.delete(attr) }
  eref["target"] = target
  return if eref.document.at("//*[@id = '#{target}']")

  eref.children = %(** Missing target #{target})
  eref["target"] = ident
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 110
# Inserts the populated boilerplate before the first of <preface>,
# <sections>, <annex> or <references>. Does nothing when the document
# already has a <boilerplate>, when none of those containers exists, or
# when no boilerplate content is available.
def initial_boilerplate(xml, isodoc)
  return if xml.at("//boilerplate")

  anchor = xml.at("//preface") || xml.at("//sections") ||
    xml.at("//annex") || xml.at("//references")
  return unless anchor

  content = boilerplate(xml, isodoc) or return
  anchor.previous = content
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 28
# True when elem has exactly one child element (a paragraph containing just
# a stem expression) and no preceding sibling paragraph or list.
def initial_formula(elem)
  return false unless elem.elements.size == 1

  earlier_blocks = "./preceding-sibling::p | ./preceding-sibling::dl | " \
                   "./preceding-sibling::ol | ./preceding-sibling::ul"
  !elem.at(earlier_blocks)
end
# File lib/metanorma/standoc/cleanup_block.rb, line 16
# Gives every node matched by path an id from
# Metanorma::Utils.anchor_or_uuid, unless it already has one.
def inject_id(xmldoc, path)
  xmldoc.xpath(path).each do |node|
    node["id"] || node["id"] = Metanorma::Utils.anchor_or_uuid
  end
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 163
# Adds a hidden, non-normative <references> to the bibliography (creating
# the <bibliography> at the end of the root if absent), holding one internal
# bibitem per ref. The repository docidentifier is the ref with its leading
# "prefix_" rewritten as "prefix/".
def insert_indirect_biblio(xmldoc, refs, prefix)
  xmldoc.at("bibliography") or xmldoc.root << "<bibliography/>"
  biblio = xmldoc.at("bibliography")
  hidden = biblio
    .add_child("<references hidden='true' normative='false'/>").first
  refs.each do |ref|
    repo_id = ref.sub(/^#{prefix}_/, "#{prefix}/")
    hidden << <<~BIB
      <bibitem id="#{ref}" type="internal">
      <docidentifier type="repository">#{repo_id}</docidentifier>
      </bibitem>
    BIB
  end
end
# File lib/metanorma/standoc/cleanup_table.rb, line 23
# Returns the table's <thead>, creating an empty one when absent: right
# after <name> if the table has one, otherwise as the first child.
def insert_thead(table)
  existing = table.at("./thead")
  return existing if existing

  caption = table.at("./name")
  return caption.add_next_sibling("<thead/>").first if caption

  table.children.first.add_previous_sibling("<thead/>").first
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 11
# Fills the "%" placeholder of the i18n internal-and-external-terms
# boilerplate with sources ("??" when nil) and localises the result for
# @lang and @script.
def internal_external_terms_boilerplate(sources)
  template = @i18n.internal_external_terms_boilerplate
  filled = template.gsub(/%/, sources || "??")
  @i18n.l10n(filled, @lang, @script)
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 86
# When the key attribute contains a comma, splits it at the first comma:
# the remainder is added as a <locality> child and the key is truncated to
# the part before the comma.
def key_extract_locality(elem)
  key = elem["key"]
  return unless /,/.match?(key)

  locality = key.sub(/^[^,]+,/, "")
  elem.add_child("<locality>#{locality}</locality>")
  elem["key"] = key.sub(/,.*$/, "")
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 36
# Converts a bookmark that opens the leading <p> of an <li> into the id of
# that <li>, provided only whitespace precedes it.
def li_bookmark_cleanup(xmldoc)
  leading = "./*[1][local-name() = 'p']/*[1][local-name() = 'bookmark']"
  xmldoc.xpath("//li[descendant::bookmark]").each do |li|
    next unless li.at(leading)
    next unless empty_text_before_first_element(li.elements[0])

    bookmark_to_id(li, li.elements[0].elements[0])
  end
end
# File lib/metanorma/standoc/cleanup_block.rb, line 103
# Pairs callouts with annotations by position: each callout gets a fresh
# UUID-based target and the annotation at the same index gets it as its id.
def link_callouts_to_annotations(callouts, annotations)
  callouts.each_with_index do |callout, idx|
    callout["target"] = "_#{UUIDTools::UUID.random_create}"
    annotations[idx]["id"] = callout["target"]
  end
end
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 51
# When the paragraph's only element is a <link>, replaces that link with
# its stripped target. Returns the paragraph in every case.
def link_unwrap(para)
  elems = para.elements
  sole_link = elems.size == 1 && elems[0].name == "link"
  para.at("./link").replace(elems[0]["target"].strip) if sole_link
  para
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 43
# Returns the locality label of a match: its :locality capture or, failing
# that, :locality2. A label starting with "locality:" is kept verbatim;
# any other label is downcased. Returns nil when neither capture is set.
def locality_label(match)
  label = match[:locality] || match[:locality2]
  return label if /^locality:/.match?(label)

  label&.downcase
end
# File lib/metanorma/standoc/cleanup_section.rb, line 29
# Moves the first abstract that is not inside a bibitem to the start of the
# preface (creating the preface before sect if needed), and inserts a copy
# of it into bibdata after the node given by bibabstract_location. The copy
# loses its ids, its language and script attributes, and its title.
def make_abstract(xml, sect)
  return unless xml.at("//abstract[not(ancestor::bibitem)]")

  preface = sect.at("//preface") ||
    sect.add_previous_sibling("<preface/>").first
  abstract = xml.at("//abstract[not(ancestor::bibitem)]").remove
  preface.prepend_child abstract.remove
  bibabstract = bibabstract_location(xml)
  copy = abstract.dup
  copy.traverse { |node| node.remove_attribute("id") }
  %w(language script).each { |attr| copy.remove_attribute(attr) }
  copy&.at("./title")&.remove
  bibabstract.next = copy
end
# File lib/metanorma/standoc/cleanup_section.rb, line 80
# Removes the annex attribute from every element carrying it. Elements that
# are not already an <annex> or inside one are wrapped in a new <annex>,
# which gets a fresh UUID-based id and copies the element's obligation,
# language and script attributes.
def make_annexes(xml)
  xml.xpath("//*[@annex]").each do |node|
    node.delete("annex")
    next if node.name == "annex" || !node.ancestors("annex").empty?

    node.wrap("<annex/>")
    node.parent["id"] = "_#{UUIDTools::UUID.random_create}"
    %w(obligation language script).each do |attr|
      node.parent[attr] = node[attr]
    end
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 56
# When <sections> directly contains <references>, creates a <bibliography>
# right after sect and moves all of those <references> into it.
def make_bibliography(xml, sect)
  return unless xml.at("//sections/references")

  biblio = sect.add_next_sibling("<bibliography/>").first
  xml.xpath("//sections/references").each do |refs|
    biblio.add_child(refs.remove)
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 65
# Moves every <indexsect> directly under <sections> to just after sect,
# preserving their order.
def make_indexsect(xml, sect)
  # Reverse iteration, since each one is inserted directly after sect.
  xml.xpath("//sections/indexsect").reverse_each do |index|
    sect.next = index.remove
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 10
# When the document has a foreword, introduction, acknowledgements or any
# element flagged preface, creates a <preface> before sect and fills it in
# this order: foreword, introduction, the clauses moved by
# move_clauses_into_preface, acknowledgements. The abstract is handled
# afterwards in every case.
def make_preface(xml, sect)
  if xml.at("//foreword | //introduction | //acknowledgements | " \
            "//*[@preface]")
    preface = sect.add_previous_sibling("<preface/>").first
    %w(foreword introduction).each do |tag|
      found = xml.at("//#{tag}")
      preface.add_child(found.remove) if found
    end
    move_clauses_into_preface(xml, preface)
    ack = xml.at("//acknowledgements")
    preface.add_child(ack.remove) if ack
  end
  make_abstract(xml, sect)
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 208
# Runs the MathML passes over every <stem type="MathML">: unescaping,
# namespacing, space preservation, UnitsML conversion, mathvariant
# resolution and italicisation. Then gathers UnitsML definitions for the
# whole document.
def mathml_cleanup(xmldoc)
  converter = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
  xmldoc.xpath("//stem[@type = 'MathML']").each do |stem|
    xml_unescape_mathml(stem)
    mathml_namespace(stem)
    mathml_preserve_space(stem)
    converter.MathML2UnitsML(stem)
    mathml_mathvariant(stem)
    mathml_italicise(stem)
  end
  mathml_unitsML(xmldoc)
end
Presuppose that multi-character mi elements are upright, and that single-character mi elements take the MathML default of italic.
# File lib/metanorma/standoc/cleanup_maths.rb, line 60
# Sets mathvariant="normal" on every <mi> that has no ancestor carrying a
# mathvariant and whose decoded text is accepted by mi_italicise?.
def mathml_italicise(xml)
  query = ".//m:mi[not(ancestor::*[@mathvariant])]"
  xml.xpath(query, "m" => MATHML_NS).each do |mi|
    decoded = HTMLEntities.new.decode(mi.text)
    mi["mathvariant"] = "normal" if mi_italicise?(decoded)
  end
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 199
# Resolves nested mathvariant attributes: every element with a mathvariant
# that sits inside another such element gets the combination of the two, as
# computed by mathvariant_override.
def mathml_mathvariant(math)
  math.xpath(".//*[@mathvariant]").each do |outer|
    outer.xpath(".//*[@mathvariant]").each do |inner|
      # mathvariant_override is declared as (inner, outer). Passing the
      # values the other way round made an inner variant it cannot combine
      # (e.g. italic inside fraktur) be replaced by the outer one instead
      # of being kept.
      inner["mathvariant"] =
        mathvariant_override(inner["mathvariant"], outer["mathvariant"])
    end
  end
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 54
# Flags per character class: upper- and lower-case Greek and Roman, all
# true here.
def mathml_mi_italics
  {
    uppergreek: true,
    upperroman: true,
    lowergreek: true,
    lowerroman: true,
  }
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 50
# Sets the default namespace of each <math> child of stem to MATHML_NS.
def mathml_namespace(stem)
  stem.xpath("./math").each do |math|
    math.default_namespace = MATHML_NS
  end
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 43
# Rewrites the content of every <mtext>, replacing a whitespace character
# at the start or end of a line with the replacement literal below.
# NOTE(review): the replacement appears here as a plain space; presumably
# the source file uses a non-breaking space (or its entity) so the space
# survives rendering -- confirm against the source file.
def mathml_preserve_space(math)
  math.xpath(".//m:mtext", "m" => MATHML_NS).each do |mtext|
    markup = mtext.children.to_xml
    mtext.children = markup.gsub(/^\s/, " ").gsub(/\s$/, " ")
  end
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 92
# When the document contains any UnitsML-namespaced element, adds a
# <UnitsML> container to the misc-container and gathers the Unit,
# CountedItem, Quantity, Dimension and Prefix definitions into it.
def mathml_unitsML(xmldoc)
  return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

  misc = add_misc_container(xmldoc)
  unitsml = misc.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
  %w(Unit CountedItem Quantity Dimension Prefix).each do |tag|
    gather_unitsml(unitsml, xmldoc, tag)
  end
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 117
# Combines two mathvariant values into one. The result is looked up by
# +outer+ first and then by +inner+; any pair without an entry returns
# +inner+ unchanged.
#
# The "sans-serif-italic" and "sans-serif-bold-italic" branches used to test
# only for "sans-serif-bold", which is not a MathML mathvariant value; they
# now also accept the valid "bold-sans-serif" (the old spelling is kept for
# backward compatibility).
def mathvariant_override(inner, outer)
  case outer
  when "bold"
    case inner
    when "normal" then "bold"
    when "italic" then "bold-italic"
    when "fraktur" then "bold-fraktur"
    when "script" then "bold-script"
    when "sans-serif" then "bold-sans-serif"
    when "sans-serif-italic" then "sans-serif-bold-italic"
    else inner
    end
  when "italic"
    case inner
    when "normal" then "italic"
    when "bold" then "bold-italic"
    when "sans-serif" then "sans-serif-italic"
    when "bold-sans-serif" then "sans-serif-bold-italic"
    else inner
    end
  when "bold-italic"
    case inner
    when "normal", "bold", "italic" then "bold-italic"
    when "sans-serif", "bold-sans-serif", "sans-serif-italic"
      "sans-serif-bold-italic"
    else inner
    end
  when "fraktur"
    case inner
    when "normal" then "fraktur"
    when "bold" then "bold-fraktur"
    else inner
    end
  when "bold-fraktur"
    case inner
    when "normal", "fraktur" then "bold-fraktur"
    else inner
    end
  when "script"
    case inner
    when "normal" then "script"
    when "bold" then "bold-script"
    else inner
    end
  when "bold-script"
    case inner
    when "normal", "script" then "bold-script"
    else inner
    end
  when "sans-serif"
    case inner
    when "normal" then "sans-serif"
    when "bold" then "bold-sans-serif"
    when "italic" then "sans-serif-italic"
    when "bold-italic" then "sans-serif-bold-italic"
    else inner
    end
  when "bold-sans-serif"
    case inner
    when "normal", "bold", "sans-serif" then "bold-sans-serif"
    when "italic", "bold-italic", "sans-serif-italic"
      "sans-serif-bold-italic"
    else inner
    end
  when "sans-serif-italic"
    case inner
    when "normal", "italic", "sans-serif" then "sans-serif-italic"
    when "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
      "sans-serif-bold-italic"
    else inner
    end
  when "sans-serif-bold-italic"
    case inner
    when "normal", "italic", "sans-serif", "sans-serif-italic",
         "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
      "sans-serif-bold-italic"
    else inner
    end
  else inner
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 93
# Returns the largest integer level attribute found on any clause, or 5 when
# no clause level exceeds 5.
def maxlevel(xml)
  levels = xml.xpath("//clause[@level]").map { |clause| clause["level"].to_i }
  [5, *levels].max
end
# File lib/metanorma/standoc/cleanup_block.rb, line 119
# Reparents into each sourcecode element the run of annotation elements that
# immediately follows it.
def merge_annotations_into_sourcecode(xmldoc)
  xmldoc.xpath("//sourcecode").each do |code|
    loop do
      following = code.next_element
      break unless following&.name == "annotation"

      following.parent = code
    end
  end
end
# File lib/metanorma/standoc/cleanup.rb, line 148
# Appends @metadata_attrs to the misc-container, creating that container
# directly after bibdata when it does not exist yet. Does nothing when
# @metadata_attrs is nil or empty.
def metadata_cleanup(xmldoc)
  return if @metadata_attrs.nil? || @metadata_attrs.empty?

  container = xmldoc.at("//misc-container")
  container ||=
    xmldoc.at("//bibdata").after("<misc-container/>").next_element
  container << @metadata_attrs
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 68
# Truthy when +char+ is a single Greek or Latin letter whose case/script
# class is switched off in mathml_mi_italics. Strings longer than one
# character and characters of other scripts yield false.
def mi_italicise?(char)
  return false if char.length > 1

  if /\p{Greek}/.match?(char)
    lower = :lowergreek
    upper = :uppergreek
  elsif /\p{Latin}/.match?(char)
    lower = :lowerroman
    upper = :upperroman
  else
    return false
  end
  (/\p{Lower}/.match(char) && !mathml_mi_italics[lower]) ||
    (/\p{Upper}/.match(char) && !mathml_mi_italics[upper])
end
# File lib/metanorma/standoc/cleanup_amend.rb, line 40
# Moves the change, path, path_end and title attributes from +c+ to +a+.
# If +a+ has a locality attribute, a <location/> element is added before
# a's children, filled by extract_localities1 (a localityStack inside it is
# replaced by its child elements), and the locality attribute is removed.
def move_attrs_to_amend(c, a)
  %w(change path path_end title).each do |attr|
    value = c[attr] or next
    a[attr] = value
    c.delete(attr)
  end
  return unless a["locality"]

  loc = a.children.add_previous_sibling("<location/>")
  extract_localities1(loc, a["locality"])
  stack = loc.at("./localityStack")
  loc.replace(stack.elements) if stack
  a.delete("locality")
end
# File lib/metanorma/standoc/cleanup_section.rb, line 22
# Moves every element carrying a preface attribute into +preface+, dropping
# the attribute on the way.
def move_clauses_into_preface(xml, preface)
  xml.xpath("//*[@preface]").each do |clause|
    clause.delete("preface")
    preface.add_child(clause.remove)
  end
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 44
# If +ref+ has a boilerplate note, unwraps it through
# unwrap_boilerplate_clauses. Otherwise inserts a paragraph after the title,
# using the "empty" or "with refs" i18n text depending on whether +ref+ has
# any references or bibitem child elements.
def norm_ref_preface(ref)
  if ref.at("./note[@type = 'boilerplate']")
    return unwrap_boilerplate_clauses(ref, ".")
  end

  no_refs = ref.elements.none? do |e|
    %w(references bibitem).include?(e.name)
  end
  pref = no_refs ? @i18n.norm_empty_pref : @i18n.norm_with_refs_pref
  ref.at("./title").next = "<p>#{pref}</p>"
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 75
# In the NORM_REF clause, removes the elements lying between the title and
# the first bibitem, except boilerplate notes and their descendants.
def normref_cleanup(xmldoc)
  r = xmldoc.at(self.class::NORM_REF) or return
  after_title = r.xpath("./title/following-sibling::*")
  before_bibitem = r.xpath("./bibitem[1]/preceding-sibling::*")
  boilerplate =
    r.xpath("./note[@type = 'boilerplate']/descendant-or-self::*")
  # set intersection, then set difference
  ((after_title & before_bibitem) - boilerplate).each(&:remove)
end
If a note is at the end of a section, it is left alone. If a note is followed by a non-note block, it is moved inside its preceding block, provided that block is not delimited (so there was no way of making that block include the note).
# File lib/metanorma/standoc/cleanup_block.rb, line 89
# Reparents each note that has a following non-note sibling into its previous
# sibling element, when that element's name is in ELEMS_ALLOW_NOTES. Notes
# with keep-separate="true" or inside a table are skipped. Afterwards the
# keep-separate attribute is deleted from all notes and termnotes.
def note_cleanup(xmldoc)
  followed = "//note[following-sibling::*[not(local-name() = 'note')]]"
  xmldoc.xpath(followed).each do |note|
    next if note["keep-separate"] == "true"
    next unless note.ancestors("table").empty?

    prev = note.previous_element or next
    note.parent = prev if ELEMS_ALLOW_NOTES.include?(prev.name)
  end
  flagged = "//note[@keep-separate] | //termnote[@keep-separate]"
  xmldoc.xpath(flagged).each do |note|
    note.delete("keep-separate")
  end
end
Move notes that immediately follow a table into that table.
# File lib/metanorma/standoc/cleanup_table.rb, line 54
# Repeatedly appends to each table the note that directly follows it (unless
# the note has keep-separate="true"), until no such note is left.
def notes_table_cleanup(xmldoc)
  trailing_note = "//table/following-sibling::*[1]"\
                  "[self::note[not(@keep-separate = 'true')]]"
  loop do
    notes = xmldoc.xpath(trailing_note)
    break if notes.empty?

    notes.each do |note|
      note.delete("keep-separate")
      note.previous_element << note.remove
    end
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 121 def obligations_cleanup(xml) obligations_cleanup_info(xml) obligations_cleanup_norm(xml) obligations_cleanup_inherit(xml) end
# File lib/metanorma/standoc/cleanup_section.rb, line 127
# Marks as informative: the first foreword, introduction and
# acknowledgements, every references element, and every clause inside the
# preface.
def obligations_cleanup_info(xml)
  %w(foreword introduction acknowledgements).each do |name|
    section = xml.at("//#{name}")
    section["obligation"] = "informative" if section
  end
  xml.xpath("//references").each do |refs|
    refs["obligation"] = "informative"
  end
  xml.xpath("//preface//clause").each do |clause|
    clause["obligation"] = "informative"
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 144
# Defaults the obligation of annexes and non-boilerplate clauses to
# "normative" when unset; then each node matched by Utils::SUBCLAUSE_XPATH
# takes the obligation attribute value returned first for its ancestors.
def obligations_cleanup_inherit(xml)
  xml.xpath("//annex | //clause[not(ancestor::boilerplate)]").each do |sect|
    sect["obligation"] ||= "normative"
  end
  xml.xpath(Utils::SUBCLAUSE_XPATH).each do |sub|
    inherited = sub.at("./ancestor::*/@obligation")&.text
    sub["obligation"] = inherited if inherited
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 137
# Marks as normative: the first scope clause, every terms element and every
# definitions element.
def obligations_cleanup_norm(xml)
  scope = xml.at("//clause[@type = 'scope']")
  scope["obligation"] = "normative" if scope
  xml.xpath("//terms").each do |terms|
    terms["obligation"] = "normative"
  end
  xml.xpath("//definitions").each do |defs|
    defs["obligation"] = "normative"
  end
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 92
# Replaces each concept directly under origin by its termref child. Then, for
# every origin, sets citeas from the :xref entry of @anchors for its
# bibitemid (logging when there is none) and extracts localities when the
# origin has children.
def origin_cleanup(xmldoc)
  xmldoc.xpath("//origin/concept[termref]").each do |concept|
    concept.replace(concept.at("./termref"))
  end
  xmldoc.xpath("//origin").each do |origin|
    citeas = @anchors&.dig(origin["bibitemid"], :xref)
    origin["citeas"] = citeas
    unless citeas
      @log.add("Crossreferences", origin,
               "#{origin['bibitemid']} does not have a corresponding anchor "\
               "ID in the bibliography!")
    end
    extract_localities(origin) unless origin.children.empty?
  end
end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 67
# Walks all fn elements in document order, threading the running counter and
# the content-to-number map through other_footnote_renumber1.
def other_footnote_renumber(xmldoc)
  numbered = {}
  counter = 0
  xmldoc.xpath("//fn").each do |fn|
    counter, numbered = other_footnote_renumber1(fn, counter, numbered)
  end
end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 53
# Assigns a decimal reference to +fn+ unless it has a table attribute.
# Footnotes with the same content (per footnote_content) share one number;
# new content takes the next number. Returns the updated [i, seen] pair.
def other_footnote_renumber1(fn, i, seen)
  return [i, seen] if fn["table"]

  content = footnote_content(fn)
  unless seen[content]
    i += 1
    seen[content] = i
  end
  fn["reference"] = seen[content].to_s
  [i, seen]
end
If the content is a single paragraph, replace it with its children; single links are replaced with their URI.
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 42
# Serialises +para+ to stripped XML. When its only child element is a p, the
# result is the children of that p after link_unwrap instead.
def p_unwrap(para)
  elems = para.elements
  single_para = elems.size == 1 && elems[0].name == "p"
  return para.to_xml.strip unless single_para

  link_unwrap(elems[0]).children.to_xml.strip
end
# File lib/metanorma/standoc/cleanup_block.rb, line 7
# Calls inject_id for p, ol, ul and quote elements outside bibdata, and for
# notes outside bibitem, table and bibdata.
def para_cleanup(xmldoc)
  paths = %w(p ol ul quote).map do |name|
    "//#{name}[not(ancestor::bibdata)]"
  end
  paths << "//note[not(ancestor::bibitem or "\
           "ancestor::table or ancestor::bibdata)]"
  paths.each { |path| inject_id(xmldoc, path) }
end
# File lib/metanorma/standoc/cleanup_section.rb, line 189
# Repeatedly moves each floating-title that is the last element of its parent
# to the end of its grandparent, unless the parent is sections, annex or
# preface. Stops after a pass in which nothing moved.
def pop_floating_title(xmldoc)
  stop_at = %w(sections annex preface)
  loop do
    moved = false
    xmldoc.xpath("//floating-title").each do |title|
      next if title.next_element || stop_at.include?(title.parent.name)

      title.parent.parent << title
      moved = true
    end
    break unless moved
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 158
# Moves every element under preface that has beforeclauses="true" in front of
# the node that was the preface's first child at the start, removing the
# attribute. A blank text node is added first when the preface is empty.
def preface_clausebefore_cleanup(xmldoc)
  preface = xmldoc.at("//preface") or return
  ins = preface.children.first
  unless ins
    preface << " "
    ins = preface.children.first
  end
  xmldoc.xpath("//preface//*[@beforeclauses = 'true']").each do |node|
    node.delete("beforeclauses")
    ins.previous = node.remove
  end
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 86
# Applies xref_to_eref to every source element directly under quote or terms.
def quotesource_cleanup(xmldoc)
  sources = xmldoc.xpath("//quote/source | //terms/source")
  sources.each { |source| xref_to_eref(source) }
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 129
# Loads a bibitem from a local file named after +uri+ (looked up under
# @localdir with the .rxl extension first, then .xml). Returns nil for
# http(s) URIs, when no such file exists, or when the file has no bibdata or
# no docidentifier. The bibdata element is reparsed without its default
# namespace declaration, renamed to bibitem, given a citation uri element
# before its docidentifier, and stripped of its ext element.
def read_local_bibitem(uri)
  return nil if %r{^https?://}.match?(uri)

  file = %w(rxl xml).map { |ext| "#{@localdir}#{uri}.#{ext}" }
    .find { |path| File.file?(path) }
  return nil unless file

  xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
  bibdata = xml.at("//*[local-name() = 'bibdata']") or return nil
  unprefixed = bibdata.to_xml
    .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")
  ret = Nokogiri::XML(unprefixed).root
  ret.name = "bibitem"
  ins = ret.at("./*[local-name() = 'docidentifier']") or return nil
  ins.previous = %{<uri type="citation">#{uri}</uri>}
  ret.at("./*[local-name() = 'ext']")&.remove
  ret
end
Move each ref out of its containing p, placing it immediately before that p.
# File lib/metanorma/standoc/cleanup_ref.rb, line 68
# Detaches every ref that is a child of a p and reinserts it as the previous
# sibling of that p.
def ref_cleanup(xmldoc)
  xmldoc.xpath("//p/ref").each do |ref|
    para = ref.parent
    para.previous = ref.remove
  end
end
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 7
# Replaces each clause flagged bibitem="true" with the bibitem built by
# RelatonBib from the hash that dl_bib_extract returns for it. The clause id
# is carried over unless it starts with an underscore.
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    bib = dl_bib_extract(clause)
    next unless bib

    validate_ref_dl(bib, clause)
    bibitemxml = RelatonBib::BibliographicItem.from_hash(bib).to_xml
    next unless bibitemxml

    bibitem = Nokogiri::XML(bibitemxml)
    id = clause["id"]
    bibitem.root["id"] = id if id && !/^_/.match?(id)
    clause.replace(bibitem.root)
  end
end
# File lib/metanorma/standoc/cleanup_ref.rb, line 115
# Records in @anchors, under each top-level bibitem's id, the xref label
# produced by format_ref from its metanorma docidentifier or, failing that,
# from its first non-DOI docidentifier. Bibitems with neither are skipped.
def reference_names(xmldoc)
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |bibitem|
    # isopub = ref.at(ISO_PUBLISHER_XPATH)
    docid = bibitem.at("./docidentifier[@type = 'metanorma']") ||
      bibitem.at("./docidentifier[not(@type = 'DOI')]")
    next unless docid

    label = format_ref(docid.text, docid["type"])
    @anchors[bibitem["id"]] = { xref: label }
  end
end
# File lib/metanorma/standoc/cleanup_section_names.rb, line 15
# Sets the title content of every node matching +xpath+ to +text+, keeping
# any fn children of the title by re-appending them after the new content.
# With +first+ set, only the first match is changed. Does nothing when +text+
# is nil or false.
def replace_title(doc, xpath, text, first = false)
  return unless text

  doc.xpath(xpath).each_with_index do |node, index|
    next if first && index.positive?

    title = get_or_make_title(node)
    footnotes = title.xpath("./fn")
    footnotes.each(&:remove)
    title.children = text
    footnotes.each { |fn| title << fn }
  end
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 97
# Inserts a classification element (tag and value) for each pair that
# req_classif_parse returns from the "classification" entries of +dlist+.
# Insertion starts after the last classification already in +reqt+ when
# there is one, otherwise after +ins+. Returns the last node inserted, or the
# starting point when nothing was added.
def reqt_dl_to_classif(ins, reqt, dlist)
  ins = reqt.at("./classification[last()]") || ins
  dlist.xpath("./dt[text()='classification']").each do |dt|
    val = dt.at("./following::dd/p") || dt.at("./following::dd")
    req_classif_parse(val.text).each do |pair|
      ins.next = "<classification><tag>#{pair[0]}</tag>"\
                 "<value>#{pair[1]}</value></classification>"
      ins = ins.next
    end
  end
  ins
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 4 def requirement_cleanup(xmldoc) requirement_metadata(xmldoc) requirement_inherit(xmldoc) requirement_descriptions(xmldoc) end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 61
# Merges each description whose next sibling element is also a description,
# by removing the node after it and appending that node's children; then
# replaces whitespace-only descriptions by a newline.
def requirement_description_cleanup1(reqt)
  adjacent = "./description[following-sibling::*[1]"\
             "[self::description]]"
  loop do
    desc = reqt.at(adjacent) or break
    following = desc.next.remove
    desc << following.children
  end
  reqt.xpath("./description[normalize-space(.)='']").each do |blank|
    blank.replace("\n")
  end
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 50
# Wraps +text+ in a new description element inserted in its place. Left
# untouched are: elements for which reqt_subpart is truthy, nested
# requirement/recommendation/permission elements, and nodes with blank text
# that contain no xref, eref or link.
def requirement_description_wrap(reqt, text)
  if text.element?
    nested = %w(requirement recommendation permission)
    return if reqt_subpart(text.name) || nested.include?(text.name)
  end
  return if text.text.strip.empty? &&
    !text.at(".//xref | .//eref | .//link")

  wrapper = Nokogiri::XML::Element.new("description", reqt)
  text.before(wrapper)
  wrapper.children = text.remove
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 40
# For each REQRECPER match: removes empty childless paragraphs, passes every
# child through requirement_description_wrap, then runs
# requirement_description_cleanup1.
def requirement_descriptions(xmldoc)
  xmldoc.xpath(REQRECPER).each do |reqt|
    reqt.xpath(".//p[not(./*)][normalize-space(.)='']").each(&:remove)
    reqt.children.each do |child|
      requirement_description_wrap(reqt, child)
    end
    requirement_description_cleanup1(reqt)
  end
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 12
# For each REQRECPER match, moves the inherit elements nested below its
# children to just before the node returned by requirement_inherit_insert.
def requirement_inherit(xmldoc)
  xmldoc.xpath(REQRECPER).each do |reqt|
    anchor = requirement_inherit_insert(reqt)
    reqt.xpath("./*//inherit").each do |inherit|
      anchor.previous = inherit
    end
  end
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 19
# Returns the node before which inherit elements go: the first
# classification child, else the first child matching any listed component
# name, else whatever requirement_inherit_insert1 returns.
def requirement_inherit_insert(reqt)
  components =
    "./description | ./measurementtarget | ./specification | "\
    "./verification | ./import | ./description | ./component | "\
    "./requirement | ./recommendation | ./permission"
  reqt.at("./classification") || reqt.at(components) ||
    requirement_inherit_insert1(reqt)
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 28
# Creates a blank text node and returns it: directly after the title when
# +reqt+ has one, otherwise as the first child of +reqt+.
def requirement_inherit_insert1(reqt)
  title = reqt.at("./title")
  if title
    title.next = " "
    return title.next
  end

  if reqt.children.empty?
    reqt.add_child(" ")
  else
    reqt.children.first.previous = " "
  end
  reqt.children.first
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 72
# For each REQRECPER match that has a metadata definition list as a child,
# removes that list and hands it to requirement_metadata1 together with the
# title (if any).
def requirement_metadata(xmldoc)
  xmldoc.xpath(REQRECPER).each do |reqt|
    dlist = reqt.at("./dl[@metadata = 'true']")&.remove
    next unless dlist

    requirement_metadata1(reqt, dlist, reqt.at("./title"))
  end
end
# File lib/metanorma/standoc/cleanup_reqt.rb, line 83
# Transfers the entries of +dlist+ onto +reqt+: obligation, model and type
# through dl_to_attrs, the names from requirement_metadata1_tags through
# dl_to_elems, and classifications through reqt_dl_to_classif. When +ins+ is
# nil, a blank text node is created as first child and used as the insertion
# point.
def requirement_metadata1(reqt, dlist, ins)
  if ins.nil? || ins == false
    reqt.children.first.previous = " "
    ins = reqt.children.first
  end
  %w(obligation model type).each do |attr|
    dl_to_attrs(reqt, dlist, attr)
  end
  requirement_metadata1_tags.each do |tag|
    ins = dl_to_elems(ins, reqt, dlist, tag)
  end
  reqt_dl_to_classif(ins, reqt, dlist)
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 190
# For each entry of +refs+, strips the leading "<prefix>_" to get an id. If
# an element with that id exists and it or an ancestor has type +prefix+,
# every eref pointing at the entry goes through indirect_eref_to_xref.
# Returns an array of the entries that were not resolved this way.
def resolve_local_indirect_erefs(xmldoc, refs, prefix)
  refs.each_with_object([]) do |ref, unresolved|
    id = ref.sub(/^#{prefix}_/, "")
    target = xmldoc.at("//*[@id = '#{id}']")
    unless target&.at("./ancestor-or-self::*[@type = '#{prefix}']")
      unresolved << ref
      next
    end
    xmldoc.xpath("//eref[@bibitemid = '#{ref}']").each do |eref|
      indirect_eref_to_xref(eref, id)
    end
  end
end
# File lib/metanorma/standoc/cleanup_block.rb, line 144
# Serialises +text+ as an XML text node of +doc+, encoded as US-ASCII and
# without an XML declaration.
def safe_noko(text, doc)
  text_node = Nokogiri::XML::Text.new(text, doc)
  no_declaration = Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
  text_node.to_xml(encoding: "US-ASCII", save_with: no_declaration)
end
It seems Nokogiri::XML treats the content of <script> as CDATA, because of its use in HTML. We undo that here, since we use script as a normal tag.
# File lib/metanorma/standoc/cleanup.rb, line 88
# Resets the content of every script element to its own string value.
def script_cleanup(xmldoc)
  xmldoc.xpath("//script").each do |script|
    script.content = script.to_str
  end
end
# File lib/metanorma/standoc/cleanup_section_names.rb, line 39 def section_names_refs_cleanup(xml) replace_title(xml, "//bibliography/references[@normative = 'true']", @i18n&.normref, true) replace_title(xml, "//bibliography/references[@normative = 'false']", @i18n&.bibliography, true) end
# File lib/metanorma/standoc/cleanup_section_names.rb, line 54 def section_names_terms_cleanup(xml) replace_title(xml, "//definitions[@type = 'symbols']", @i18n&.symbols) replace_title(xml, "//definitions[@type = 'abbreviated_terms']", @i18n&.abbrev) replace_title(xml, "//definitions[not(@type)]", @i18n&.symbolsabbrev) replace_title(xml, "//terms#{SYMnoABBR} | //clause[.//terms]#{SYMnoABBR}", @i18n&.termsdefsymbols, true) replace_title(xml, "//terms#{ABBRnoSYM} | //clause[.//terms]#{ABBRnoSYM}", @i18n&.termsdefabbrev, true) replace_title(xml, "//terms#{SYMABBR} | //clause[.//terms]#{SYMABBR}", @i18n&.termsdefsymbolsabbrev, true) replace_title(xml, "//terms#{NO_SYMABBR} | //clause[.//terms]#{NO_SYMABBR}", @i18n&.termsdefsymbolsabbrev, true) replace_title( xml, "//terms[not(.//definitions)] | //clause[.//terms][not(.//definitions)]", @i18n&.termsdef, true ) end
# File lib/metanorma/standoc/cleanup_section.rb, line 171
# Moves every element under sections that has beforeclauses="true" in front
# of the node that was the first child of sections at the start, removing the
# attribute. A blank text node is added first when sections is empty.
def sections_clausebefore_cleanup(xmldoc)
  sections = xmldoc.at("//sections") or return
  ins = sections.children.first
  unless ins
    sections << " "
    ins = sections.children.first
  end
  xmldoc.xpath("//sections//*[@beforeclauses = 'true']").each do |node|
    node.delete("beforeclauses")
    ins.previous = node.remove
  end
end
# File lib/metanorma/standoc/cleanup_section.rb, line 113 def sections_cleanup(xml) sections_order_cleanup(xml) sections_level_cleanup(xml) sections_names_cleanup(xml) sections_variant_title_cleanup(xml) change_clauses(xml) end
# File lib/metanorma/standoc/cleanup_section.rb, line 101
# Working from the deepest level down to 6, moves each clause at that level
# into its previous sibling element and drops its level attribute. Does
# nothing when maxlevel is below 6.
def sections_level_cleanup(xml)
  deepest = maxlevel(xml)
  return if deepest < 6

  deepest.downto(6) do |level|
    xml.xpath("//clause[@level = '#{level}']").each do |clause|
      clause.delete("level")
      clause.previous_element << clause.remove
    end
  end
end
# File lib/metanorma/standoc/cleanup_section_names.rb, line 29 def sections_names_cleanup(xml) replace_title(xml, "//clause[@type = 'scope']", @i18n&.scope) replace_title(xml, "//preface//abstract", @i18n&.abstract) replace_title(xml, "//foreword", @i18n&.foreword) replace_title(xml, "//introduction", @i18n&.introduction) replace_title(xml, "//acknowledgements", @i18n&.acknowledgements) section_names_refs_cleanup(xml) section_names_terms_cleanup(xml) end
# File lib/metanorma/standoc/cleanup_section.rb, line 71 def sections_order_cleanup(xml) s = xml.at("//sections") make_preface(xml, s) make_annexes(xml) make_indexsect(xml, s) make_bibliography(xml, s) xml.xpath("//sections/annex").reverse_each { |r| s.next = r.remove } end
# File lib/metanorma/standoc/cleanup_section_names.rb, line 78
# Renames every p with a variant_title attribute to variant-title, removes
# its id and variant_title attributes, and moves it under the last of its
# SECTION_CONTAINERS ancestors: after that section's title when there is
# one, otherwise before its first child.
def sections_variant_title_cleanup(xml)
  containers = SECTION_CONTAINERS.map { |x| "./ancestor::#{x}" }.join(" | ")
  xml.xpath("//p[@variant_title]").each do |para|
    para.name = "variant-title"
    para.delete("id")
    para.delete("variant_title")
    para.xpath("(#{containers})[last()]").each do |sect|
      title = sect.at("./title")
      if title
        title.next = para
      else
        sect.children.first.previous = para
      end
    end
  end
end
# File lib/metanorma/standoc/cleanup_text.rb, line 18
# Applies Metanorma::Utils::endash_date to every date element, then runs the
# smart-quote pass when @smartquotes is set and the dumb-quote pass
# otherwise.
def smartquotes_cleanup(xmldoc)
  xmldoc.xpath("//date").each do |date|
    Metanorma::Utils::endash_date(date)
  end
  return smartquotes_cleanup1(xmldoc) if @smartquotes

  dumbquote_cleanup(xmldoc)
end
# File lib/metanorma/standoc/cleanup_text.rb, line 25 def smartquotes_cleanup1(xmldoc) uninterrupt_quotes_around_xml(xmldoc) dumb2smart_quotes(xmldoc) end
# File lib/metanorma/standoc/cleanup_ref.rb, line 44 def sort_biblio(bib) bib end
# File lib/metanorma/standoc/cleanup_block.rb, line 132
# Inside each sourcecode element, replaces every text node that contains
# @sourcecode_markup_start with the output of sourcecode_markup.
def sourcecode_cleanup(xmldoc)
  xmldoc.xpath("//sourcecode").each do |code|
    code.traverse do |node|
      next unless node.text? &&
        /#{Regexp.escape(@sourcecode_markup_start)}/.match?(node.text)

      node.replace(sourcecode_markup(node))
    end
  end
end
# File lib/metanorma/standoc/cleanup_block.rb, line 151
# Splits the text of +node+ on the start and end markup delimiters (keeping
# them) and walks the pieces four at a time: the first piece of each group is
# escaped through safe_noko, and in complete groups the third piece is
# converted inline by Asciidoctor. Returns the pieces joined into a string.
def sourcecode_markup(node)
  delimiters = /(#{Regexp.escape(@sourcecode_markup_start)}|
                 #{Regexp.escape(@sourcecode_markup_end)})/x
  pieces = node.text.split(delimiters)
  pieces.each_slice(4).each_with_object([]) do |group, out|
    out << safe_noko(group[0], node.document)
    next unless group.size == 4

    out << Asciidoctor.convert(
      group[2], doctype: :inline, backend: (self&.backend&.to_sym || :standoc)
    )
  end.join
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 45
# Wraps the content of every definition in verbal-definition when it has a
# p, ol, dl or ul child, and in non-verbal-representation otherwise.
def split_termdefinitions(xmldoc)
  xmldoc.xpath("//definition").each do |defn|
    verbal = defn.at("./p | ./ol | ./dl | ./ul")
    wrapper = verbal ? "verbal-definition" : "non-verbal-representation"
    defn.children = "<#{wrapper}>#{defn.children}</#{wrapper}>"
  end
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 15
# Trims the leading text node of +elem+: a single space at the start of each
# line is removed when the node has non-blank text, and the node itself is
# removed when it is blank. Does nothing when the first child is not a text
# node, or when +elem+ has no children at all (this used to raise
# NoMethodError).
def strip_initial_space(elem)
  first = elem.children[0]
  return unless first&.text?

  if /\S/.match?(first.text)
    first.content = first.text.gsub(/^ /, "")
  else
    first.remove
  end
end
Examples containing only figures (and optionally a name) are renamed to figure, so that their figures become subfigures.
# File lib/metanorma/standoc/cleanup_block.rb, line 66
# Renames to figure every example that has a figure child and whose child
# elements are all named name or figure.
def subfigure_cleanup(xmldoc)
  allowed = %w(name figure)
  xmldoc.xpath("//example[figure]").each do |example|
    only_figures = example.elements.all? do |child|
      allowed.include?(child.name)
    end
    example.name = "figure" if only_figures
  end
end
# File lib/metanorma/standoc/cleanup_image.rb, line 4 def svgmap_cleanup(xmldoc) svgmap_moveattrs(xmldoc) svgmap_populate(xmldoc) Metanorma::Utils::svgmap_rewrite(xmldoc, @localdir) end
# File lib/metanorma/standoc/cleanup_image.rb, line 15
# For each svgmap containing a figure: moves the svgmap's name into the
# figure when the figure has none, moves the svgmap's id onto the figure when
# guid? is true for the figure's id, and transfers the remaining attributes
# through svgmap_moveattrs1.
def svgmap_moveattrs(xmldoc)
  xmldoc.xpath("//svgmap").each do |svgmap|
    figure = svgmap.at(".//figure")
    next unless figure

    name = svgmap.at("./name")
    if name && !figure.at("./name")
      figure.children.first.previous = name.remove
    end
    if svgmap["id"] && guid?(figure["id"])
      figure["id"] = svgmap["id"]
      svgmap.delete("id")
    end
    svgmap_moveattrs1(svgmap, figure)
  end
end
# File lib/metanorma/standoc/cleanup_image.rb, line 28
# Moves each listed attribute from +svgmap+ to +figure+ when svgmap has it
# and figure does not.
def svgmap_moveattrs1(svgmap, figure)
  movable = %w(unnumbered number subsequence keep-with-next
               keep-lines-together tag multilingual-rendering)
  movable.each do |attr|
    next unless svgmap[attr] && !figure[attr]

    figure[attr] = svgmap[attr]
    svgmap.delete(attr)
  end
end
# File lib/metanorma/standoc/cleanup_image.rb, line 38
# Rebuilds each svgmap from a copy of itself: its children are cleared, the
# copy's figure is re-added, and every li of the copy that holds an eref,
# link or xref followed by more nodes yields a target element whose href
# comes from svgmap_target.
def svgmap_populate(xmldoc)
  xmldoc.xpath("//svgmap").each do |svgmap|
    snapshot = svgmap.dup
    svgmap.children.remove
    figure = snapshot.at(".//figure")
    svgmap << figure if figure
    snapshot.xpath(".//li").each do |li|
      link = li.at(".//eref | .//link | .//xref")
      next unless link

      href = link.xpath("./following-sibling::node()")
      next if href.empty?

      svgmap << %[<target href="#{svgmap_target(href)}">#{link.to_xml}</target>]
    end
  end
end
# File lib/metanorma/standoc/cleanup_image.rb, line 52
# Replaces the content of each link node in +nodeset+ with its target
# attribute, then returns the set's text stripped of one leading comma,
# semicolon or space and of surrounding whitespace.
def svgmap_target(nodeset)
  nodeset.each do |node|
    node.children = node["target"] if node.name == "link"
  end
  nodeset.text.sub(/^[,; ]/, "").strip
end
Indices sort after letter but before any following letter (x, x_m, x_1, xa); we use colon to force that sort order. Numbers sort after letters; we use thorn to force that sort order.
# File lib/metanorma/standoc/cleanup_symbols.rb, line 7
# Builds the sort key for +sym+. On a copy, math nodes are replaced by their
# AsciiMath form passed through grkletters; the lowercased text then loses
# brackets and whitespace, gets ":" before punctuation, "_" and "^", loses
# backticks, and gets "þ" before each run of digits.
def symbol_key(sym)
  key = sym.dup
  key.traverse do |node|
    next unless node.name == "math"

    node.replace(grkletters(MathML2AsciiMath.m2a(node.to_xml)))
  end
  reparsed = Nokogiri::XML(key.to_xml)
  plain = HTMLEntities.new.decode(reparsed.text.downcase)
  compact = plain.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
  marked = compact.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
  marked.gsub(/[0-9]+/, "þ\\0")
end
# File lib/metanorma/standoc/cleanup_symbols.rb, line 38
# Sorts the entries of every definition list under definitions by :key, with
# :dt as the tie-break, and rewrites the list in that order. Returns +docxml+.
#
# The comparator used to be `a[:key] <=> b[:key] || a[:dt] <=> b[:dt]`.
# Because 0 is truthy in Ruby, equal keys returned 0 at once and the :dt
# comparison was never reached. The key result now goes through nonzero?,
# and a non-comparable :dt pair counts as equal instead of raising.
def symbols_cleanup(docxml)
  docxml.xpath("//definitions/dl").each do |dl|
    dl_out = extract_symbols_list(dl)
    dl_out.sort! do |a, b|
      (a[:key] <=> b[:key])&.nonzero? || (a[:dt] <=> b[:dt]) || 0
    end
    dl.children = dl_out.map { |d| d[:dt].to_s + d[:dd].to_s }.join("\n")
  end
  docxml
end
# File lib/metanorma/standoc/cleanup_table.rb, line 46 def table_cleanup(xmldoc) dl1_table_cleanup(xmldoc) dl2_table_cleanup(xmldoc) notes_table_cleanup(xmldoc) header_rows_cleanup(xmldoc) end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 43
# Renumbers footnotes separately within every table and figure, restarting
# the counter for each one; fn elements inside a name are excluded.
def table_footnote_renumber(xmldoc)
  xmldoc.xpath("//table | //figure").each do |container|
    numbered = {}
    counter = 0
    container.xpath(".//fn[not(ancestor::name)]").each do |fn|
      counter, numbered = table_footnote_renumber1(fn, counter, numbered)
    end
  end
end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 30
# Gives +fn+ a letter reference ("a" for number 1, "b" for 2, ...) and sets
# its table attribute to true. Footnotes with the same content (per
# footnote_content) share one number. Returns the updated [i, seen] pair.
def table_footnote_renumber1(fn, i, seen)
  content = footnote_content(fn)
  unless seen[content]
    i += 1
    seen[content] = i
  end
  fn["reference"] = (seen[content] - 1 + "a".ord).chr
  fn["table"] = true
  [i, seen]
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 76
# Renames every terms element that has a terms child to clause, then moves
# the termnote, termexample, termsource and term children of each term to its
# end, in that order.
def term_children_cleanup(xmldoc)
  xmldoc.xpath("//terms[terms]").each do |terms|
    terms.name = "clause"
  end
  xmldoc.xpath("//term").each do |term|
    %w(termnote termexample termsource term).each do |tag|
      term.xpath("./#{tag}").each { |child| term << child.remove }
    end
  end
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 18
# Inserts the terms boilerplate after +div+: first the general i18n text (if
# any), then either the "no terms" text (no sources and no term) or the text
# from term_defs_boilerplate_cont. Each is assigned through div.next=.
# Logs every source whose bibitemid has no entry in @anchors.
def term_defs_boilerplate(div, source, term, _preface, isodoc)
  intro = @i18n.term_def_boilerplate
  div.next = intro if intro
  source.each do |s|
    next if @anchors[s["bibitemid"]]

    @log.add("Crossreferences", nil,
             "term source #{s['bibitemid']} not referenced")
  end
  body =
    if source.empty? && term.nil? then @i18n.no_terms_boilerplate
    else term_defs_boilerplate_cont(source, term, isodoc)
    end
  body and div.next = body
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 33
# Chooses the terms boilerplate: internal-only when there are no sources,
# external-only when there is no term, and the combined text otherwise. The
# external variants receive the sources as erefs joined by
# isodoc.sentence_join.
def term_defs_boilerplate_cont(src, term, isodoc)
  erefs = src.map { |s| %{<eref bibitemid="#{s['bibitemid']}"/>} }
  sources = isodoc.sentence_join(erefs)
  return @i18n.internal_terms_boilerplate if src.empty?
  return external_terms_boilerplate(sources) if term.nil?

  internal_external_terms_boilerplate(sources)
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 183
# Moves the designations of every term to the front of the term, grouped in
# the order preferred, admitted, deprecates, related.
def term_designation_reorder(xmldoc)
  xmldoc.xpath("//term").each do |term|
    designations = %w(preferred admitted deprecates related).flat_map do |tag|
      term.xpath("./#{tag}").map(&:remove)
    end
    # each node is inserted before the current first child, so go backwards
    designations.reverse_each do |designation|
      term.children.first.previous = designation
    end
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 122
# If the +category+ element under expression/grammar holds a comma-separated
# list, replaces it with one +category+ element per item.
def term_dl_to_designation_category(prev, category)
  cat = prev.at(".//expression/grammar/#{category}")
  return false unless /,/.match?(cat&.text)

  items = cat.text.split(/,\s*/)
  cat.replace(items.map { |x| "<#{category}>#{x}</#{category}>" }.join)
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 66
# Copies absent and geographic-area from +dlist+ onto +prev+ through
# dl_to_attrs, then passes usage-info and field-of-application (in that call
# order) to dl_to_elems with the expression child as insertion point.
def term_dl_to_designation_metadata(prev, dlist)
  %w(absent geographic-area).each do |attr|
    dl_to_attrs(prev, dlist, attr)
  end
  # reverse of the order field-of-application, usage-info
  %w(usage-info field-of-application).each do |tag|
    dl_to_elems(prev.at("./expression"), prev, dlist, tag)
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 109
# Adds a grammar element to the expression inside +prev+ and fills it from
# +dlist+. A temporary sentinel child serves as insertion point for
# dl_to_elems and is removed at the end; gender and number are then split by
# term_dl_to_designation_category. Returns nil when +prev+ has no expression.
def term_dl_to_expression_grammar(prev, dlist)
  expression = prev.at(".//expression") or return
  expression << "<grammar><sentinel/></grammar>"
  fields = %w(gender number isPreposition isParticiple isAdjective
              isAdverb isNoun grammar-value)
  fields.reverse_each do |field|
    dl_to_elems(prev.at(".//expression/grammar/*"), prev.elements.last,
                dlist, field)
  end
  %w(gender number).each do |category|
    term_dl_to_designation_category(prev, category)
  end
  prev.at(".//expression/grammar/sentinel").remove
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 84 def term_dl_to_expression_metadata(prev, dlist) term_dl_to_expression_root_metadata(prev, dlist) term_dl_to_expression_name_metadata(prev, dlist) term_to_letter_symbol(prev, dlist) end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 101
# Passes pronunciation and abbreviation-type (in that call order) to
# dl_to_elems with expression/name as insertion point, then processes the
# nested definition list of a "grammar" entry, when there is one, through
# term_dl_to_expression_grammar.
def term_dl_to_expression_name_metadata(prev, dlist)
  %w(abbreviation-type pronunciation).reverse_each do |tag|
    dl_to_elems(prev.at("./expression/name"), prev, dlist, tag)
  end
  grammar = dlist.at("./dt[text()='grammar']/following::dd//dl")
  term_dl_to_expression_grammar(prev, grammar) if grammar
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 90
# Copies isInternational from +dlist+ onto the first expression,
# letter-symbol or graphical-symbol child of +prev+, then language, script
# and type onto the expression child (each skipped when there is none).
def term_dl_to_expression_root_metadata(prev, dlist)
  symbol = prev.at("./expression | ./letter-symbol | ./graphical-symbol")
  dl_to_attrs(symbol, dlist, "isInternational")
  %w(language script type).each do |attr|
    expression = prev.at("./expression")
    next unless expression

    dl_to_attrs(expression, dlist, attr)
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 44
# For every metadata definition list directly under a term, applies its
# entries to the designation returned by related2pref(dl_to_designation(dl))
# and then removes the list. Lists without such a designation are left in
# place.
def term_dl_to_metadata(xmldoc)
  xmldoc.xpath("//term[dl[@metadata = 'true']]").each do |term|
    term.xpath("./dl[@metadata = 'true']").each do |dlist|
      designation = related2pref(dl_to_designation(dlist))
      next unless designation

      term_dl_to_designation_metadata(designation, dlist)
      term_dl_to_term_metadata(designation, dlist)
      term_dl_to_expression_metadata(designation, dlist)
      dlist.remove
    end
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 56
# When +prev+ is a preferred element with no earlier preferred sibling, adds
# domain and subject from +dlist+ to its parent, starting at the point given
# by term_element_insert_point.
def term_dl_to_term_metadata(prev, dlist)
  return unless prev.name == "preferred"
  return unless prev.at("./preceding-sibling::preferred").nil?

  ins = term_element_insert_point(prev)
  %w(domain subject).each do |tag|
    ins = dl_to_elems(ins, prev.parent, dlist, tag)
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 75
# Starting at +prev+, steps over following sibling elements named preferred,
# admitted, deprecates, related, domain or dl, and returns the last element
# reached (or +prev+ itself).
def term_element_insert_point(prev)
  skippable = %w(preferred admitted deprecates related domain dl)
  ins = prev
  loop do
    following = ins&.next_element
    break unless skippable.include?(following&.name)

    ins = following
  end
  ins
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 146
# Handles preferred, admitted and deprecates elements with blank text. When
# the next element is a formula or figure, it becomes the designation through
# term_nonverbal_designations1; otherwise an empty expression is supplied
# unless expression/name already exists.
def term_nonverbal_designations(xmldoc)
  designations =
    "//term/preferred | //term/admitted | //term/deprecates"
  xmldoc.xpath(designations).each do |desgn|
    next unless desgn.text.strip.empty?

    following = desgn.next_element
    if %w(formula figure).include?(following&.name)
      term_nonverbal_designations1(desgn, following)
    elsif !desgn.at("./expression/name")
      desgn.children = term_expr("")
    end
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 159
# Turns +elem+ into the content of the designation +desgn+ (after
# related2pref). A figure is wrapped in graphical-symbol, minus its name
# child; anything else contributes its stem through term_expr. +elem+ is
# removed from its original position either way.
#
# The name child is removed with safe navigation: a figure without a name
# used to raise NoMethodError.
def term_nonverbal_designations1(desgn, elem)
  desgn = related2pref(desgn)
  if elem.name == "figure"
    elem.at("./name")&.remove
    desgn.children =
      "<graphical-symbol>#{elem.remove.to_xml}</graphical-symbol>"
  else
    desgn.children = term_expr(elem.at("./stem").to_xml)
    elem.remove
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 171
# Moves each termsource directly under a term into the designation that
# precedes it, skipping back over domain and subject elements. Termsources
# not preceded by a designation stay where they are.
def term_termsource_to_designation(xmldoc)
  designations = %w(preferred admitted deprecates related)
  xmldoc.xpath("//term/termsource").each do |source|
    prev = source.previous_element
    prev = prev.previous_element while %w(domain subject).include?(prev&.name)
    next unless designations.include?(prev&.name)

    related2pref(prev) << source.remove
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 129
# Renames the expression inside +prev+ to letter-symbol when +dlist+ has a
# letter-symbol entry whose paragraph text is "true".
def term_to_letter_symbol(prev, dlist)
  flag = dlist.at("./dt[text()='letter-symbol']/following::dd/p")
  prev.at(".//expression").name = "letter-symbol" if flag&.text == "true"
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 72 def termdef_boilerplate_cleanup(xmldoc) # termdef_remove_initial_paras(xmldoc) end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 90
# Calls term_defs_boilerplate for every TERM_CLAUSE match that does not
# already have a boilerplate clause child. With +once+ set, stops after the
# first clause handled.
def termdef_boilerplate_insert(xmldoc, isodoc, once = false)
  xmldoc.xpath(self.class::TERM_CLAUSE).each do |clause|
    next if clause.at("./clause[@type = 'boilerplate']")

    sources = xmldoc.xpath(".//termdocsource")
    term_defs_boilerplate(clause.at("./title"), sources,
                          clause.at(".//term"), clause.at(".//p"), isodoc)
    break if once
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 101 def termdef_cleanup(xmldoc) termdef_unnest_cleanup(xmldoc) Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call term_nonverbal_designations(xmldoc) term_dl_to_metadata(xmldoc) term_termsource_to_designation(xmldoc) term_designation_reorder(xmldoc) termdef_from_termbase(xmldoc) termdomain_cleanup(xmldoc) termdef_stem_cleanup(xmldoc) termdefinition_cleanup(xmldoc) termdomain1_cleanup(xmldoc) termnote_example_cleanup(xmldoc) term_children_cleanup(xmldoc) termdocsource_cleanup(xmldoc) end
# File lib/metanorma/standoc/cleanup_terms.rb, line 85
# For each term that has an origin/termref but no definition, inserts the
# result of fetch_termbase (termref's base attribute and text) before the
# origin.
def termdef_from_termbase(xmldoc)
  xmldoc.xpath("//term").each do |term|
    termref = term.at("./origin/termref")
    next unless termref
    next if term.at("./definition")

    term.at("./origin").previous =
      fetch_termbase(termref["base"], termref.text)
  end
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 76 def termdef_remove_initial_paras(xmldoc) xmldoc.xpath("//terms/p | //terms/ul").each(&:remove) end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 15
# Converts to admitted designations: each p under a term that holds a stem
# and passes initial_formula (the p is replaced, wrapping the stem), and each
# formula under a term that passes initial_formula (wrapping its children).
def termdef_stem2admitted(xmldoc)
  xmldoc.xpath("//term/p/stem").each do |stem|
    next unless initial_formula(stem.parent)

    stem.parent.replace("<admitted>#{term_expr(stem.to_xml)}</admitted>")
  end
  xmldoc.xpath("//term/formula").each do |formula|
    next unless initial_formula(formula)

    formula.replace(
      "<admitted>#{term_expr(formula.children.to_xml)}</admitted>",
    )
  end
end
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 4
# After termdef_stem2admitted, renames to letter-symbol the parent of every
# expression name that is blank once its first stem is removed (checked on a
# copy).
def termdef_stem_cleanup(xmldoc)
  termdef_stem2admitted(xmldoc)
  xmldoc.xpath("//term//expression/name[stem]").each do |name|
    probe = name.dup
    probe.at("./stem").remove
    name.parent.name = "letter-symbol" if probe.text.strip.empty?
  end
end
Release term designation tags (admitted, deprecates, preferred, related) from their surrounding paragraphs.
# File lib/metanorma/standoc/cleanup_terms_designations.rb, line 35
# While the query below still matches, replaces the parent of the first match
# with that parent's children. The query is re-run after every replacement.
def termdef_unnest_cleanup(xmldoc)
  nested = "//p/admitted | //p/deprecates | //p/preferred | //p//related"
  until (matches = xmldoc.xpath(nested)).empty?
    wrapper = matches[0].parent
    wrapper.replace(wrapper.children)
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 23 def termdefinition_cleanup(xmldoc) generate_termdefinitions(xmldoc) split_termdefinitions(xmldoc) alternate_termdefinitions(xmldoc) end
# File lib/metanorma/standoc/cleanup_terms.rb, line 71
# Moves every termdocsource to just before the first preface or sections
# element.
def termdocsource_cleanup(xmldoc)
  anchor = xmldoc.at("//preface | //sections")
  xmldoc.xpath("//termdocsource").each do |source|
    anchor.previous = source.remove
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 16
# For each term, takes the last domain or subject child and, if the term has
# a definition, moves it to sit immediately before that definition.
#
# @param xmldoc the XML document to update in place
def termdomain1_cleanup(xmldoc)
  xmldoc.xpath("//term").each do |term|
    domain = term.xpath("./domain | ./subject").last
    next unless domain

    definition = domain.at("../definition")
    definition.previous = domain.remove if definition
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 7
# Lifts every domain element out of its enclosing paragraph, placing it
# immediately before that paragraph, and removes the paragraph if nothing
# but whitespace is left in it.
#
# The domain is inserted relative to the paragraph itself rather than
# relative to the paragraph's previous sibling: the resulting position is
# the same, but it also works when the paragraph has no previous sibling.
#
# @param xmldoc the XML document to update in place
def termdomain_cleanup(xmldoc)
  xmldoc.xpath("//p/domain").each do |domain|
    para = domain.parent
    para.previous = domain.remove
    para.remove if para.text.strip.empty?
  end
end
# File lib/metanorma/standoc/cleanup_terms.rb, line 93
# Renames termnote and termexample elements that are not inside a term to
# plain note and example elements.
#
# @param xmldoc the XML document to update in place
def termnote_example_cleanup(xmldoc)
  %w(note example).each do |kind|
    strays = xmldoc.xpath("//term#{kind}[not(ancestor::term)]")
    strays.each { |stray| stray.name = kind }
  end
end
# File lib/metanorma/standoc/cleanup_text.rb, line 4
# Joins the (possibly nested) lines in result into one newline-separated
# string with trailing whitespace stripped from each line, converts
# AsciiMath via asciimath2mathml unless @keepasciimath is set, removes
# whitespace in front of "<fn " and replaces each metanorma passthrough
# element by its entity-decoded content.
#
# @param result [Array] lines of text, arbitrarily nested
# @return [String] the cleaned-up text
def textcleanup(result)
  lines = result.flatten.map { |line| line.sub(/\s*$/, "") }
  text = lines.join("\n")
  text = asciimath2mathml(text) unless @keepasciimath
  text = text.gsub(/\s+<fn /, "<fn ")
  # Extended-mode regex: the literal whitespace in the pattern is ignored.
  passthrough = %r{<passthrough\s+formats="metanorma">([^<]*) </passthrough>}mx
  text.gsub(passthrough) { HTMLEntities.new.decode(Regexp.last_match(1)) }
end
# File lib/metanorma/standoc/cleanup_footnotes.rb, line 75
# Turns every footnote inside a bibdata title into a note of type
# "title-footnote" (dropping its reference attribute) and moves it to sit
# immediately before bibdata/language.
#
# @param xmldoc the XML document to update in place
def title_footnote_move(xmldoc)
  language = xmldoc.at("//bibdata/language")
  xmldoc.xpath("//bibdata/title//fn").each do |footnote|
    footnote.name = "note"
    footnote["type"] = "title-footnote"
    footnote.delete("reference")
    detached = footnote.remove
    language.previous = detached
  end
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 113
# Normalises an xref target. A target of the form "prefix#suffix" has every
# character matching Metanorma::Utils::NAMECHAR replaced by "_" in each half
# separately, keeping the "#"; any other target is passed to
# Metanorma::Utils.to_ncname.
#
# @param str [String] the raw target
# @return [String] the normalised target
def to_xreftarget(str)
  halves = /^(?<pref>[^#]+)#(?<suff>.+)$/.match(str)
  return Metanorma::Utils::to_ncname(str) unless halves

  cleaned = [halves[:pref], halves[:suff]].map do |half|
    half.gsub(%r([#{Metanorma::Utils::NAMECHAR}])o, "_")
  end
  cleaned.join("#")
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 4
# Table-of-contents cleanup: releases toc elements from paragraphs, fills in
# every toc element, processes TOC clauses and annexes, then emits the TOC
# metadata.
#
# @param xmldoc the XML document to update in place
def toc_cleanup(xmldoc)
  toc_cleanup_para(xmldoc)
  xmldoc.xpath("//toc").each do |toc|
    toc_cleanup1(toc, xmldoc)
  end
  toc_cleanup_clause(xmldoc)
  toc_metadata(xmldoc)
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 43
# Replaces the children of toc with a <ul> built from the entries that
# toc_index returns, threading the running markup and the previous entry's
# depth through toc_cleanup1_entry. The depth starts at 1.
#
# @param toc the toc element to fill in
# @param xmldoc the XML document the entries are looked up in
def toc_cleanup1(toc, xmldoc)
  markup, = toc_index(toc, xmldoc)
    .reduce(["", 1]) do |(items, last_depth), entry|
    [toc_cleanup1_entry(entry, last_depth, items), entry[:depth]]
  end
  toc.children = "<ul>#{markup}</ul>"
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 53
# Appends one TOC list item to ret. Before the item, closes one nested list
# per level the entry is shallower than depth, or opens one per level it is
# deeper.
#
# @param entry [Hash] entry with :depth, :target and :text
# @param depth [Integer] depth of the previous entry
# @param ret [String] markup accumulated so far (not mutated)
# @return [String] ret followed by the nesting markup and the new item
def toc_cleanup1_entry(entry, depth, ret)
  change = entry[:depth] - depth
  nesting = if change.negative? then "</ul></li>" * -change
            elsif change.positive? then "<li><ul>" * change
            else ""
            end
  item = "<li><xref target='#{entry[:target]}'>#{entry[:text]}</xref></li>"
  ret + nesting + item
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 62
# In every clause or annex of type "toc", fills in the entries of each
# outermost unordered list and wraps that list in a <toc> element.
#
# @param xmldoc the XML document to update in place
def toc_cleanup_clause(xmldoc)
  containers = "//clause[@type = 'toc'] | //annex[@type = 'toc']"
  xmldoc.xpath(containers).each do |container|
    container.xpath(".//ul[not(ancestor::ul)]").each do |list|
      toc_cleanup_clause_entry(xmldoc, list)
      list.replace("<toc>#{list.to_xml}</toc>")
    end
  end
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 72
# Gives each text-less xref in list a copy of the children of its target's
# title, preferring a variant-title of type "toc" over the plain title.
#
# An xref is left untouched when its target id is not found in xmldoc or the
# target has neither kind of title, instead of failing on nil.
#
# @param xmldoc the XML document targets are looked up in
# @param list the list element whose xrefs are filled in
def toc_cleanup_clause_entry(xmldoc, list)
  list.xpath(".//xref[not(text())]").each do |x|
    c1 = xmldoc.at("//*[@id = '#{x['target']}']")
    next unless c1

    t = c1.at("./variant-title[@type = 'toc']") || c1.at("./title")
    next unless t

    x << t.dup.children
  end
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 11
# Moves toc elements that are direct children of a paragraph to just after
# that paragraph, and removes the paragraph if only whitespace is left.
#
# @param xmldoc the XML document to update in place
def toc_cleanup_para(xmldoc)
  xmldoc.xpath("//p[toc]").each do |para|
    # Inserting in reverse keeps the tocs in their original order.
    para.xpath("./toc").reverse_each { |toc| para.next = toc }
    para.remove if para.text.strip.empty?
  end
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 20
# Builds the list of TOC entries for toc: one toc_index1 entry per node
# matched by each XPath that toc declares, sorted by the entries' :line.
#
# @param toc the toc element declaring the XPaths and depths
# @param xmldoc the XML document the XPaths are evaluated against
# @return [Array<Hash>] entries sorted by :line
def toc_index(toc, xmldoc)
  depths = toc_index_depths(toc)
  entries = depths.keys.flat_map do |path|
    xmldoc.xpath(path).map { |node| toc_index1(path, node, depths) }
  end
  entries.sort_by { |entry| entry[:line] }
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 29
# Builds one TOC entry. If entry has a following-sibling variant-title of
# type "toc", that sibling is used in place of entry.
#
# @param key [String] the XPath that matched entry; used to look up depth
# @param entry the matched node
# @param depths [Hash] XPath => depth
# @return [Hash] :text (children as XML), :depth (Integer), :target (the
#   last id attribute on the node or its ancestors) and :line
def toc_index1(key, entry, depths)
  variant = "./following-sibling::variant-title[@type = 'toc']"
  entry = entry.at(variant) || entry
  nearest_id = entry.xpath("(./ancestor-or-self::*/@id)[last()]")[0]
  {
    text: entry.children.to_xml,
    depth: depths[key].to_i,
    target: nearest_id.text,
    line: entry.line,
  }
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 37
# Maps the text of each toc-xpath child of toc to its "depth" attribute.
#
# @param toc the toc element
# @return [Hash] XPath text => depth attribute value
def toc_index_depths(toc)
  pairs = toc.xpath("./toc-xpath").map do |declared|
    [declared.text, declared["depth"]]
  end
  pairs.to_h
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 80
# Emits TOC metadata when at least one of @htmltoclevels, @doctoclevels or
# @toclevels is set. The metadata goes into the document's misc-container,
# which is created right after bibdata if it does not exist yet.
#
# @param xmldoc the XML document to update in place
def toc_metadata(xmldoc)
  return if [@htmltoclevels, @doctoclevels, @toclevels].none?

  container = xmldoc.at("//misc-container")
  container ||= xmldoc.at("//bibdata").after("<misc-container/>").next_element
  toc_metadata1(container)
end
# File lib/metanorma/standoc/cleanup_toc.rb, line 88
# Appends one <presentation-metadata> element to ins for each TOC level
# setting that is set: @toclevels, @htmltoclevels and @doctoclevels, each
# under its own name. Settings that are nil or false are skipped.
#
# @param ins the container the metadata is appended to (anything
#   responding to <<)
def toc_metadata1(ins)
  [[@toclevels, "TOC Heading Levels"],
   [@htmltoclevels, "HTML TOC Heading Levels"],
   [@doctoclevels, "DOC TOC Heading Levels"]].each do |value, name|
    next unless value

    ins << "<presentation-metadata><name>#{name}</name>"\
           "<value>#{value}</value></presentation-metadata>"
  end
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 17
# Strips one double quote from the start of a line and one from the end of
# a line in text.
#
# @param text [String]
# @return [String] a new string; text itself is not modified
def tq(text)
  without_opening = text.sub(/^"/, "")
  without_opening.sub(/"$/, "")
end
“abc<tag/>”, def => “abc”,<tag/> def
# File lib/metanorma/standoc/cleanup_text.rb, line 31
# Walks the document and, for each text node that starts with a quote and
# directly follows an element, hands that element to
# uninterrupt_quotes_around_xml1. Elements inside pre, tt, sourcecode, stem
# or figure are skipped.
#
# @param xmldoc the XML document to update in place
def uninterrupt_quotes_around_xml(xmldoc)
  xmldoc.traverse do |node|
    next unless node.text?

    before = node.previous
    next unless before&.element? && /^['"]/.match?(node.text)

    verbatim = node.previous.ancestors("pre, tt, sourcecode, stem, figure")
    uninterrupt_quotes_around_xml1(node.previous) if verbatim.empty?
  end
end
# File lib/metanorma/standoc/cleanup_text.rb, line 42
# Moves a quote mark (plus any punctuation right after it) from the start of
# the text following elem to the end of the text preceding elem. Nothing is
# changed unless the preceding text ends in a non-space character and the
# quote run in the following text is followed by whitespace or ends the text.
#
# @param elem the element sitting between the two text nodes
def uninterrupt_quotes_around_xml1(elem)
  before = elem.at(".//preceding::text()[1]")
  return unless before
  return unless /\S$/.match?(before.text)

  after = elem.at(".//following::text()[1]")
  # Test against the entity-decoded text, but edit the node's raw text.
  decoded = HTMLEntities.new.decode(after&.text)
  quote_run = /^(["'][[:punct:]]*)(\s|$)/.match(decoded)
  return unless quote_run

  after.content = after.text.sub(/^(["'][[:punct:]]*)/, "")
  before.content = "#{before.text}#{quote_run[1]}"
end
# File lib/metanorma/standoc/cleanup_boilerplate.rb, line 80
# Within every node matched by xpath, dissolves clauses and notes of type
# "boilerplate": their title is dropped and the element is replaced by its
# remaining children.
#
# @param xmldoc the XML document to update in place
# @param xpath [String] XPath selecting the nodes to search in
def unwrap_boilerplate_clauses(xmldoc, xpath)
  boilerplate = ".//clause[@type = 'boilerplate'] | "\
                ".//note[@type = 'boilerplate']"
  xmldoc.xpath(xpath).each do |scope|
    scope.xpath(boilerplate).each do |wrapper|
      title = wrapper&.at("./title")
      title&.remove
      wrapper.replace(wrapper.children)
    end
  end
end
# File lib/metanorma/standoc/cleanup_ref_dl.rb, line 18
# Validates one reference. The anchor is bib["id"], falling back to c["id"]
# unless that starts with "_". A missing anchor is logged and validation
# stops; otherwise the anchor is recorded in @refids and a missing title or
# docid is logged.
#
# @param bib [Hash] the reference's parsed fields
# @param c the node the reference came from (used for its id, for log
#   context and for its XML)
def validate_ref_dl(bib, c)
  # do not accept implicit id
  explicit = c["id"] unless /^_/.match?(c["id"])
  id = bib["id"] || explicit
  unless id
    @log.add("Anchors", c,
             "The following reference is missing an anchor:\n#{c.to_xml}")
    return
  end

  @refids << id
  bib["title"] ||
    @log.add("Bibliography", c, "Reference #{id} is missing a title")
  bib["docid"] ||
    @log.add("Bibliography", c,
             "Reference #{id} is missing a document identifier (docid)")
end
# File lib/metanorma/standoc/cleanup.rb, line 113
# Runs variant_space_cleanup, then calls variant_cleanup1 on every element
# that has variant children alongside other content (anything that is
# neither a variant nor whitespace-only text). Finally renames every
# variantwrap element to variant.
#
# @param xmldoc the XML document to update in place
def variant_cleanup(xmldoc)
  variant_space_cleanup(xmldoc)
  xmldoc.xpath("//*[variant]").each do |holder|
    mixed = holder.children.any? do |child|
      blank_text = child.text? && child.text.gsub(/\s/, "").empty?
      child.name != "variant" && !blank_text
    end
    variant_cleanup1(holder) if mixed
  end
  xmldoc.xpath("//variantwrap").each { |wrap| wrap.name = "variant" }
end
# File lib/metanorma/standoc/cleanup.rb, line 125
# Groups the variant children of elem under variantwrap elements. A variant
# whose nearest preceding sibling (ignoring whitespace-only text) is a
# variantwrap joins it; otherwise a new variantwrap takes the variant's
# place and the variant moves inside it.
#
# @param elem the element whose variant children are grouped
def variant_cleanup1(elem)
  after_wrap = "preceding-sibling::node()"\
               "[not(self::text()[not(normalize-space())])][1]"\
               "[self::variantwrap]"
  elem.xpath("./variant").each do |variant|
    wrap = if variant.at_xpath(after_wrap)
             variant.previous_element
           else
             variant.replace("<variantwrap/>").first
           end
    wrap << variant
  end
end
# File lib/metanorma/standoc/cleanup.rb, line 137
# For every element that has a variant child: if the element is followed by
# a whitespace-only text node which is in turn followed by a node named
# "variant", removes that text node.
#
# @param xmldoc the XML document to update in place
def variant_space_cleanup(xmldoc)
  xmldoc.xpath("//*[variant]").each do |holder|
    next if holder.next.nil? || holder.next.next.nil?
    next unless holder.next.text? && holder.next.next.name == "variant"

    holder.next.remove if holder.next.text.gsub(/\s/, "").empty?
  end
end
# File lib/metanorma/standoc/cleanup_maths.rb, line 32
# Replaces the children of xml with its text content after unescaping the
# five predefined XML entities and stripping namespace prefixes from tag
# names. Does nothing if xml already has an element child.
#
# @param xml the node to rewrite in place
def xml_unescape_mathml(xml)
  return if xml.children.any?(&:element?)

  # &amp; is unescaped last so that "&amp;lt;" ends up as "&lt;", not "<".
  math = xml.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
    .gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&amp;/, "&")
    .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
  xml.children = math
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 60
# Processes every xref. A target containing ":" is first offered to
# xref_to_internal_eref, which may rename the element. An element that is
# still an xref becomes an eref if refid? accepts its target; otherwise its
# type attribute is removed.
#
# @param xmldoc the XML document to update in place
def xref_cleanup(xmldoc)
  xmldoc.xpath("//xref").each do |xref|
    xref_to_internal_eref(xref) if /:/.match?(xref["target"])
    next if xref.name != "xref"

    unless refid?(xref["target"])
      xref.delete("type")
      next
    end
    xref.name = "eref"
    xref_to_eref(xref)
  end
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 48
# Turns the attributes of elem from xref form into eref form: the target is
# copied to bibitemid, citeas is taken from the :xref entry for the target
# in @anchors, and the target attribute is removed. When no citeas is found
# and the element's type is not in @internal_eref_namespaces, a
# "Crossreferences" message is logged. Localities are extracted if elem has
# children.
#
# @param elem the element to rewrite in place
def xref_to_eref(elem)
  elem["bibitemid"] = elem["target"]
  citeas = @anchors&.dig(elem["target"], :xref)
  elem["citeas"] = citeas
  unless citeas || @internal_eref_namespaces.include?(elem["type"])
    @log.add("Crossreferences", elem,
             "#{elem['target']} does not have a corresponding "\
             "anchor ID in the bibliography!")
  end
  elem.delete("target")
  extract_localities(elem) unless elem.children.empty?
end
# File lib/metanorma/standoc/cleanup_xref.rb, line 73
# Converts an xref whose target has the form "namespace:id[:anchor]" into an
# eref. The target becomes "namespace_id", the namespace is stored as the
# type and added to @internal_eref_namespaces, and an anchor (if present) is
# prepended to the element's content as anchor="...",. Does nothing when
# the namespace or the id is missing or empty.
#
# @param elem the xref element to rewrite in place
def xref_to_internal_eref(elem)
  parts = elem["target"].split(":", 3)
  return if parts.size < 2 || parts[0].empty? || parts[1].empty?

  namespace = parts[0]
  elem["target"] = "#{namespace}_#{parts[1]}"
  if parts.size > 2
    elem.children =
      %{anchor="#{parts[2..-1].join}",#{elem&.children&.text}}
  end
  elem["type"] = namespace
  @internal_eref_namespaces << namespace
  elem.name = "eref"
  xref_to_eref(elem)
end
# File lib/metanorma/standoc/cleanup_inline.rb, line 144
# Normalises the target attribute of every xref through to_xreftarget. Each
# target that changes is logged under "Anchors" together with a
# children-less copy of the xref.
#
# @param elem the node the xref targets are searched from
def xreftarget_cleanup(elem)
  elem.xpath("//xref/@target").each do |target|
    original = target.value
    normalised = to_xreftarget(original)
    next if normalised == original

    target.value = normalised
    # Log a copy of the xref without children, to keep the message short.
    shell = target.parent.dup
    shell.children.remove
    @log.add("Anchors", target.parent,
             "normalised identifier in #{shell} from #{original}")
  end
end