module Slaw::Parse::Blocklists
Public Class Methods
adjust_blocklists(doc)
click to toggle source
# File lib/slaw/parse/blocklists.rb, line 7
# Entry point for block list clean-up: re-nests the block lists in +doc+
# and then fixes up their introductions, in that order.
#
# @param doc the document handed on to +nest_blocklists+ and +fix_intros+
# @return the result of +fix_intros+
def self.adjust_blocklists(doc)
  nest_blocklists(doc)
  fix_intros(doc)
end
fix_intros(doc)
click to toggle source
Change p tags preceding a blocklist into listIntroductions within the blocklist
# File lib/slaw/parse/blocklists.rb, line 177
# Change +p+ tags preceding a blockList into +listIntroduction+ elements
# within that blockList.
#
# The preceding sibling is looked up with +previous_element+ rather than
# +previous+, so that a text node (e.g. whitespace from pretty-printed XML)
# sitting between the +p+ and the blockList does not hide the paragraph.
# This matches the element-based navigation (+previous_element+,
# +next_element+) used by the other methods of this module.
#
# @param doc the document to search; matching nodes are modified in place
# @return the collection of blockList nodes that was iterated
def self.fix_intros(doc)
  doc.xpath('//a:blockList', a: Slaw.akn_namespace).each do |blocklist|
    intro = blocklist.previous_element
    next unless intro && intro.name == 'p'

    # rename first, then move the node to the front of the list
    intro.name = 'listIntroduction'
    blocklist.prepend_child(intro)
  end
end
guess_number_format(item, prev_format=nil)
click to toggle source
# File lib/slaw/parse/blocklists.rb, line 126
# Guess the numbering format that the number of +item+ belongs to.
#
# The +when+ clauses are tried top to bottom and their order matters: the
# roman patterns come before the double-letter patterns, which come before
# the single-letter patterns.
#
# @param item the list item; its +num+, +previous_element+ and
#   +next_element+ are consulted
# @param prev_format the format returned as-is for the ambiguous numbers
#   "(u)", "(v)" and "(x)"
# @return a numbering format, or nil when +item+ has no number (also nil
#   for an ambiguous number when +prev_format+ is nil)
def self.guess_number_format(item, prev_format=nil)
  number = item.num
  return nil unless number

  case number
  when "(i)"
    # Special case to tell apart
    #
    #   (h) foo          (h) foo
    #   (i) bar   from   (i) bar
    #   (j) baz          (ii) baz
    #
    # (i) is NOT the start of a sublist if:
    #   - there was a previous item (h), and
    #   - there is no next item, or the next item is not (ii)
    before = item.previous_element
    after = item.next_element
    continues_letters = before && before.num =~ /^\(h/ && !(after && after.num == "(ii)")

    continues_letters ? NumberingFormat.a : NumberingFormat.i
  when "(u)", "(v)", "(x)" then prev_format
  when /^\([ivx]+/ then NumberingFormat.i
  when /^\([IVX]+/ then NumberingFormat.I
  when /^\([a-z]{2}/ then NumberingFormat.aa
  when /^\([A-Z]{2}/ then NumberingFormat.AA
  when /^\([a-z]+/ then NumberingFormat.a
  when /^\([A-Z]+/ then NumberingFormat.A
  when /^\d+(\.\d+)+$/ then NumberingFormat.new(:'i.i', number.count('.'))
  else NumberingFormat.unknown
  end
end
nest_blocklist_items(items, our_number_format, list, prev)
click to toggle source
New blocklist nesting, starting with item
as its first element.
# File lib/slaw/parse/blocklists.rb, line 53
# Walk +items+ from the front and arrange them into nested blockLists
# according to the numbering format guessed for each item.
#
# The +items+ array is consumed destructively: items are shifted off as
# they are handled, and an item that turns out to belong to an outer list
# is pushed back onto the front before this call returns.
#
# @param items [Array] the sibling nodes still to be placed
# @param our_number_format the numbering format of the list being filled
# @param list the blockList that same-format items are appended to; when
#   nil, same-format items are left where they are
# @param prev the node handled just before the first of +items+, if any
# @return [nil]
def self.nest_blocklist_items(items, our_number_format, list, prev)
  return if items.empty?

  current = items.shift
  sublist_no = 1
  current_format = our_number_format

  while current && current.name == 'item'
    current_format = guess_number_format(current, current_format)
    break unless current_format

    # (aa) straight after (z) is the same numbering type, so pretend this
    # list has been in that format all along
    if current.num == "(aa)" && current.previous_element && current.previous_element.num == "(z)"
      our_number_format = current_format
    end

    if current_format != our_number_format
      if current_format < our_number_format
        # back to the old list: hand the item back to the caller's loop
        items.unshift(current)
        break
      end

      # New sublist.
      #
      # The blockList becomes a child of the sibling just before +current+,
      # and that sibling's content (the element following its num) is moved
      # into the new list as its +listIntroduction+.
      nested = current.document.create_element('blockList', eId: prev['eId'] + "__list_#{sublist_no}")
      sublist_no += 1

      prev_num = prev.at_xpath('a:num', a: Slaw.akn_namespace)
      intro = prev_num.next_element
      if intro
        intro.name = 'listIntroduction'
        nested << intro
      end

      # +current+ opens the new list
      current['eId'] = nested['eId'] + "__item_#{Slaw::Grammars::Counters.clean(current.num)}"
      nested << current

      # hang the new list off the previous item
      prev << nested

      # keep walking the remaining (old) siblings, pulling in those that
      # match the new list's numbering scheme
      nest_blocklist_items(items, current_format, nested, current)
    else
      # Same number format. If this num is (i), we are numbering in :i and
      # this isn't the first element in this list, then assume we are
      # following (h) with (i) and give the item back.
      if current_format.type == :i && current.num == "(i)" && prev
        items.unshift(current)
        break
      end

      # otherwise keep it with this list
      if list
        list << current
        current['eId'] = list['eId'] + "__item_#{Slaw::Grammars::Counters.clean(current.num)}"
      end
    end

    prev = current
    current = items.shift
  end
end
nest_blocklists(doc)
click to toggle source
Correctly re-nest nested block lists that are tagged with the “renest” attribute.
We do this by identifying the numbering format of each item in the list and comparing it with the surrounding elements. When the numbering format changes, we start a new nested list.
We make sure to handle special cases such as `(i)` coming between `(h)` and `(j)` versus being at the start of a `(i), (ii), (iii)` list.
(a)
(b)
(i)
(ii)
(aa)
(bb)
(c)
(d)
becomes
(a)
(b)
  (i)
  (ii)
    (aa)
    (bb)
(c)
(d)
@param doc [Nokogiri::XML::Document] the document
# File lib/slaw/parse/blocklists.rb, line 43
# Re-nest every blockList in +doc+ that carries a +renest+ attribute.
#
# The attribute is removed from each such list, and the list's direct
# +item+ children are handed to +nest_blocklist_items+ together with the
# numbering format guessed from the first item. Lists without items are
# only stripped of the attribute.
#
# @param doc [Nokogiri::XML::Document] the document
# @return the collection of blockList nodes that was iterated
def self.nest_blocklists(doc)
  flagged = doc.xpath('//a:blockList[@renest]', a: Slaw.akn_namespace)

  flagged.each do |blocklist|
    blocklist.remove_attribute('renest')

    items = blocklist.xpath('a:item', a: Slaw.akn_namespace)
    next if items.empty?

    nest_blocklist_items(items.to_a, guess_number_format(items.first), nil, nil)
  end
end