class NewspaperWorks::TextExtraction::RenderAlto
Public Class Methods
new(width, height, scaling = 1.0)
click to toggle source
# File lib/newspaper_works/text_extraction/render_alto.rb, line 7
# Stores the page dimensions and the coordinate scaling factor.
#
# @param width page width, later passed to the ALTO page wrapper
# @param height page height, later passed to the ALTO page wrapper
# @param scaling multiplier applied to every word coordinate (default 1.0)
def initialize(width, height, scaling = 1.0)
  @width = width
  @height = height
  @scaling = scaling
end
Public Instance Methods
to_alto(words)
click to toggle source
# File lib/newspaper_works/text_extraction/render_alto.rb, line 13
# Renders the given words as an ALTO XML document.
#
# Each entry is read through its :word and :coordinates keys; coordinates
# are indexed as [hpos, vpos, width, height] and each component is passed
# through scale_point before being written out as a string attribute.
#
# @param words an enumerable of entries responding to [:word] and [:coordinates]
# @return the result of calling to_xml on the page built by alto_page
def to_alto(words)
  document = alto_page(@width, @height) do |xml|
    words.each do |entry|
      box = entry[:coordinates]
      attributes = {
        CONTENT: entry[:word],
        WIDTH: scale_point(box[2]).to_s,
        HEIGHT: scale_point(box[3]).to_s,
        HPOS: scale_point(box[0]).to_s,
        VPOS: scale_point(box[1]).to_s
      }
      # The block adds an empty text node to each String element.
      xml.String(attributes) { xml.text '' }
    end
  end
  document.to_xml
end
Private Instance Methods
alto_blockline(xml, pxwidth, pxheight) { |xml| ... }
click to toggle source
Builds the TextBlock and TextLine wrapper elements, then yields the builder so the caller can emit the word elements inside the line.
# File lib/newspaper_works/text_extraction/render_alto.rb, line 68
# Emits a single TextBlock containing a single TextLine, both positioned at
# 0,0 with the given dimensions, and yields the builder inside the line so
# the caller can add the word elements.
#
# @param xml the XML builder the elements are written to
# @param pxwidth width, converted with to_i before being written
# @param pxheight height, converted with to_i before being written
# @yield [xml] the same builder, positioned inside the TextLine
def alto_blockline(xml, pxwidth, pxheight)
  # Both wrapper elements share the same box attributes.
  box = { HEIGHT: pxheight.to_i, WIDTH: pxwidth.to_i, HPOS: '0', VPOS: '0' }
  xml.TextBlock(ID: 'ID1a', **box) do
    xml.TextLine(**box) { yield(xml) }
  end
end
alto_layout(xml, pxwidth, pxheight, &block)
click to toggle source
Builds the Layout, Page, and PrintSpace elements for the page and nests the block/line wrapper inside them.
# File lib/newspaper_works/text_extraction/render_alto.rb, line 51
# Emits the Layout > Page > PrintSpace nesting for a single page and hands
# the given block on to alto_blockline, which is called inside PrintSpace.
#
# @param xml the XML builder the elements are written to
# @param pxwidth page width; written as an integer, forwarded unchanged
# @param pxheight page height; written as an integer, forwarded unchanged
# @param block forwarded to alto_blockline
def alto_layout(xml, pxwidth, pxheight, &block)
  page_height = pxheight.to_i
  page_width = pxwidth.to_i
  xml.Layout do
    xml.Page(ID: 'ID1', PHYSICAL_IMG_NR: '1', HEIGHT: page_height, WIDTH: page_width) do
      xml.PrintSpace(HEIGHT: page_height, WIDTH: page_width, HPOS: '0', VPOS: '0') do
        alto_blockline(xml, pxwidth, pxheight, &block)
      end
    end
  end
end
alto_page(pxwidth, pxheight, &block)
click to toggle source
Wraps the word-generating block in the ALTO page, text block, and text line structure, and returns the builder.
# File lib/newspaper_works/text_extraction/render_alto.rb, line 31
# Builds the outer ALTO document: the root alto element in the ALTO v2
# namespace, a Description declaring pixel measurement units, and the page
# layout produced by alto_layout, to which the given block is forwarded.
#
# @param pxwidth page width, forwarded to alto_layout
# @param pxheight page height, forwarded to alto_layout
# @param block word-generating block, forwarded to alto_layout
# @return [Nokogiri::XML::Builder] the builder holding the document
def alto_page(pxwidth, pxheight, &block)
  Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    xml.alto(xmlns: 'http://www.loc.gov/standards/alto/ns-v2#') do
      xml.Description { xml.MeasurementUnit 'pixel' }
      alto_layout(xml, pxwidth, pxheight, &block)
    end
  end
end
scale_point(value)
click to toggle source
# File lib/newspaper_works/text_extraction/render_alto.rb, line 43
# Multiplies a coordinate component by the configured scaling factor.
#
# ALTO 2.1 specifies coordinates as xsd:float rather than xsd:int, but
# values are presumed non-fractional here, so the product is simplified to
# an integer with to_i for output.
#
# @param value a coordinate component (presumably numeric)
# @return the scaled value after to_i
def scale_point(value)
  scaled = value * @scaling
  scaled.to_i
end