class PDF::Reader::PageTextReceiver
Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.
Constants
- SPACE
Attributes
options[R]
state[R]
Public Instance Methods
content()
click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 50
# Returns the page text: a PageLayout is built from the collected
# @characters and the device-space media box, then rendered with #to_s.
def content
  layout = PageLayout.new(@characters, @device_mediabox)
  layout.to_s
end
invoke_xobject(label)
click to toggle source
XObjects
# File lib/pdf/reader/page_text_receiver.rb, line 86
# Asks the page state to invoke the XObject named +label+. When the object
# yielded back is a PDF::Reader::FormXObject it is walked with this receiver;
# any other kind of object is ignored.
def invoke_xobject(label)
  @state.invoke_xobject(label) do |xobject|
    # Same membership check a `case`/`when` on the class performs.
    xobject.walk(self) if PDF::Reader::FormXObject === xobject
  end
end
move_to_next_line_and_show_text(str)
click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 72
# Handler for the ' operator: moves the state to the start of the next line
# and then shows +str+ through #show_text.
def move_to_next_line_and_show_text(str)
  @state.move_to_start_of_next_line

  show_text(str)
end
page=(page)
click to toggle source
starting a new page
# File lib/pdf/reader/page_text_receiver.rb, line 40
# Resets the receiver for a new +page+: creates a fresh PageState, empties
# the collected content and characters, and stores the page's MediaBox both
# as read from the page and after transformation through the state's CTM.
def page=(page)
  @state      = PageState.new(page)
  @content    = []
  @characters = []
  @mediabox   = page.objects.deref(page.attributes[:MediaBox])

  # Transform the two (x, y) pairs of the media box; presumably these are
  # the bottom-left and top-right corners, in that order.
  first_corner  = @state.ctm_transform(@mediabox[0], @mediabox[1])
  second_corner = @state.ctm_transform(@mediabox[2], @mediabox[3])

  @device_mediabox = [
    first_corner.first,
    first_corner.last,
    second_corner.first,
    second_corner.last
  ]
end
set_spacing_next_line_show_text(aw, ac, string)
click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 77
# Handler for the " operator: sets the word spacing to +aw+ and the character
# spacing to +ac+ on the state, then moves to the next line and shows
# +string+.
def set_spacing_next_line_show_text(aw, ac, string)
  @state.set_word_spacing(aw)
  @state.set_character_spacing(ac)

  move_to_next_line_and_show_text(string)
end
show_text(string)
click to toggle source
Text Showing Operators
record text that is drawn on the page
# File lib/pdf/reader/page_text_receiver.rb, line 58
# Handler for the Tj operator, e.g. `(AWAY) Tj`. Passes +string+ straight on
# to #internal_show_text, which records the text drawn on the page.
def show_text(string)
  internal_show_text(string)
end
show_text_with_positioning(params)
click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 62
# Handler for the TJ operator, e.g. `[(A) 120 (WA) 20 (Y)] TJ`. String
# elements of +params+ are shown as text; every other element is handed to
# the state as a glyph displacement.
def show_text_with_positioning(params)
  params.each do |element|
    unless element.is_a?(String)
      @state.process_glyph_displacement(0, element, false)
      next
    end

    internal_show_text(element)
  end
end
Private Instance Methods
internal_show_text(string)
click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 97
# Records the glyphs of +string+ as TextRun objects in @characters and, after
# each glyph, tells the state to apply that glyph's displacement.
#
# string - the string operand to show; the current font's #unpack splits it
#          into glyph codes.
#
# Raises PDF::Reader::MalformedPDFError when the state has no current font.
def internal_show_text(string)
  if @state.current_font.nil?
    raise PDF::Reader::MalformedPDFError, "current font is invalid"
  end

  glyphs = @state.current_font.unpack(string)
  glyphs.each do |glyph_code|
    # paint the current glyph
    newx, newy = @state.trm_transform(0, 0)
    utf8_chars = @state.current_font.to_utf8(glyph_code)
    is_space = utf8_chars == SPACE

    # apply the glyph displacement for the current glyph so the next
    # glyph will appear in the correct position
    glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0
    th = 1 # scaling factor, currently fixed at 1
    scaled_glyph_width = glyph_width * @state.font_size * th

    # spaces are not stored as runs; they only advance the position
    unless is_space
      @characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
    end
    @state.process_glyph_displacement(glyph_width, 0, is_space)
  end
end