class PDF::Reader::MarkupPage::PageBoldItalicReceiver
Builds a UTF-8 plaintext string and a UTF-8 string that includes simple Bold and Italic markup of all the text on a single page by processing all the operators in a content stream.
Public Instance Methods
content()
click to toggle source
Returns the value of the content attribute
Calls superclass method
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 49 def content lines = super.lines.to_a fixed = [] current_line = 0 offset = 0 formatted_lines = markup.lines.to_a lines.each_with_index do |line, index| formatted_line = formatted_lines[index + offset] if line.strip == "" and (formatted_line and formatted_lines[index + offset].strip != "") offset -= 1 else fixed << line end end lines = fixed.join("") lines end
markup()
click to toggle source
Returns the value of the markup attribute - equivalent to the content attribute but with bold and italic markup
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 29 def markup unless @text.empty? line = fix_markup("#{@text.join("").strip}#{@last_tag_end}") @lines << line @text = [] end if @footer.join("").strip.empty? if @lines.last.empty? output = @lines[0..-2].join("\n") else output = @lines.join("\n") end else output = %Q|#{@lines.join("\n")}\n#{@footer.join("")}| end output end
page=(page)
click to toggle source
starting a new page
Calls superclass method
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 16 def page=(page) super(page) @last_tag_end = "" @open_tag = "" @lasty = 0.0 @footer = [] @text = [] @lines = [] end
Private Instance Methods
append_line(tags, run)
click to toggle source
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 102 def append_line(tags, run) line = fix_markup("#{@text.join("").strip}#{@last_tag_end}") unless @lines.empty? and line.strip.empty? @lines << line end @last_tag_end = "" @text = ["#{tags[:open]}#{run.to_s}"] end
build_markup(newy, run)
click to toggle source
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 135 def build_markup(newy, run) tags = markup_tags(@state.current_font) if tags[:open] == @open_tag if newy < 50 @footer << run.to_s newy = @lasty else if newy < @lasty append_line(tags, run) else @text << "#{run.to_s}" end end else if newy < 50 @footer << "#{@last_tag_end}#{run.to_s}" newy = @lasty else if newy < @lasty append_line(tags, run) else @text << "#{@last_tag_end}#{tags[:open]}#{run.to_s}" end end @last_tag_end = tags[:close] end @open_tag = tags[:open] @lasty = newy end
fix_markup(string)
click to toggle source
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 70 def fix_markup(string) #get Nokogiri to close any open tags string = Nokogiri::HTML::fragment(string).to_html #strip empty markup tags while string =~ /<(?:b|i)>\s*<\/(?:b|i)>/ string = string.gsub(/<(?:b|i)>\s*<\/(?:b|i)>/, "").strip end string end
font_type(font, type)
click to toggle source
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 81 def font_type(font, type) if font.basefont.to_s.include?(type) return true end false end
internal_show_text(string)
click to toggle source
# File lib/pdf/reader/markup/page_bold_italic_receiver.rb, line 111 def internal_show_text(string) if @state.current_font.nil? raise PDF::Reader::MalformedPDFError, "current font is invalid" end glyphs = @state.current_font.unpack(string) text = "" glyphs.each_with_index do |glyph_code, index| # paint the current glyph newx, newy = @state.trm_transform(0,0) utf8_chars = @state.current_font.to_utf8(glyph_code) # apply to glyph displacment for the current glyph so the next # glyph will appear in the correct position glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0 th = 1 scaled_glyph_width = glyph_width * @state.font_size * th run = TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars) @characters << run @state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE) build_markup(newy, run) end end