class Tesseract::API
Constants
- Types
Public Class Methods
image_for(image)
click to toggle source
Build an Image usable by tesseract-ocr from a path, a string containing the image data, or an IO stream.
# File lib/tesseract/api.rb, line 37
# Builds a new Image from +image+; the argument is handed to Image.new
# unchanged.
def self.image_for(image)
  wrapped = Image.new(image)
  wrapped
end
new()
click to toggle source
# File lib/tesseract/api.rb, line 56
# Creates the native handle through C::BaseAPI.create and stores it in
# @internal, wrapped in an FFI::AutoPointer whose releaser is this class's
# +finalize+ method.
def initialize
  handle   = C::BaseAPI.create
  releaser = self.class.method(:finalize)

  @internal = FFI::AutoPointer.new(handle, releaser)
end
to_language_code(code)
click to toggle source
Convert a language code into the three-letter code tesseract-ocr expects; if the lookup fails, the code is returned as a string unchanged.
# File lib/tesseract/api.rb, line 43
# Looks up the downcased string form of +code+ with ISO_639.find and returns
# the entry's +alpha3+ value.
#
# Any StandardError raised along the way (for example calling +alpha3+ on a
# nil lookup result) is swallowed and +code.to_s+ is returned instead.
def self.to_language_code(code)
  entry = ISO_639.find(code.to_s.downcase)
  entry.alpha3
rescue StandardError
  code.to_s
end
Public Instance Methods
all_word_confidences()
click to toggle source
# File lib/tesseract/api.rb, line 206
# Delegates to C::BaseAPI.all_word_confidences for this instance's native
# handle and returns its result untouched.
def all_word_confidences
  handle = to_ffi
  C::BaseAPI.all_word_confidences(handle)
end
clear()
click to toggle source
# File lib/tesseract/api.rb, line 210
# Delegates to C::BaseAPI.clear for this instance's native handle.
def clear
  handle = to_ffi
  C::BaseAPI.clear(handle)
end
end()
click to toggle source
# File lib/tesseract/api.rb, line 214
# Delegates to C::BaseAPI.end for this instance's native handle.
#
# Because the method is named after a keyword, callers need an explicit
# receiver (e.g. +api.end+).
def end
  handle = to_ffi
  C::BaseAPI.end(handle)
end
get_box(page = 0)
click to toggle source
# File lib/tesseract/api.rb, line 182
# Returns the box text for +page+ as a UTF-8 tagged String.
#
# page - page index forwarded to C::BaseAPI.get_box_text (default 0).
#
# Returns nil when the native call yields a null pointer, matching the
# behaviour of get_text and get_hocr. Previously a null pointer was read
# and freed unconditionally.
def get_box(page = 0)
  pointer = C::BaseAPI.get_box_text(to_ffi, page)
  return if pointer.null?

  pointer.read_string.force_encoding('UTF-8')
ensure
  # pointer is still nil if the native call itself raised; only a real,
  # non-null buffer is handed back to the native free routine.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
get_hocr(page = 0)
click to toggle source
# File lib/tesseract/api.rb, line 169
# Returns the hOCR text for +page+ as a UTF-8 tagged String, or nil when the
# native call yields a null pointer.
#
# page - page index forwarded to C::BaseAPI.get_hocr_text (default 0).
def get_hocr(page = 0)
  pointer = C::BaseAPI.get_hocr_text(to_ffi, page)
  return if pointer.null?

  pointer.read_string.force_encoding('UTF-8')
ensure
  # pointer is still nil if the native call itself raised; guarding on it
  # keeps a NoMethodError from replacing the original exception.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
get_iterator()
click to toggle source
# File lib/tesseract/api.rb, line 152
# Wraps the value returned by C::BaseAPI.get_iterator for this handle in a
# new Iterator.
def get_iterator
  native_iterator = C::BaseAPI.get_iterator(to_ffi)
  Iterator.new(native_iterator)
end
get_page_seg_mode()
click to toggle source
# File lib/tesseract/api.rb, line 112
# Returns whatever C::BaseAPI.get_page_seg_mode reports for this handle.
def get_page_seg_mode
  handle = to_ffi
  C::BaseAPI.get_page_seg_mode(handle)
end
get_text()
click to toggle source
# File lib/tesseract/api.rb, line 156
# Returns the recognised text as a UTF-8 tagged String, or nil when
# C::BaseAPI.get_utf8_text yields a null pointer.
def get_text
  pointer = C::BaseAPI.get_utf8_text(to_ffi)
  return if pointer.null?

  pointer.read_string.force_encoding('UTF-8')
ensure
  # pointer is still nil if the native call itself raised; guarding on it
  # keeps a NoMethodError from replacing the original exception.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
get_unlv()
click to toggle source
# File lib/tesseract/api.rb, line 192
# Returns the UNLV text as a String tagged with the 'ISO8859-1' encoding.
#
# Returns nil when C::BaseAPI.get_unlv_text yields a null pointer, matching
# the behaviour of get_text and get_hocr. Previously a null pointer was read
# and freed unconditionally.
def get_unlv
  pointer = C::BaseAPI.get_unlv_text(to_ffi)
  return if pointer.null?

  pointer.read_string.force_encoding('ISO8859-1')
ensure
  # pointer is still nil if the native call itself raised; only a real,
  # non-null buffer is handed back to the native free routine.
  C.free_array_of_char(pointer) if pointer && !pointer.null?
end
get_variable(name, type = nil)
click to toggle source
# File lib/tesseract/api.rb, line 80
# Reads the variable +name+ through the C::BaseAPI has_*/get_* accessors.
#
# name - variable name passed to the native accessors.
# type - optional type. When nil, every key of Types is probed with
#        "has_<type>_variable" and the first match is used. Otherwise it
#        must be a key of Types or a member of one of its alias lists.
#
# Returns the value from "get_<type>_variable", or nil when no accessor
# reports the variable as present.
# Raises ArgumentError when +type+ is neither a key nor a known alias.
def get_variable(name, type = nil)
  if type.nil?
    detected = Types.keys.find do |candidate|
      C::BaseAPI.__send__("has_#{candidate}_variable", to_ffi, name)
    end
    return unless detected

    return C::BaseAPI.__send__("get_#{detected}_variable", to_ffi, name)
  end

  unless Types.has_key?(type)
    # Resolve the alias into its own local. The previous code destructured
    # into +name+, overwriting the variable name with the type name, so the
    # lookup below queried the wrong variable.
    canonical, = Types.find { |_key, aliases| aliases.member?(type) }
    raise ArgumentError, "unknown type #{type}" unless canonical

    type = canonical
  end

  return unless C::BaseAPI.__send__("has_#{type}_variable", to_ffi, name)

  C::BaseAPI.__send__("get_#{type}_variable", to_ffi, name)
end
init(datapath = nil, language = 'eng', mode = :DEFAULT)
click to toggle source
# File lib/tesseract/api.rb, line 102
# Initialises the native API through C::BaseAPI.init.
#
# datapath - data directory; when nil, Tesseract.prefix is used, and
#            '/usr/share' if that is nil too.
# language - language identifier, converted with to_s (default 'eng').
# mode     - mode value forwarded unchanged (default :DEFAULT).
#
# Raises RuntimeError when the native call returns a non-zero status.
def init(datapath = nil, language = 'eng', mode = :DEFAULT)
  handle    = to_ffi
  data_root = datapath || Tesseract.prefix || '/usr/share'
  status    = C::BaseAPI.init(handle, data_root, language.to_s, mode)

  raise 'the API did not Init correctly' unless status.zero?
end
mean_text_confidence()
click to toggle source
# File lib/tesseract/api.rb, line 202
# Returns whatever C::BaseAPI.mean_text_conf reports for this handle.
def mean_text_confidence
  handle = to_ffi
  C::BaseAPI.mean_text_conf(handle)
end
process_page(pix, page = 0, name = "")
click to toggle source
# File lib/tesseract/api.rb, line 140
# Runs C::BaseAPI.process_page and returns the text collected in a native
# string created with C.create_string.
#
# pix  - an Image (unwrapped with to_ffi) or any other value, passed as is.
# page - page index (default 0).
# name - name forwarded to the native call (default "").
#
# Raises RuntimeError when the native call returns a falsy value.
# The native string is always destroyed before returning.
def process_page(pix, page = 0, name = "")
  buffer = C.create_string
  handle = to_ffi
  image  = pix.is_a?(Image) ? pix.to_ffi : pix

  succeeded = C::BaseAPI.process_page(handle, image, page, name, buffer)
  raise 'process_page failed' unless succeeded

  content = C.string_content(buffer)
  content.read_string(C.string_length(buffer))
ensure
  C.destroy_string(buffer)
end
process_pages(name)
click to toggle source
# File lib/tesseract/api.rb, line 128
# Runs C::BaseAPI.process_pages for +name+ and returns the text collected in
# a native string created with C.create_string.
#
# Raises RuntimeError when the native call returns a falsy value.
# The native string is always destroyed before returning.
def process_pages(name)
  buffer = C.create_string

  succeeded = C::BaseAPI.process_pages(to_ffi, name, buffer)
  raise 'process_pages failed' unless succeeded

  content = C.string_content(buffer)
  content.read_string(C.string_length(buffer))
ensure
  C.destroy_string(buffer)
end
read_config_file(path)
click to toggle source
# File lib/tesseract/api.rb, line 108
# Forwards +path+ to C::BaseAPI.read_config_file for this handle and returns
# its result.
def read_config_file(path)
  handle = to_ffi
  C::BaseAPI.read_config_file(handle, path)
end
set_image(pix)
click to toggle source
# File lib/tesseract/api.rb, line 120
# Hands an image to C::BaseAPI.set_image.
#
# pix - an Image (unwrapped with to_ffi) or any other value, passed as is.
def set_image(pix)
  handle = to_ffi
  image  = pix.is_a?(Image) ? pix.to_ffi : pix

  C::BaseAPI.set_image(handle, image)
end
set_input_name(name)
click to toggle source
# File lib/tesseract/api.rb, line 68
# Forwards +name+ to C::BaseAPI.set_input_name for this handle.
def set_input_name(name)
  handle = to_ffi
  C::BaseAPI.set_input_name(handle, name)
end
set_output_name(name)
click to toggle source
# File lib/tesseract/api.rb, line 72
# Forwards +name+ to C::BaseAPI.set_output_name for this handle.
def set_output_name(name)
  handle = to_ffi
  C::BaseAPI.set_output_name(handle, name)
end
set_page_seg_mode(value)
click to toggle source
# File lib/tesseract/api.rb, line 116
# Forwards +value+ to C::BaseAPI.set_page_seg_mode for this handle.
def set_page_seg_mode(value)
  handle = to_ffi
  C::BaseAPI.set_page_seg_mode(handle, value)
end
set_rectangle(left, top, width, height)
click to toggle source
# File lib/tesseract/api.rb, line 124
# Forwards the rectangle (+left+, +top+, +width+, +height+) to
# C::BaseAPI.set_rectangle for this handle.
def set_rectangle(left, top, width, height)
  handle = to_ffi
  C::BaseAPI.set_rectangle(handle, left, top, width, height)
end
set_variable(name, value)
click to toggle source
# File lib/tesseract/api.rb, line 76
# Forwards +name+ and +value+ to C::BaseAPI.set_variable for this handle and
# returns its result.
def set_variable(name, value)
  handle = to_ffi
  C::BaseAPI.set_variable(handle, name, value)
end
to_ffi()
click to toggle source
# File lib/tesseract/api.rb, line 218
# Returns the object stored in @internal, i.e. the value the other methods
# pass as the first argument to C::BaseAPI calls.
def to_ffi
  @internal
end
version()
click to toggle source
# File lib/tesseract/api.rb, line 64
# Returns whatever C::BaseAPI.version reports for this handle.
def version
  handle = to_ffi
  C::BaseAPI.version(handle)
end