class Tesseract::API

Constants

Types

Public Class Methods

image_for(image) click to toggle source

Get a pointer to an image usable by tesseract-ocr from a path, a string containing the image data, or an IO stream.

# File lib/tesseract/api.rb, line 37
# Builds a new Image from +image+ and returns it.
#
# +image+ is handed to Image.new unchanged; what it may be is decided there.
def self.image_for(image)
        wrapped = Image.new(image)
        wrapped
end
new() click to toggle source
# File lib/tesseract/api.rb, line 56
# Creates the native API object and keeps it in @internal, wrapped in an
# FFI::AutoPointer whose release callback is this class's +finalize+ method.
def initialize
        releaser  = self.class.method(:finalize)
        @internal = FFI::AutoPointer.new(C::BaseAPI.create, releaser)
end
to_language_code(code) click to toggle source

Transform a language code into a code usable by tesseract-ocr.

# File lib/tesseract/api.rb, line 43
# Maps +code+ to the alpha3 value of the matching ISO_639 entry.
#
# The lookup uses the downcased string form of +code+. Any StandardError
# raised along the way (for instance when no entry is found) is swallowed
# and the string form of +code+ is returned as-is instead.
def self.to_language_code(code)
        lookup_key = code.to_s.downcase
        entry      = ISO_639.find(lookup_key)

        entry.alpha3
rescue
        code.to_s
end

Public Instance Methods

all_word_confidences() click to toggle source
# File lib/tesseract/api.rb, line 206
# Returns the result of C::BaseAPI.all_word_confidences for this API handle.
def all_word_confidences
        handle = to_ffi
        C::BaseAPI.all_word_confidences(handle)
end
clear() click to toggle source
# File lib/tesseract/api.rb, line 210
# Forwards to C::BaseAPI.clear with this API handle and returns its result.
def clear
        handle = to_ffi
        C::BaseAPI.clear(handle)
end
end() click to toggle source
# File lib/tesseract/api.rb, line 214
# Forwards to C::BaseAPI.end with this API handle and returns its result.
def end
        handle = to_ffi
        C::BaseAPI.end(handle)
end
get_box(page = 0) click to toggle source
# File lib/tesseract/api.rb, line 182
# Returns the box text produced by C::BaseAPI.get_box_text for +page+.
#
# page - page number forwarded to the native call (defaults to 0).
#
# Returns the text tagged as UTF-8, or nil when the native call hands back a
# null pointer. The native buffer is released before returning.
def get_box(page = 0)
        pointer = C::BaseAPI.get_box_text(to_ffi, page)

        # Same guard as get_hocr/get_text: a null pointer means there is
        # nothing to read.
        return if pointer.null?

        pointer.read_string.force_encoding('UTF-8')
ensure
        # pointer is still nil when the native call itself raised; freeing
        # must be skipped then, and also for a null pointer.
        C.free_array_of_char(pointer) unless pointer.nil? || pointer.null?
end
get_hocr(page = 0) click to toggle source
# File lib/tesseract/api.rb, line 169
# Returns the hOCR text produced by C::BaseAPI.get_hocr_text for +page+.
#
# page - page number forwarded to the native call (defaults to 0).
#
# Returns the text tagged as UTF-8, or nil when the native call hands back a
# null pointer. The native buffer is released before returning.
def get_hocr(page = 0)
        pointer = C::BaseAPI.get_hocr_text(to_ffi, page)

        return if pointer.null?

        pointer.read_string.force_encoding('UTF-8')
ensure
        # pointer is still nil when the native call itself raised; calling
        # null? on it here would replace that error with a NoMethodError.
        C.free_array_of_char(pointer) unless pointer.nil? || pointer.null?
end
get_iterator() click to toggle source
# File lib/tesseract/api.rb, line 152
# Wraps the value returned by C::BaseAPI.get_iterator in a new Iterator.
def get_iterator
        raw = C::BaseAPI.get_iterator(to_ffi)
        Iterator.new(raw)
end
get_page_seg_mode() click to toggle source
# File lib/tesseract/api.rb, line 112
# Returns the result of C::BaseAPI.get_page_seg_mode for this API handle.
def get_page_seg_mode
        handle = to_ffi
        C::BaseAPI.get_page_seg_mode(handle)
end
get_text() click to toggle source
# File lib/tesseract/api.rb, line 156
# Returns the text produced by C::BaseAPI.get_utf8_text.
#
# Returns the text tagged as UTF-8, or nil when the native call hands back a
# null pointer. The native buffer is released before returning.
def get_text
        pointer = C::BaseAPI.get_utf8_text(to_ffi)

        return if pointer.null?

        pointer.read_string.force_encoding('UTF-8')
ensure
        # pointer is still nil when the native call itself raised; calling
        # null? on it here would replace that error with a NoMethodError.
        C.free_array_of_char(pointer) unless pointer.nil? || pointer.null?
end
get_unlv() click to toggle source
# File lib/tesseract/api.rb, line 192
# Returns the UNLV text produced by C::BaseAPI.get_unlv_text.
#
# Returns the text tagged as ISO8859-1, or nil when the native call hands
# back a null pointer. The native buffer is released before returning.
def get_unlv
        pointer = C::BaseAPI.get_unlv_text(to_ffi)

        # Same guard as get_hocr/get_text: a null pointer means there is
        # nothing to read.
        return if pointer.null?

        pointer.read_string.force_encoding('ISO8859-1')
ensure
        # pointer is still nil when the native call itself raised; freeing
        # must be skipped then, and also for a null pointer.
        C.free_array_of_char(pointer) unless pointer.nil? || pointer.null?
end
get_variable(name, type = nil) click to toggle source
# File lib/tesseract/api.rb, line 80
# Looks up a variable through the typed has_*/get_* calls on C::BaseAPI.
#
# name - the variable name passed to the native calls.
# type - a key of Types or one of the aliases listed under a key; when nil,
#        every key of Types is probed in order and the first type for which
#        has_<type>_variable is truthy is used.
#        (Types is defined outside this view; it is used here as a mapping
#        from type key to a collection of aliases.)
#
# Returns the result of get_<type>_variable, or nil when no
# has_<type>_variable probe succeeds.
# Raises ArgumentError when +type+ is neither a key nor an alias in Types.
def get_variable(name, type = nil)
        if type.nil?
                detected = Types.keys.find { |candidate|
                        C::BaseAPI.__send__("has_#{candidate}_variable", to_ffi, name)
                }

                return unless detected
                return C::BaseAPI.__send__("get_#{detected}_variable", to_ffi, name)
        end

        unless Types.has_key?(type)
                # Resolve the alias into its own local: reusing `name` here
                # would overwrite the variable name being looked up.
                canonical, = Types.find { |_key, aliases| aliases.member?(type) }

                raise ArgumentError, "unknown type #{type}" unless canonical

                type = canonical
        end

        return unless C::BaseAPI.__send__("has_#{type}_variable", to_ffi, name)

        C::BaseAPI.__send__("get_#{type}_variable", to_ffi, name)
end
init(datapath = nil, language = 'eng', mode = :DEFAULT) click to toggle source
# File lib/tesseract/api.rb, line 102
# Initialises the native API via C::BaseAPI.init.
#
# datapath - data directory; when nil, Tesseract.prefix is used, and
#            '/usr/share' when that is nil/false as well.
# language - converted with to_s before being passed on (default 'eng').
# mode     - passed through unchanged (default :DEFAULT).
#
# Raises RuntimeError when the native call returns a non-zero status.
def init(datapath = nil, language = 'eng', mode = :DEFAULT)
        data_dir = datapath || Tesseract.prefix || '/usr/share'
        status   = C::BaseAPI.init(to_ffi, data_dir, language.to_s, mode)

        raise 'the API did not Init correctly' unless status.zero?
end
mean_text_confidence() click to toggle source
# File lib/tesseract/api.rb, line 202
# Returns the result of C::BaseAPI.mean_text_conf for this API handle.
def mean_text_confidence
        handle = to_ffi
        C::BaseAPI.mean_text_conf(handle)
end
process_page(pix, page = 0, name = "") click to toggle source
# File lib/tesseract/api.rb, line 140
# Runs C::BaseAPI.process_page and returns the text it wrote into a
# temporary native string.
#
# pix  - an Image (its to_ffi value is passed on) or anything else, which is
#        passed on as-is.
# page - forwarded unchanged (default 0).
# name - forwarded unchanged (default "").
#
# Raises RuntimeError when the native call returns a falsy value.
# The temporary string is destroyed on every exit path.
def process_page(pix, page = 0, name = "")
        buffer = C.create_string
        image  = pix.is_a?(Image) ? pix.to_ffi : pix
        done   = C::BaseAPI.process_page(to_ffi, image, page, name, buffer)

        raise 'process_page failed' unless done

        content = C.string_content(buffer)
        content.read_string(C.string_length(buffer))
ensure
        C.destroy_string(buffer)
end
process_pages(name) click to toggle source
# File lib/tesseract/api.rb, line 128
# Runs C::BaseAPI.process_pages for +name+ and returns the text it wrote
# into a temporary native string.
#
# Raises RuntimeError when the native call returns a falsy value.
# The temporary string is destroyed on every exit path.
def process_pages(name)
        buffer = C.create_string
        done   = C::BaseAPI.process_pages(to_ffi, name, buffer)

        raise 'process_pages failed' unless done

        content = C.string_content(buffer)
        content.read_string(C.string_length(buffer))
ensure
        C.destroy_string(buffer)
end
read_config_file(path) click to toggle source
# File lib/tesseract/api.rb, line 108
# Forwards +path+ to C::BaseAPI.read_config_file and returns its result.
def read_config_file(path)
        handle = to_ffi
        C::BaseAPI.read_config_file(handle, path)
end
set_image(pix) click to toggle source
# File lib/tesseract/api.rb, line 120
# Passes an image to C::BaseAPI.set_image and returns its result.
#
# An Image is converted with to_ffi first; any other value is passed as-is.
def set_image(pix)
        native_pix = if pix.is_a?(Image)
                pix.to_ffi
        else
                pix
        end

        C::BaseAPI.set_image(to_ffi, native_pix)
end
set_input_name(name) click to toggle source
# File lib/tesseract/api.rb, line 68
# Forwards +name+ to C::BaseAPI.set_input_name and returns its result.
def set_input_name(name)
        handle = to_ffi
        C::BaseAPI.set_input_name(handle, name)
end
set_output_name(name) click to toggle source
# File lib/tesseract/api.rb, line 72
# Forwards +name+ to C::BaseAPI.set_output_name and returns its result.
def set_output_name(name)
        handle = to_ffi
        C::BaseAPI.set_output_name(handle, name)
end
set_page_seg_mode(value) click to toggle source
# File lib/tesseract/api.rb, line 116
# Forwards +value+ to C::BaseAPI.set_page_seg_mode and returns its result.
def set_page_seg_mode(value)
        handle = to_ffi
        C::BaseAPI.set_page_seg_mode(handle, value)
end
set_rectangle(left, top, width, height) click to toggle source
# File lib/tesseract/api.rb, line 124
# Forwards the rectangle (left, top, width, height, in that order) to
# C::BaseAPI.set_rectangle and returns its result.
def set_rectangle(left, top, width, height)
        bounds = [left, top, width, height]
        C::BaseAPI.set_rectangle(to_ffi, *bounds)
end
set_variable(name, value) click to toggle source
# File lib/tesseract/api.rb, line 76
# Forwards +name+ and +value+ to C::BaseAPI.set_variable and returns its
# result.
def set_variable(name, value)
        handle = to_ffi
        C::BaseAPI.set_variable(handle, name, value)
end
to_ffi() click to toggle source
# File lib/tesseract/api.rb, line 218
# Returns the value held in @internal (assigned in initialize as an
# FFI::AutoPointer). The other instance methods pass this as the first
# argument of their C::BaseAPI calls.
def to_ffi
        @internal
end
version() click to toggle source
# File lib/tesseract/api.rb, line 64
# Returns the result of C::BaseAPI.version for this API handle.
def version
        handle = to_ffi
        C::BaseAPI.version(handle)
end