# Buildsheet autogenerated by ravenadm tool -- Do not edit.

NAMEBASE=		python-pyocr
VERSION=		0.8.5
KEYWORDS=		python
VARIANTS=		v13 v14
SDESC[v13]=		Wrapper for OCR engines (Tesseract, etc) (3.13)
SDESC[v14]=		Wrapper for OCR engines (Tesseract, etc) (3.14)
HOMEPAGE=		none
CONTACT=		Python_Automaton[python@ironwolf.systems]

DOWNLOAD_GROUPS=	main
SITES[main]=		PYPIWHL/91/1c/3ef6485732685ad9c938c9ebb3fad0570f58bdc54e1242cdfa40040a630e
DISTFILE[1]=		pyocr-0.8.5-py3-none-any.whl:main
DIST_SUBDIR=		python-src
DF_INDEX=		1
SPKGS[v13]=		single
SPKGS[v14]=		single

OPTIONS_AVAILABLE=	PY313 PY314
OPTIONS_STANDARD=	none
VOPTS[v13]=		PY313=ON PY314=OFF
VOPTS[v14]=		PY313=OFF PY314=ON

RUN_DEPENDS=		tesseract:tools:std

DISTNAME=		pyocr-0.8.5.dist-info

GENERATED=		yes

[PY313].RUN_DEPENDS_ON=			python-Pillow:single:v13
[PY313].USES_ON=			python:v13,wheel

[PY314].RUN_DEPENDS_ON=			python-Pillow:single:v14
[PY314].USES_ON=			python:v14,wheel

[FILE:2514:descriptions/desc.single]
# PyOCR

PyOCR is an optical character recognition (OCR) tool wrapper for python.
That is, it helps using various OCR tools from a Python program.

It has been tested only on GNU/Linux systems. It should also work on
similar systems (*BSD, etc). It may or may not work on Windows, MacOSX, etc.

## Supported OCR tools

* Libtesseract (Python bindings for the C API)
* Tesseract (wrapper: fork + exec)
* Cuneiform (wrapper: fork + exec)

## Features

* Supports all the image formats supported by [Pillow],
  including jpeg, png, gif, bmp, tiff and others
* Various output types: text only, bounding boxes, etc.
* Orientation detection (Tesseract and libtesseract only)
* Can focus on digits only (Tesseract and libtesseract only)
* Can save and reload boxes in hOCR format
* PDF generation (libtesseract only)

## Limitations

* hOCR: Only a subset of the specification is supported. For instance,
  pages and paragraph positions are not stored.

## Installation

```sh
sudo pip3 install pyocr  # Python 3.X
```

or the manual way:
```sh
mkdir -p ~/git ; cd git
git clone https://gitlab.gnome.org/World/OpenPaperwork/pyocr.git
cd pyocr
make install  # will run 'python ./setup.py install'
```

## Usage

### Initialization

```Python
from PIL import Image
import sys

import pyocr
import pyocr.builders

tools = pyocr.get_available_tools()
if len(tools) == 0:
    print("No OCR tool found")
    sys.exit(1)
# The tools are returned in the recommended order of usage
tool = tools[0]
print("Will use tool '%s'" % (tool.get_name()))
# Ex: Will use tool 'libtesseract'

langs = tool.get_available_languages()
print("Available languages: %s" % ", ".join(langs))
lang = langs[0]
print("Will use lang '%s'" % (lang))
# Ex: Will use lang 'fra'
# Note that languages are NOT sorted in any way. Please refer
# to the system locale settings for the default language
# to use.
```

### Image to text

```Python
txt = tool.image_to_string(
    Image.open('test.png'),
    lang=lang,
    builder=pyocr.builders.TextBuilder()
)
# txt is a Python string

word_boxes = tool.image_to_string(
    Image.open('test.png'),
    lang="eng",
    builder=pyocr.builders.WordBoxBuilder()
)
# list of box objects. For each box object:
#   box.content is the word in the box
#   box.position is its position on the page (in pixels)
#
# Beware that some OCR tools (Tesseract for instance)
# may return empty boxes

line_and_word_boxes = tool.image_to_string(
    Image.open('test.png'), lang="fra",
    builder=pyocr.builders.LineBoxBuilder()


[FILE:118:distinfo]
3a534eee5ac6ce681159d67528b8953f0f2a7a5aad8733373ea7b3e8ac13a0b4        40003 python-src/pyocr-0.8.5-py3-none-any.whl