tesseract
3.05.02
|
#include <resultiterator.h>
Public Member Functions | |
virtual | ~ResultIterator () |
virtual void | Begin () |
virtual bool | Next (PageIteratorLevel level) |
virtual bool | IsAtBeginningOf (PageIteratorLevel level) const |
virtual bool | IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const |
virtual char * | GetUTF8Text (PageIteratorLevel level) const |
bool | ParagraphIsLtr () const |
Public Member Functions inherited from tesseract::LTRResultIterator | |
LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
virtual | ~LTRResultIterator () |
char * | GetUTF8Text (PageIteratorLevel level) const |
void | SetLineSeparator (const char *new_line) |
void | SetParagraphSeparator (const char *new_para) |
float | Confidence (PageIteratorLevel level) const |
void | RowAttributes (float *row_height, float *descenders, float *ascenders) const |
const char * | WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const |
const char * | WordRecognitionLanguage () const |
StrongScriptDirection | WordDirection () const |
bool | WordIsFromDictionary () const |
bool | WordIsNumeric () const |
bool | HasBlamerInfo () const |
const void * | GetParamsTrainingBundle () const |
const char * | GetBlamerDebug () const |
const char * | GetBlamerMisadaptionDebug () const |
bool | HasTruthString () const |
bool | EquivalentToTruth (const char *str) const |
char * | WordTruthUTF8Text () const |
char * | WordNormedUTF8Text () const |
const char * | WordLattice (int *lattice_size) const |
bool | SymbolIsSuperscript () const |
bool | SymbolIsSubscript () const |
bool | SymbolIsDropcap () const |
Public Member Functions inherited from tesseract::PageIterator | |
PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height) | |
virtual | ~PageIterator () |
PageIterator (const PageIterator &src) | |
const PageIterator & | operator= (const PageIterator &src) |
bool | PositionedAtSameWord (const PAGE_RES_IT *other) const |
virtual void | RestartParagraph () |
bool | IsWithinFirstTextlineOfParagraph () const |
virtual void | RestartRow () |
int | Cmp (const PageIterator &other) const |
void | SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots) |
bool | BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
bool | BoundingBox (PageIteratorLevel level, const int padding, int *left, int *top, int *right, int *bottom) const |
bool | BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const |
bool | Empty (PageIteratorLevel level) const |
PolyBlockType | BlockType () const |
Pta * | BlockPolygon () const |
Pix * | GetBinaryImage (PageIteratorLevel level) const |
Pix * | GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const |
bool | Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const |
void | Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const |
void | ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const |
bool | SetWordBlamerBundle (BlamerBundle *blamer_bundle) |
Static Public Member Functions | |
static ResultIterator * | StartOfParagraph (const LTRResultIterator &resit) |
static void | CalculateTextlineOrder (bool paragraph_is_ltr, const GenericVector< StrongScriptDirection > &word_dirs, GenericVectorEqEq< int > *reading_order) |
Static Public Attributes | |
static const int | kMinorRunStart = -1 |
static const int | kMinorRunEnd = -2 |
static const int | kComplexWord = -3 |
Protected Member Functions | |
TESS_LOCAL | ResultIterator (const LTRResultIterator &resit) |
Protected Member Functions inherited from tesseract::PageIterator | |
TESS_LOCAL void | BeginWord (int offset) |
Additional Inherited Members | |
Protected Attributes inherited from tesseract::LTRResultIterator | |
const char * | line_separator_ |
const char * | paragraph_separator_ |
Protected Attributes inherited from tesseract::PageIterator | |
PAGE_RES * | page_res_ |
Tesseract * | tesseract_ |
PAGE_RES_IT * | it_ |
WERD * | word_ |
int | word_length_ |
int | blob_index_ |
C_BLOB_IT * | cblob_it_ |
bool | include_upper_dots_ |
bool | include_lower_dots_ |
int | scale_ |
int | scaled_yres_ |
int | rect_left_ |
int | rect_top_ |
int | rect_width_ |
int | rect_height_ |
Definition at line 38 of file resultiterator.h.
|
inline virtual |
ResultIterator is copy constructible! The default copy constructor works just fine for us.
Definition at line 46 of file resultiterator.h.
|
explicit protected |
We presume the data associated with the given iterator will outlive us. NB: This is protected (not public) because it does something that is non-obvious: it resets to the beginning of the paragraph instead of staying wherever resit might have pointed.
Definition at line 33 of file resultiterator.cpp.
|
virtual |
Moves the iterator to point to the start of the page to begin an iteration.
Reimplemented from tesseract::PageIterator.
Definition at line 413 of file resultiterator.cpp.
|
static |
Yields the reading order as a sequence of indices and (optional) meta-marks for a set of words (given left-to-right). The meta-marks are passed as negative values: kMinorRunStart marks the start of minor-direction text; kMinorRunEnd marks the end of minor-direction text; kComplexWord indicates that the next indexed word contains both left-to-right and right-to-left characters and was treated as neutral.
For example, suppose we have five words in a text line, indexed [0,1,2,3,4] from the leftmost side of the text line. The following are all believable reading_orders:
Left-to-Right (in ltr paragraph): { 0, 1, 2, 3, 4 }
Left-to-Right (in rtl paragraph): { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
Right-to-Left (in rtl paragraph): { 4, 3, 2, 1, 0 }
Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph: { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
Definition at line 255 of file resultiterator.cpp.
|
virtual |
Returns the null-terminated UTF-8 encoded text string for the current object at the given level. Use delete[] to free after use.
Definition at line 556 of file resultiterator.cpp.
|
virtual |
IsAtBeginningOf() returns whether we're at the logical beginning of the given level (as opposed to PageIterator's left-to-right, top-to-bottom order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf(). For a full description, see pageiterator.h.
Reimplemented from tesseract::PageIterator.
Definition at line 496 of file resultiterator.cpp.
|
virtual |
Implement PageIterator's IsAtFinalElement correctly in a BiDi context. For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we point at the last word in a paragraph. See PageIterator for full comment.
NOTE! This is an exact copy of PageIterator::IsAtFinalElement with the change that the variable next is now a ResultIterator instead of a PageIterator.
Reimplemented from tesseract::PageIterator.
Definition at line 532 of file resultiterator.cpp.
|
virtual |
Moves to the start of the next object at the given level in the page hierarchy in the appropriate reading order and returns false if the end of the page was reached. NOTE that RIL_SYMBOL will skip non-text blocks, but all other PageIteratorLevel level values will visit each non-text block once. Think of non-text blocks as containing a single para, with a single line, with a single imaginary word. Calls to Next with different levels may be freely intermixed. This function iterates words in right-to-left scripts correctly, if the appropriate language has been loaded into Tesseract.
Reimplemented from tesseract::PageIterator.
Definition at line 421 of file resultiterator.cpp.
bool tesseract::ResultIterator::ParagraphIsLtr () const
Return whether the current paragraph's dominant reading direction is left-to-right (as opposed to right-to-left).
Definition at line 53 of file resultiterator.cpp.
|
static |
Definition at line 48 of file resultiterator.cpp.
|
static |
Definition at line 130 of file resultiterator.h.
|
static |
Definition at line 129 of file resultiterator.h.
|
static |
Definition at line 128 of file resultiterator.h.