24 #include "allheaders.h" 35 in_minor_direction_ =
false;
36 at_beginning_of_minor_run_ =
false;
37 preserve_interword_spaces_ =
false;
39 BoolParam *p = ParamUtils::FindParam<BoolParam>(
42 if (p != NULL) preserve_interword_spaces_ = (bool)(*p);
44 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
45 MoveToLogicalStartOfTextline();
54 return current_paragraph_is_ltr_;
57 bool ResultIterator::CurrentParagraphIsLtr()
const {
61 it.RestartParagraph();
87 num_rtl = leftmost_rtl ? 1 : 0;
95 num_ltr += rightmost_ltr ? 1 : 0;
107 return num_ltr >= num_rtl;
114 void ResultIterator::CalculateBlobOrder(
116 bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
117 blob_indices->
clear();
144 if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
145 (letter_types[i + 1] == U_EURO_NUM_SEP ||
146 letter_types[i + 1] == U_COMMON_NUM_SEP)) {
147 letter_types[i + 1] = U_EURO_NUM;
153 if (letter_types[i] == U_EURO_NUM_TERM) {
155 while (j <
word_length_ && letter_types[j] == U_EURO_NUM_TERM) { j++; }
156 if (j <
word_length_ && letter_types[j] == U_EURO_NUM) {
158 for (
int k = i; k < j; k++) letter_types[k] = U_EURO_NUM;
161 while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) { j--; }
162 if (j > -1 && letter_types[j] == U_EURO_NUM) {
164 for (
int k = j; k <= i; k++) letter_types[k] = U_EURO_NUM;
172 int ti = letter_types[i];
173 if (ti == U_LTR || ti == U_EURO_NUM) {
177 int tj = letter_types[j];
178 if (tj == U_LTR || tj == U_EURO_NUM) {
180 }
else if (tj == U_COMMON_NUM_SEP || tj == U_OTHER_NEUTRAL) {
187 for (
int k = i; k <= last_good; k++) letter_types[k] = U_LTR;
190 letter_types[i] = U_RTL;
197 if (letter_types[i] == U_RTL) {
203 for (; j >= 0 && letter_types[j] != U_RTL; j--) { }
205 for (
int k = j + 1; k <= i; k++) blob_indices->
push_back(k);
213 for (
int i = 0; i < dirs.
size(); i++) {
226 bool paragraph_is_ltr,
227 const LTRResultIterator &resit,
234 bool paragraph_is_ltr,
235 const LTRResultIterator &resit,
240 directions = (dirs_arg != NULL) ? dirs_arg : &dirs;
248 directions->
push_back(ltr_it.WordDirection());
256 bool paragraph_is_ltr,
260 if (word_dirs.
size() == 0)
return;
264 int minor_direction, major_direction, major_step, start, end;
265 if (paragraph_is_ltr) {
267 end = word_dirs.
size();
272 start = word_dirs.
size() - 1;
281 int neutral_end = start;
282 while (neutral_end > 0 && word_dirs[neutral_end] ==
DIR_NEUTRAL) {
288 int left = neutral_end;
293 for (
int i = left; i < word_dirs.
size(); i++) {
302 for (
int i = start; i != end;) {
303 if (word_dirs[i] == minor_direction) {
305 while (j != end && word_dirs[j] != major_direction)
307 if (j == end) j -= major_step;
308 while (j != i && word_dirs[j] != minor_direction)
312 for (
int k = j; k != i; k -= major_step) {
326 int ResultIterator::LTRWordIndex()
const {
327 int this_word_index = 0;
329 textline.RestartRow();
330 while (!textline.PositionedAtSameWord(
it_)) {
334 return this_word_index;
337 void ResultIterator::MoveToLogicalStartOfWord() {
343 CalculateBlobOrder(&blob_order);
344 if (blob_order.
size() == 0 || blob_order[0] == 0)
return;
348 bool ResultIterator::IsAtFinalSymbolOfWord()
const {
351 CalculateBlobOrder(&blob_order);
355 bool ResultIterator::IsAtFirstSymbolOfWord()
const {
358 CalculateBlobOrder(&blob_order);
362 void ResultIterator::AppendSuffixMarks(
STRING *text)
const {
364 bool reading_direction_is_ltr =
365 current_paragraph_is_ltr_ ^ in_minor_direction_;
373 *
this, &textline_order);
374 int this_word_index = LTRWordIndex();
375 int i = textline_order.
get_index(this_word_index);
378 int last_non_word_mark = 0;
379 for (i++; i < textline_order.
size() && textline_order[i] < 0; i++) {
380 last_non_word_mark = textline_order[i];
383 *text += reading_direction_is_ltr ?
kLRM :
kRLM;
385 if (current_paragraph_is_ltr_) {
393 void ResultIterator::MoveToLogicalStartOfTextline() {
397 dynamic_cast<const LTRResultIterator&>(*
this),
400 for (; i < word_indices.
size() && word_indices[i] < 0; i++) {
401 if (word_indices[i] ==
kMinorRunStart) in_minor_direction_ =
true;
402 else if (word_indices[i] ==
kMinorRunEnd) in_minor_direction_ =
false;
404 if (in_minor_direction_) at_beginning_of_minor_run_ =
true;
405 if (i >= word_indices.
size())
return;
406 int first_word_index = word_indices[i];
407 for (
int j = 0; j < first_word_index; j++) {
410 MoveToLogicalStartOfWord();
415 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
416 in_minor_direction_ =
false;
417 at_beginning_of_minor_run_ =
false;
418 MoveToLogicalStartOfTextline();
422 if (
it_->
block() == NULL)
return false;
431 current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
433 in_minor_direction_ =
false;
434 MoveToLogicalStartOfTextline();
439 CalculateBlobOrder(&blob_order);
441 while (next_blob < blob_order.
size() &&
445 if (next_blob < blob_order.
size()) {
448 at_beginning_of_minor_run_ =
false;
457 int this_word_index = LTRWordIndex();
461 int final_real_index = word_indices.
size() - 1;
462 while (final_real_index > 0 && word_indices[final_real_index] < 0)
464 for (
int i = 0; i < final_real_index; i++) {
465 if (word_indices[i] == this_word_index) {
467 for (; j < final_real_index && word_indices[j] < 0; j++) {
468 if (word_indices[j] ==
kMinorRunStart) in_minor_direction_ =
true;
469 if (word_indices[j] ==
kMinorRunEnd) in_minor_direction_ =
false;
471 at_beginning_of_minor_run_ = (word_indices[j - 1] ==
kMinorRunStart);
474 tprintf(
"Next(RIL_WORD): %d -> %d\n",
475 this_word_index, word_indices[j]);
478 for (
int k = 0; k < word_indices[j]; k++) {
481 MoveToLogicalStartOfWord();
486 tprintf(
"Next(RIL_WORD): %d -> EOL\n", this_word_index);
497 if (
it_->
block() == NULL)
return false;
498 if (
it_->
word() == NULL)
return true;
501 bool at_word_start = IsAtFirstSymbolOfWord();
502 if (level ==
RIL_WORD)
return at_word_start;
506 line_start.MoveToLogicalStartOfTextline();
508 bool at_textline_start = at_word_start && *line_start.
it_ == *
it_;
513 bool at_block_start = at_textline_start &&
515 if (level ==
RIL_BLOCK)
return at_block_start;
517 bool at_para_start = at_block_start ||
518 (at_textline_start &&
521 if (level ==
RIL_PARA)
return at_para_start;
534 if (
Empty(element))
return true;
543 if (next.
Empty(element))
return true;
544 while (element > level) {
557 if (
it_->
word() == NULL)
return NULL;
564 pp.AppendUTF8ParagraphText(&text);
569 AppendUTF8ParagraphText(&text);
574 it.MoveToLogicalStartOfTextline();
575 it.IterateAndAppendUTF8TextlineText(&text);
579 AppendUTF8WordText(&text);
583 bool reading_direction_is_ltr =
584 current_paragraph_is_ltr_ ^ in_minor_direction_;
585 if (at_beginning_of_minor_run_) {
586 text += reading_direction_is_ltr ?
kLRM :
kRLM;
589 if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text);
593 int length = text.
length() + 1;
594 char* result =
new char[length];
595 strncpy(result, text.
string(), length);
599 void ResultIterator::AppendUTF8WordText(
STRING *text)
const {
602 bool reading_direction_is_ltr =
603 current_paragraph_is_ltr_ ^ in_minor_direction_;
604 if (at_beginning_of_minor_run_) {
605 *text += reading_direction_is_ltr ?
kLRM :
kRLM;
609 CalculateBlobOrder(&blob_order);
610 for (
int i = 0; i < blob_order.
size(); i++) {
611 *text +=
it_->
word()->
BestUTF8(blob_order[i], !reading_direction_is_ltr);
613 AppendSuffixMarks(text);
616 void ResultIterator::IterateAndAppendUTF8TextlineText(
STRING *text) {
625 *
this, &dirs, &textline_order);
627 current_paragraph_is_ltr_ ?
"ltr" :
"rtl");
628 PrintScriptDirs(dirs);
630 current_paragraph_is_ltr_ ?
"ltr" :
"rtl");
631 for (
int i = 0; i < textline_order.
size(); i++) {
632 tprintf(
"%d ", textline_order[i]);
637 int words_appended = 0;
640 : (words_appended > 0);
641 for (
int i = 0; i < numSpaces; ++i) {
644 AppendUTF8WordText(text);
648 tprintf(
"%d words printed\n", words_appended);
656 void ResultIterator::AppendUTF8ParagraphText(
STRING *text)
const {
658 it.RestartParagraph();
659 it.MoveToLogicalStartOfTextline();
662 it.IterateAndAppendUTF8TextlineText(text);
663 }
while (it.it_->block() != NULL && !it.IsAtBeginningOf(
RIL_PARA));
666 bool ResultIterator::BidiDebug(
int min_level)
const {
668 IntParam *p = ParamUtils::FindParam<IntParam>(
671 if (p != NULL) debug_level = (
inT32)(*p);
672 return debug_level >= min_level;
GenericVector< BoolParam * > bool_params
tesseract::ParamsVectors * GlobalParams()
BLOCK_RES * prev_block() const
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
virtual bool Next(PageIteratorLevel level)
bool IsWithinFirstTextlineOfParagraph() const
bool ParagraphIsLtr() const
const char * BestUTF8(int blob_index, bool in_rtl_context) const
virtual char * GetUTF8Text(PageIteratorLevel level) const
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
int get_index(T object) const
TESS_LOCAL void BeginWord(int offset)
WERD_CHOICE * best_choice
bool UnicharsInReadingOrder() const
GenericVector< IntParam * > int_params
const char * string() const
const char * paragraph_separator_
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
bool Empty(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
const char * line_separator_
static const int kMinorRunEnd
virtual void RestartRow()
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
static const int kMinorRunStart
ROW_RES * prev_row() const
BLOCK_RES * block() const
static const int kComplexWord
UNICHARSET::Direction SymbolDirection(int blob_index) const
TESS_LOCAL ResultIterator(const LTRResultIterator &resit)
static void CalculateTextlineOrder(bool paragraph_is_ltr, const GenericVector< StrongScriptDirection > &word_dirs, GenericVectorEqEq< int > *reading_order)