34 #include "unicode/uchar.h" 48 box_ = boxCreate(x, y, width, height);
53 std::vector<BoxChar*>* boxes) {
54 for (
int i = 0; i < boxes->size(); ++i) {
55 BOX*
box = (*boxes)[i]->box_;
71 for (
int i = 0; i < boxes->size(); ++i) {
72 if ((*boxes)[i]->box_ == NULL)
tprintf(
"Null box at index %d\n", i);
82 std::vector<BoxChar*>* boxes) {
85 for (
int i = 0; i < boxes->size(); ++i) {
86 Box*
box = (*boxes)[i]->box_;
88 if (prev_i < 0 || prev_i < i - 1 || i + 1 == boxes->size()) {
92 boxes->erase(boxes->begin() + i);
94 }
while (i >= 0 && i + 1 == boxes->size() && (*boxes)[i]->box_ == NULL);
99 Box* prev_box = (*boxes)[prev_i]->box_;
100 int shift =
box->x - prev_box->x;
101 if (vertical_rules) {
102 shift =
box->y - prev_box->y;
103 }
else if (rtl_rules) {
106 if (-shift > max_shift) {
108 int width = prev_box->w;
109 int height = prev_box->h;
110 int x = prev_box->x + width;
112 if (vertical_rules) {
114 y = prev_box->y + height;
115 }
else if (rtl_rules) {
116 x = prev_box->x - width;
118 tprintf(
"prev x = %d, width=%d\n", prev_box->x, width);
122 if (prev_i == i - 1) {
125 new_box->
AddBox(x, y, width, height);
126 new_box->page_ = (*boxes)[i]->page_;
127 boxes->insert(boxes->begin() + i, new_box);
130 (*boxes)[i - 1]->AddBox(x, y, width, height);
131 (*boxes)[i - 1]->ch_ =
"\t";
134 }
else if (shift > max_shift) {
145 std::vector<BoxChar*>* boxes) {
148 for (
int i = 1; i + 1 < boxes->size(); ++i) {
149 Box*
box = (*boxes)[i]->box_;
151 Box* prev = (*boxes)[i - 1]->box_;
152 Box* next = (*boxes)[i + 1]->box_;
154 int top =
MIN(prev->y, next->y);
155 int bottom =
MAX(prev->y + prev->h, next->y + next->h);
156 int left = prev->x + prev->w;
158 if (vertical_rules) {
159 top = prev->y + prev->h;
161 left =
MIN(prev->x, next->x);
162 right =
MAX(prev->x + prev->w, next->x + next->w);
163 }
else if (rtl_rules) {
168 left = next->x + next->w;
170 j >= 0 && (*boxes)[j]->ch_ !=
" " && (*boxes)[j]->ch_ !=
"\t";
172 prev = (*boxes)[j]->box_;
174 if (prev->x < right) {
180 for (
int j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != NULL &&
181 (*boxes)[j]->ch_ !=
"\t";
183 next = (*boxes)[j]->box_;
184 if (next->x + next->w > left) {
185 left = next->x + next->w;
191 if (right <= left) right = left + 1;
192 if (bottom <= top) bottom = top + 1;
193 (*boxes)[i]->AddBox(left, top, right - left, bottom - top);
194 (*boxes)[i]->ch_ =
" ";
206 for (
int start = 0; start < boxes->size(); start = end + 1) {
208 while (end < boxes->size() && (*boxes)[end]->ch_ !=
"\t") ++end;
209 sort(boxes->begin() + start, boxes->begin() + end, sorter);
216 int num_rtl = 0, num_ltr = 0;
217 for (
int i = 0; i < boxes.size(); ++i) {
221 tprintf(
"Illegal utf8 in boxchar %d string:%s = ", i,
222 boxes[i]->ch_.c_str());
223 for (
int c = 0; c < boxes[i]->ch_.size(); ++c) {
224 tprintf(
" 0x%x", boxes[i]->ch_[c]);
229 for (
int j = 0; j < uni_vector.
size(); ++j) {
230 UCharDirection dir = u_charDirection(uni_vector[j]);
231 if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC ||
232 dir == U_ARABIC_NUMBER) {
239 return num_rtl > num_ltr;
245 inT64 total_dx = 0, total_dy = 0;
246 for (
int i = 1; i < boxes.size(); ++i) {
247 if (boxes[i - 1]->box_ != NULL && boxes[i]->box_ != NULL &&
248 boxes[i - 1]->page_ == boxes[i]->page_) {
249 int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
250 int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
258 return total_dy > total_dx;
264 int total_length = 0;
265 for (
int i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size();
273 int start_box,
int end_box,
274 std::vector<BoxChar*>* boxes) {
275 Boxa* orig = boxaCreate(0);
276 for (
int i = start_box; i < end_box; ++i) {
277 BOX*
box = (*boxes)[i]->box_;
278 if (
box) boxaAddBox(orig,
box, L_CLONE);
280 Boxa* rotated = boxaRotate(orig, xcenter, ycenter, rotation);
282 for (
int i = start_box, box_ind = 0; i < end_box; ++i) {
283 if ((*boxes)[i]->box_) {
284 boxDestroy(&((*boxes)[i]->box_));
285 (*boxes)[i]->box_ = boxaGetBox(rotated, box_ind++, L_CLONE);
288 boxaDestroy(&rotated);
294 const std::vector<BoxChar*>& boxes) {
301 const std::vector<BoxChar*>& boxes) {
304 for (
int i = 0; i < boxes.size(); ++i) {
305 const Box*
box = boxes[i]->box_;
307 tprintf(
"Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
312 boxes[i]->ch_.c_str(),
box->x, height -
box->y -
box->h,
313 box->x +
box->w, height -
box->y, boxes[i]->page_);
314 output.append(buffer, nbytes);
static void TranslateBoxes(int xshift, int yshift, vector< BoxChar *> *boxes)
static void ReorderRTLText(vector< BoxChar *> *boxes)
static bool UTF8ToUnicode(const char *utf8_str, GenericVector< int > *unicodes)
void AddBox(int x, int y, int width, int height)
BoxChar(const char *utf8_str, int len)
static string GetTesseractBoxStr(int height, const vector< BoxChar *> &boxes)
static void PrepareToWrite(vector< BoxChar *> *boxes)
static void WriteTesseractBoxFile(const string &name, int height, const vector< BoxChar *> &boxes)
static void InsertNewlines(bool rtl_rules, bool vertical_rules, vector< BoxChar *> *boxes)
static int TotalByteLength(const vector< BoxChar *> &boxes)
static bool ContainsMostlyRTL(const vector< BoxChar *> &boxes)
static void RotateBoxes(float rotation, int xcenter, int ycenter, int start_box, int end_box, vector< BoxChar *> *boxes)
const int kMinNewlineRatio
static void InsertSpaces(bool rtl_rules, bool vertical_rules, vector< BoxChar *> *boxes)
static void WriteStringToFileOrDie(const string &str, const string &filename)
static bool MostlyVertical(const vector< BoxChar *> &boxes)