27 #define MAX_LINE_LEN 1024 54 if (label32_ != NULL) {
63 if (label32_ != NULL) {
72 if (label32_ != NULL) {
78 SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
86 unsigned short first_char;
87 unsigned short last_char;
88 unsigned short norm_top;
89 unsigned short norm_bottom;
90 unsigned short norm_aspect_ratio;
96 if (fp->
Read(&val32,
sizeof(val32)) !=
sizeof(val32)) {
99 if (val32 != 0xabd0fefe) {
103 if (fp->
Read(&val32,
sizeof(val32)) !=
sizeof(val32)) {
108 label32 =
new char_32[val32 + 1];
110 if (fp->
Read(label32, val32 *
sizeof(*label32)) !=
111 (val32 *
sizeof(*label32))) {
121 if (fp->
Read(&page,
sizeof(page)) !=
sizeof(page)) {
125 if (fp->
Read(&left,
sizeof(left)) !=
sizeof(left)) {
129 if (fp->
Read(&top,
sizeof(top)) !=
sizeof(top)) {
133 if (fp->
Read(&first_char,
sizeof(first_char)) !=
sizeof(first_char)) {
137 if (fp->
Read(&last_char,
sizeof(last_char)) !=
sizeof(last_char)) {
141 if (fp->
Read(&norm_top,
sizeof(norm_top)) !=
sizeof(norm_top)) {
145 if (fp->
Read(&norm_bottom,
sizeof(norm_bottom)) !=
sizeof(norm_bottom)) {
149 if (fp->
Read(&norm_aspect_ratio,
sizeof(norm_aspect_ratio)) !=
150 sizeof(norm_aspect_ratio)) {
157 char_samp->label32_ = label32;
158 char_samp->page_ = page;
159 char_samp->left_ = left;
160 char_samp->top_ = top;
161 char_samp->first_char_ = first_char;
162 char_samp->last_char_ = last_char;
163 char_samp->norm_top_ = norm_top;
164 char_samp->norm_bottom_ = norm_bottom;
165 char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
179 unsigned short first_char;
180 unsigned short last_char;
181 unsigned short norm_top;
182 unsigned short norm_bottom;
183 unsigned short norm_aspect_ratio;
188 if (fread(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
191 if (val32 != 0xabd0fefe) {
195 if (fread(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
200 label32 =
new char_32[val32 + 1];
202 if (fread(label32, 1, val32 *
sizeof(*label32), fp) !=
203 (val32 *
sizeof(*label32))) {
213 if (fread(&page, 1,
sizeof(page), fp) !=
sizeof(page) ||
214 fread(&left, 1,
sizeof(left), fp) !=
sizeof(left) ||
215 fread(&top, 1,
sizeof(top), fp) !=
sizeof(top) ||
216 fread(&first_char, 1,
sizeof(first_char), fp) !=
sizeof(first_char) ||
217 fread(&last_char, 1,
sizeof(last_char), fp) !=
sizeof(last_char) ||
218 fread(&norm_top, 1,
sizeof(norm_top), fp) !=
sizeof(norm_top) ||
219 fread(&norm_bottom, 1,
sizeof(norm_bottom), fp) !=
sizeof(norm_bottom) ||
220 fread(&norm_aspect_ratio, 1,
sizeof(norm_aspect_ratio), fp) !=
221 sizeof(norm_aspect_ratio)) {
228 char_samp->label32_ = label32;
229 char_samp->page_ = page;
230 char_samp->left_ = left;
231 char_samp->top_ = top;
232 char_samp->first_char_ = first_char;
233 char_samp->last_char_ = last_char;
234 char_samp->norm_top_ = norm_top;
235 char_samp->norm_bottom_ = norm_bottom;
236 char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
249 if (scaled_samp->
ScaleFrom(
this, isotropic) ==
false) {
253 scaled_samp->left_ = left_;
254 scaled_samp->top_ = top_;
255 scaled_samp->page_ = page_;
257 scaled_samp->first_char_ = first_char_;
258 scaled_samp->last_char_ = last_char_;
259 scaled_samp->norm_top_ = norm_top_;
260 scaled_samp->norm_bottom_ = norm_bottom_;
261 scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
267 unsigned char *data) {
282 if (fwrite(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
286 val32 = (label32_ == NULL) ? 0 :
LabelLen(label32_);
287 if (fwrite(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
291 if (label32_ != NULL) {
292 if (fwrite(label32_, 1, val32 *
sizeof(*label32_), fp) !=
293 (val32 *
sizeof(*label32_))) {
298 if (fwrite(&page_, 1,
sizeof(page_), fp) !=
sizeof(page_)) {
301 if (fwrite(&left_, 1,
sizeof(left_), fp) !=
sizeof(left_)) {
304 if (fwrite(&top_, 1,
sizeof(top_), fp) !=
sizeof(top_)) {
307 if (fwrite(&first_char_, 1,
sizeof(first_char_), fp) !=
308 sizeof(first_char_)) {
311 if (fwrite(&last_char_, 1,
sizeof(last_char_), fp) !=
sizeof(last_char_)) {
314 if (fwrite(&norm_top_, 1,
sizeof(norm_top_), fp) !=
sizeof(norm_top_)) {
317 if (fwrite(&norm_bottom_, 1,
sizeof(norm_bottom_), fp) !=
318 sizeof(norm_bottom_)) {
321 if (fwrite(&norm_aspect_ratio_, 1,
sizeof(norm_aspect_ratio_), fp) !=
322 sizeof(norm_aspect_ratio_)) {
340 int cropped_left = 0;
342 int cropped_wid =
wid_;
343 int cropped_hgt =
hgt_;
345 &cropped_wid, &cropped_hgt);
347 if (cropped_wid == 0 || cropped_hgt == 0) {
353 cropped_wid, cropped_hgt);
361 cropped_wid / (cropped_wid + cropped_hgt));
366 Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
373 int max_hist_wnd,
int min_con_comp_size)
const {
380 if (concomp_cnt <= 0 || !concomp_array) {
382 delete []concomp_array;
387 for (
int concomp = 0; concomp < concomp_cnt; concomp++) {
388 int concomp_seg_cnt = 0;
390 ConComp **concomp_seg_array = NULL;
392 concomp_array[concomp]->
Segment(max_hist_wnd, &concomp_seg_cnt);
394 if (concomp_alloc_seg == NULL) {
396 concomp_seg_array = concomp_array + concomp;
399 concomp_seg_array = concomp_alloc_seg;
400 delete concomp_array[concomp];
403 for (
int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
405 if (concomp_seg_array[seg_idx]->
Width() < 2 &&
406 concomp_seg_array[seg_idx]->
Height() < 2) {
407 delete concomp_seg_array[seg_idx];
415 memcpy(temp_segm_array, seg_array, seg_cnt *
sizeof(*seg_array));
418 seg_array = temp_segm_array;
420 seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
423 if (concomp_alloc_seg != NULL) {
424 delete []concomp_alloc_seg;
427 delete []concomp_array;
430 if (seg_cnt > 0 && seg_array != NULL) {
431 qsort(seg_array, seg_cnt,
sizeof(*seg_array), right_2_left ?
434 (*segment_cnt) = seg_cnt;
440 int seg_flags_size,
int *seg_flags,
441 bool *left_most,
bool *right_most,
446 end_concomp = strt_concomp + seg_flags_size;
451 for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
452 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
454 min_id = concomp_array[concomp]->
ID();
455 max_id = concomp_array[concomp]->
ID();
458 UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
463 if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
467 int id_cnt = max_id - min_id + 1;
468 bool *id_exist =
new bool[id_cnt];
469 bool *left_most_exist =
new bool[id_cnt];
470 bool *right_most_exist =
new bool[id_cnt];
471 memset(id_exist, 0, id_cnt *
sizeof(*id_exist));
472 memset(left_most_exist, 0, id_cnt *
sizeof(*left_most_exist));
473 memset(right_most_exist, 0, id_cnt *
sizeof(*right_most_exist));
481 int unq_left_most = 0;
482 int unq_right_most = 0;
483 for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
484 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
486 left = concomp_array[concomp]->
Left();
487 right = concomp_array[concomp]->
Right();
488 top = concomp_array[concomp]->
Top();
489 bottom = concomp_array[concomp]->
Bottom();
493 concomp_array[concomp]->
Right(), &left, &right);
495 concomp_array[concomp]->
Bottom(), &top, &bottom);
498 int concomp_id = concomp_array[concomp]->
ID() - min_id;
499 if (!id_exist[concomp_id]) {
500 id_exist[concomp_id] =
true;
503 if (concomp_array[concomp]->LeftMost()) {
504 if (left_most_exist[concomp_id] ==
false) {
505 left_most_exist[concomp_id] =
true;
509 if (concomp_array[concomp]->RightMost()) {
510 if (right_most_exist[concomp_id] ==
false) {
511 right_most_exist[concomp_id] =
true;
518 delete []left_most_exist;
519 delete []right_most_exist;
520 if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
523 (*left_most) = (unq_left_most >= unq_ids);
524 (*right_most) = (unq_right_most >= unq_ids);
526 CharSamp *samp =
new CharSamp(left, top, right - left + 1, bottom - top + 1);
529 for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
530 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
533 samp->
line_buff_[pt_ptr->
y() - top][pt_ptr->
x() - left] = 0;
534 pt_ptr = pt_ptr->
Next();
560 unsigned char *raw_data = *raw_data_ptr;
563 memcpy(&val32, raw_data,
sizeof(val32));
564 raw_data +=
sizeof(val32);
565 if (val32 != 0xabd0fefe) {
569 memcpy(&val32, raw_data,
sizeof(val32));
570 raw_data +=
sizeof(val32);
573 label32 =
new char_32[val32 + 1];
575 memcpy(label32, raw_data, val32 *
sizeof(*label32));
576 raw_data += (val32 *
sizeof(*label32));
587 char_samp->label32_ = label32;
588 memcpy(&char_samp->page_, raw_data,
sizeof(char_samp->page_));
589 raw_data +=
sizeof(char_samp->page_);
590 memcpy(&char_samp->left_, raw_data,
sizeof(char_samp->left_));
591 raw_data +=
sizeof(char_samp->left_);
592 memcpy(&char_samp->top_, raw_data,
sizeof(char_samp->top_));
593 raw_data +=
sizeof(char_samp->top_);
594 memcpy(&char_samp->first_char_, raw_data,
sizeof(char_samp->first_char_));
595 raw_data +=
sizeof(char_samp->first_char_);
596 memcpy(&char_samp->last_char_, raw_data,
sizeof(char_samp->last_char_));
597 raw_data +=
sizeof(char_samp->last_char_);
598 memcpy(&char_samp->norm_top_, raw_data,
sizeof(char_samp->norm_top_));
599 raw_data +=
sizeof(char_samp->norm_top_);
600 memcpy(&char_samp->norm_bottom_, raw_data,
sizeof(char_samp->norm_bottom_));
601 raw_data +=
sizeof(char_samp->norm_bottom_);
602 memcpy(&char_samp->norm_aspect_ratio_, raw_data,
603 sizeof(char_samp->norm_aspect_ratio_));
604 raw_data +=
sizeof(char_samp->norm_aspect_ratio_);
612 (*raw_data_ptr) = raw_data;
619 CharSamp *scaled_bmp =
Scale(conv_grid_size, conv_grid_size);
624 unsigned char *buff = scaled_bmp->
RawData();
627 int bmp_size = conv_grid_size * conv_grid_size;
628 for (input = 0; input < bmp_size; input++) {
629 features[input] = 255.0f - (1.0f * buff[input]);
unsigned short LastChar() const
bool ScaleFrom(Bmp8 *bmp, bool isotropic=true)
static int Right2LeftComparer(const void *comp1, const void *comp2)
unsigned short Height() const
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
unsigned short NormAspectRatio() const
void SetFirstChar(unsigned short first_char)
void SetNormAspectRatio(unsigned short norm_aspect_ratio)
bool Save2CharDumpFile(FILE *fp) const
static CharSamp * FromCharDumpFile(CachedFile *fp)
unsigned short Width() const
static const int kConCompAllocChunk
bool ComputeFeatures(int conv_grid_size, float *features)
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt)
ConComp ** Segment(int max_hist_wnd, int *concomp_cnt)
unsigned short NormTop() const
unsigned short Right() const
void SetLabel(char_32 label)
unsigned short Top() const
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
ConComp ** Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
void SetNormBottom(unsigned short norm_bottom)
string stringLabel() const
unsigned short FirstChar() const
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
int Read(void *read_buff, int bytes)
unsigned char * RawData() const
unsigned short NormBottom() const
bool LoadFromCharDumpFile(CachedFile *fp)
void SetNormTop(unsigned short norm_top)
unsigned char ** line_buff_
basic_string< char_32 > string_32
bool LoadFromRawData(unsigned char *data)
unsigned short Left() const
static CharSamp * FromConComps(ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
static int Left2RightComparer(const void *comp1, const void *comp2)
void SetLastChar(unsigned short last_char)
ConComp ** FindConComps(int *concomp_cnt, int min_size) const
unsigned short Bottom() const
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
bool SaveBmp2CharDumpFile(FILE *fp) const