tesseract  3.05.02
char_samp.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: char_samp.h
3  * Description: Declaration of a Character Bitmap Sample Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // CharSamp inherits from the Bmp8 class, which represents images of
21 // words, characters and segments throughout Cube.
22 // CharSamp adds data members that hold the physical location of the image
23 // on a page and, if available, the page number in a book.
24 // It also holds the ground-truth (GT) label of the image, which may
25 // correspond to a single character or a word.
26 // It also provides methods for segmenting, scaling and cropping the sample.
27 
28 #ifndef CHAR_SAMP_H
29 #define CHAR_SAMP_H
30 
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string>
34 #include "bmp_8.h"
35 #include "string_32.h"
36 
37 namespace tesseract {
38 
39 class CharSamp : public Bmp8 {
40  public:
41  CharSamp();
42  CharSamp(int wid, int hgt);
43  CharSamp(int left, int top, int wid, int hgt);
44  ~CharSamp();
45  // accessor methods
46  unsigned short Left() const { return left_; }
47  unsigned short Right() const { return left_ + wid_; }
48  unsigned short Top() const { return top_; }
49  unsigned short Bottom() const { return top_ + hgt_; }
50  unsigned short Page() const { return page_; }
51  unsigned short NormTop() const { return norm_top_; }
52  unsigned short NormBottom() const { return norm_bottom_; }
53  unsigned short NormAspectRatio() const { return norm_aspect_ratio_; }
54  unsigned short FirstChar() const { return first_char_; }
55  unsigned short LastChar() const { return last_char_; }
56  char_32 Label() const {
57  if (label32_ == NULL || LabelLen() != 1) {
58  return 0;
59  }
60  return label32_[0];
61  }
62  char_32 * StrLabel() const { return label32_; }
63  string stringLabel() const;
64 
65  void SetLeft(unsigned short left) { left_ = left; }
66  void SetTop(unsigned short top) { top_ = top; }
67  void SetPage(unsigned short page) { page_ = page; }
68  void SetLabel(char_32 label) {
69  delete []label32_;
70  label32_ = new char_32[2];
71  label32_[0] = label;
72  label32_[1] = 0;
73  }
74  void SetLabel(const char_32 *label32) {
75  delete []label32_;
76  label32_ = NULL;
77  if (label32 != NULL) {
78  // remove any byte order marks if any
79  if (label32[0] == 0xfeff) {
80  label32++;
81  }
82  int len = LabelLen(label32);
83  label32_ = new char_32[len + 1];
84  memcpy(label32_, label32, len * sizeof(*label32));
85  label32_[len] = 0;
86  }
87  }
88  void SetLabel(string str);
89  void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; }
90  void SetNormBottom(unsigned short norm_bottom) {
91  norm_bottom_ = norm_bottom;
92  }
93  void SetNormAspectRatio(unsigned short norm_aspect_ratio) {
94  norm_aspect_ratio_ = norm_aspect_ratio;
95  }
96  void SetFirstChar(unsigned short first_char) {
97  first_char_ = first_char;
98  }
99  void SetLastChar(unsigned short last_char) {
100  last_char_ = last_char;
101  }
102 
103  // Saves the charsamp to a dump file
104  bool Save2CharDumpFile(FILE *fp) const;
105  // Crops the underlying image and returns a new CharSamp with the
106  // same character information but new dimensions. Warning: does not
107  // necessarily set the normalized top and bottom correctly since
108  // those depend on its location within the word (or CubeSearchObject).
109  CharSamp *Crop();
110  // Computes the connected components of the char sample
111  ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd,
112  int min_con_comp_size) const;
113  // returns a copy of the charsamp that is scaled to the
114  // specified width and height
115  CharSamp *Scale(int wid, int hgt, bool isotropic = true);
116  // returns a Clone of the charsample
117  CharSamp *Clone() const;
118  // computes the features corresponding to the char sample
119  bool ComputeFeatures(int conv_grid_size, float *features);
120  // Load a Char Samp from a dump file
121  static CharSamp *FromCharDumpFile(CachedFile *fp);
122  static CharSamp *FromCharDumpFile(FILE *fp);
123  static CharSamp *FromCharDumpFile(unsigned char **raw_data);
124  static CharSamp *FromRawData(int left, int top, int wid, int hgt,
125  unsigned char *data);
126  static CharSamp *FromConComps(ConComp **concomp_array,
127  int strt_concomp, int seg_flags_size,
128  int *seg_flags, bool *left_most,
129  bool *right_most, int word_hgt);
130  static int AuxFeatureCnt() { return (5); }
131  // Return the length of the label string
132  int LabelLen() const { return LabelLen(label32_); }
133  static int LabelLen(const char_32 *label32) {
134  if (label32 == NULL) {
135  return 0;
136  }
137  int len = 0;
138  while (label32[++len] != 0);
139  return len;
140  }
141  private:
142  char_32 * label32_;
143  unsigned short page_;
144  unsigned short left_;
145  unsigned short top_;
146  // top of sample normalized to a word height of 255
147  unsigned short norm_top_;
148  // bottom of sample normalized to a word height of 255
149  unsigned short norm_bottom_;
150  // 255 * ratio of character width to (width + height)
151  unsigned short norm_aspect_ratio_;
152  unsigned short first_char_;
153  unsigned short last_char_;
154 };
155 
156 }
157 
158 #endif // CHAR_SAMP_H
unsigned short LastChar() const
Definition: char_samp.h:55
static int LabelLen(const char_32 *label32)
Definition: char_samp.h:133
void SetPage(unsigned short page)
Definition: char_samp.h:67
char_32 * StrLabel() const
Definition: char_samp.h:62
CharSamp * Clone() const
Definition: char_samp.cpp:542
void SetTop(unsigned short top)
Definition: char_samp.h:66
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
Definition: char_samp.cpp:247
unsigned short NormAspectRatio() const
Definition: char_samp.h:53
void SetFirstChar(unsigned short first_char)
Definition: char_samp.h:96
void SetNormAspectRatio(unsigned short norm_aspect_ratio)
Definition: char_samp.h:93
CharSamp * Crop()
Definition: char_samp.cpp:338
bool Save2CharDumpFile(FILE *fp) const
Definition: char_samp.cpp:278
static CharSamp * FromCharDumpFile(CachedFile *fp)
Definition: char_samp.cpp:82
int LabelLen() const
Definition: char_samp.h:132
bool ComputeFeatures(int conv_grid_size, float *features)
Definition: char_samp.cpp:617
unsigned short NormTop() const
Definition: char_samp.h:51
unsigned short Right() const
Definition: char_samp.h:47
void SetLabel(char_32 label)
Definition: char_samp.h:68
unsigned short Top() const
Definition: char_samp.h:48
ConComp ** Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
Definition: char_samp.cpp:372
void SetNormBottom(unsigned short norm_bottom)
Definition: char_samp.h:90
string stringLabel() const
Definition: char_samp.cpp:61
unsigned short FirstChar() const
Definition: char_samp.h:54
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
Definition: char_samp.cpp:266
unsigned short wid_
Definition: bmp_8.h:95
unsigned short Page() const
Definition: char_samp.h:50
unsigned short NormBottom() const
Definition: char_samp.h:52
signed int char_32
Definition: string_32.h:40
unsigned short hgt_
Definition: bmp_8.h:96
void SetNormTop(unsigned short norm_top)
Definition: char_samp.h:89
unsigned short Left() const
Definition: char_samp.h:46
static CharSamp * FromConComps(ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
Definition: char_samp.cpp:439
void SetLastChar(unsigned short last_char)
Definition: char_samp.h:99
static int AuxFeatureCnt()
Definition: char_samp.h:130
unsigned short Bottom() const
Definition: char_samp.h:49
char_32 Label() const
Definition: char_samp.h:56
void SetLeft(unsigned short left)
Definition: char_samp.h:65
void SetLabel(const char_32 *label32)
Definition: char_samp.h:74