tesseract  3.05.02
shapeclassifier.cpp
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: shapeclassifier.cpp
5 // Description: Base interface class for classifiers that return a
6 // shape index.
7 // Author: Ray Smith
8 // Created: Thu Dec 15 15:24:27 PST 2011
9 //
10 // (C) Copyright 2011, Google Inc.
11 // Licensed under the Apache License, Version 2.0 (the "License");
12 // you may not use this file except in compliance with the License.
13 // You may obtain a copy of the License at
14 // http://www.apache.org/licenses/LICENSE-2.0
15 // Unless required by applicable law or agreed to in writing, software
16 // distributed under the License is distributed on an "AS IS" BASIS,
17 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 // See the License for the specific language governing permissions and
19 // limitations under the License.
20 //
22 
23 #ifdef HAVE_CONFIG_H
24 #include "config_auto.h"
25 #endif
26 
27 #include "shapeclassifier.h"
28 #include "genericvector.h"
29 #include "scrollview.h"
30 #include "shapetable.h"
31 #include "svmnode.h"
32 #include "trainingsample.h"
33 #include "tprintf.h"
34 
35 namespace tesseract {
36 
37 // Classifies the given [training] sample, writing to results.
38 // See shapeclassifier.h for a full description.
39 // Default implementation calls the ShapeRating version.
41  const TrainingSample& sample, Pix* page_pix, int debug,
42  UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
43  results->truncate(0);
44  GenericVector<ShapeRating> shape_results;
45  int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this,
46  &shape_results);
47  const ShapeTable* shapes = GetShapeTable();
48  GenericVector<int> unichar_map;
49  unichar_map.init_to_size(shapes->unicharset().size(), -1);
50  for (int r = 0; r < num_shape_results; ++r) {
51  shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
52  }
53  return results->size();
54 }
55 
56 // Classifies the given [training] sample, writing to results.
57 // See shapeclassifier.h for a full description.
58 // Default implementation aborts.
60  int debug, int keep_this,
61  GenericVector<ShapeRating>* results) {
62  ASSERT_HOST("Must implement ClassifySample!" == NULL);
63  return 0;
64 }
65 
66 // Returns the shape that contains unichar_id that has the best result.
67 // If result is not NULL, it is set with the shape_id and rating.
68 // Does not need to be overridden if ClassifySample respects the keep_this
69 // rule.
71  Pix* page_pix, UNICHAR_ID unichar_id,
72  ShapeRating* result) {
74  const ShapeTable* shapes = GetShapeTable();
75  int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results);
76  for (int r = 0; r < num_results; ++r) {
77  if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) {
78  if (result != NULL)
79  *result = results[r];
80  return results[r].shape_id;
81  }
82  }
83  return -1;
84 }
85 
86 // Provides access to the UNICHARSET that this classifier works with.
87 // Only needs to be overridden if GetShapeTable() can return NULL.
89  return GetShapeTable()->unicharset();
90 }
91 
92 // Visual debugger classifies the given sample, displays the results and
93 // solicits user input to display other classifications. Returns when
94 // the user has finished with debugging the sample.
95 // Probably doesn't need to be overridden if the subclass provides
96 // DisplayClassifyAs.
98  Pix* page_pix,
99  UNICHAR_ID unichar_id) {
100 #ifndef GRAPHICS_DISABLED
101  static ScrollView* terminator = NULL;
102  if (terminator == NULL) {
103  terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true);
104  }
105  ScrollView* debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0);
106  // Provide a right-click menu to choose the class.
107  SVMenuNode* popup_menu = new SVMenuNode();
108  popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug");
109  popup_menu->BuildMenu(debug_win, false);
110  // Display the features in green.
111  const INT_FEATURE_STRUCT* features = sample.features();
112  int num_features = sample.num_features();
113  for (int f = 0; f < num_features; ++f) {
114  RenderIntFeature(debug_win, &features[f], ScrollView::GREEN);
115  }
116  debug_win->Update();
118  // Debug classification until the user quits.
119  const UNICHARSET& unicharset = GetUnicharset();
120  SVEvent* ev;
121  SVEventType ev_type;
122  do {
124  if (unichar_id >= 0) {
125  tprintf("Debugging class %d = %s\n",
126  unichar_id, unicharset.id_to_unichar(unichar_id));
127  UnicharClassifySample(sample, page_pix, 1, unichar_id, &results);
128  DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows);
129  } else {
130  tprintf("Invalid unichar_id: %d\n", unichar_id);
131  UnicharClassifySample(sample, page_pix, 1, -1, &results);
132  }
133  if (unichar_id >= 0) {
134  tprintf("Debugged class %d = %s\n",
135  unichar_id, unicharset.id_to_unichar(unichar_id));
136  }
137  tprintf("Right-click in ClassifierDebug window to choose debug class,");
138  tprintf(" Left-click or close window to quit...\n");
139  UNICHAR_ID old_unichar_id;
140  do {
141  old_unichar_id = unichar_id;
142  ev = debug_win->AwaitEvent(SVET_ANY);
143  ev_type = ev->type;
144  if (ev_type == SVET_POPUP) {
145  if (unicharset.contains_unichar(ev->parameter)) {
146  unichar_id = unicharset.unichar_to_id(ev->parameter);
147  } else {
148  tprintf("Char class '%s' not found in unicharset", ev->parameter);
149  }
150  }
151  delete ev;
152  } while (unichar_id == old_unichar_id &&
153  ev_type != SVET_CLICK && ev_type != SVET_DESTROY);
154  } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY);
155  delete debug_win;
156 #endif // GRAPHICS_DISABLED
157 }
158 
159 // Displays classification as the given shape_id. Creates as many windows
160 // as it feels fit, using index as a guide for placement. Adds any created
161 // windows to the windows output and returns a new index that may be used
162 // by any subsequent classifiers. Caller waits for the user to view and
163 // then destroys the windows by clearing the vector.
165  const TrainingSample& sample, Pix* page_pix,
166  UNICHAR_ID unichar_id, int index,
167  PointerVector<ScrollView>* windows) {
168  // Does nothing in the default implementation.
169  return index;
170 }
171 
172 // Prints debug information on the results.
174  const char* context, const GenericVector<UnicharRating>& results) const {
175  tprintf("%s\n", context);
176  for (int i = 0; i < results.size(); ++i) {
177  tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id,
178  GetUnicharset().id_to_unichar(results[i].unichar_id));
179  if (!results[i].fonts.empty()) {
180  tprintf(" Font Vector:");
181  for (int f = 0; f < results[i].fonts.size(); ++f) {
182  tprintf(" %d", results[i].fonts[f].fontinfo_id);
183  }
184  }
185  tprintf("\n");
186  }
187 }
189  const char* context, const GenericVector<ShapeRating>& results) const {
190  tprintf("%s\n", context);
191  for (int i = 0; i < results.size(); ++i) {
192  tprintf("%g:", results[i].rating);
193  if (results[i].joined)
194  tprintf("[J]");
195  if (results[i].broken)
196  tprintf("[B]");
197  tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string());
198  }
199 }
200 
201 // Removes any result that has all its unichars covered by a better choice,
202 // regardless of font.
204  GenericVector<ShapeRating>* results) const {
205  GenericVector<ShapeRating> filtered_results;
206  // Copy results to filtered results and knock out duplicate unichars.
207  const ShapeTable* shapes = GetShapeTable();
208  for (int r = 0; r < results->size(); ++r) {
209  if (r > 0) {
210  const Shape& shape_r = shapes->GetShape((*results)[r].shape_id);
211  int c;
212  for (c = 0; c < shape_r.size(); ++c) {
213  int unichar_id = shape_r[c].unichar_id;
214  int s;
215  for (s = 0; s < r; ++s) {
216  const Shape& shape_s = shapes->GetShape((*results)[s].shape_id);
217  if (shape_s.ContainsUnichar(unichar_id))
218  break; // We found unichar_id.
219  }
220  if (s == r)
221  break; // We didn't find unichar_id.
222  }
223  if (c == shape_r.size())
224  continue; // We found all the unichar ids in previous answers.
225  }
226  filtered_results.push_back((*results)[r]);
227  }
228  *results = filtered_results;
229 }
230 
231 } // namespace tesseract.
232 
233 
234 
235 
236 
virtual int BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id, ShapeRating *result)
ScrollView * CreateFeatureSpaceWindow(const char *name, int xpos, int ypos)
Definition: intproto.cpp:1920
SVEventType
Definition: scrollview.h:45
void AddShapeToResults(const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const
Definition: shapetable.cpp:697
SVEventType type
Definition: scrollview.h:64
virtual int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< ShapeRating > *results)
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
static void Update()
Definition: scrollview.cpp:715
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
Definition: intproto.cpp:1755
virtual int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id, int index, PointerVector< ScrollView > *windows)
int size() const
Definition: unicharset.h:297
virtual void UnicharPrintResults(const char *context, const GenericVector< UnicharRating > &results) const
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:156
virtual void PrintResults(const char *context, const GenericVector< ShapeRating > &results) const
const UNICHARSET & unicharset() const
Definition: shapetable.h:281
int push_back(T object)
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
virtual const ShapeTable * GetShapeTable() const =0
char * parameter
Definition: scrollview.h:71
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:449
void truncate(int size)
void BuildMenu(ScrollView *sv, bool menu_bar=true)
Definition: svmnode.cpp:121
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define tprintf(...)
Definition: tprintf.h:31
int size() const
Definition: genericvector.h:72
int size() const
Definition: shapetable.h:202
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
SVMenuNode * AddChild(const char *txt)
Definition: svmnode.cpp:59
Definition: cluster.h:32
void FilterDuplicateUnichars(GenericVector< ShapeRating > *results) const
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
bool empty() const
Definition: genericvector.h:84
void init_to_size(int size, T t)
#define ASSERT_HOST(x)
Definition: errcode.h:84
virtual const UNICHARSET & GetUnicharset() const
int UNICHAR_ID
Definition: unichar.h:33