tesseract  3.05.02
fontinfo.h
Go to the documentation of this file.
1 // File: fontinfo.h
3 // Description: Font information classes abstracted from intproto.h/cpp.
4 // Author: rays@google.com (Ray Smith)
5 // Created: Tue May 17 17:08:01 PDT 2011
6 //
7 // (C) Copyright 2011, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 
21 #ifndef TESSERACT_CCSTRUCT_FONTINFO_H_
22 #define TESSERACT_CCSTRUCT_FONTINFO_H_
23 
24 #include "genericvector.h"
25 #include "host.h"
26 #include "unichar.h"
27 
28 #include <stdint.h>
29 
30 template <typename T> class UnicityTable;
31 
32 namespace tesseract {
33 
34 class BitVector;
35 
36 // Simple struct to hold a font and a score. The scores come from the low-level
37 // integer matcher, so they are in the uinT16 range. Fonts are an index to
38 // fontinfo_table.
39 // These get copied around a lot, so best to keep them small.
40 struct ScoredFont {
41  ScoredFont() : fontinfo_id(-1), score(0) {}
42  ScoredFont(int font_id, uinT16 classifier_score)
43  : fontinfo_id(font_id), score(classifier_score) {}
44 
45  // Index into fontinfo table, but inside the classifier, may be a shapetable
46  // index.
48  // Raw score from the low-level classifier.
50 };
51 
52 // Struct for information about spacing between characters in a particular font.
58 };
59 
60 /*
61  * font_properties contains properties about boldness, italicness, fixed pitch,
62  * serif, fraktur
63  */
64 struct FontInfo {
65  FontInfo() : name(NULL), properties(0), universal_id(0), spacing_vec(NULL) {}  // Starts empty: NULL name, no property bits set, universal_id 0, NULL spacing table.
66  ~FontInfo() {}  // Releases nothing. NOTE(review): name and spacing_vec are presumably freed elsewhere (FontInfoDeleteCallback?) - confirm.
67 
68  // Writes to the given file. Returns false in case of error.
69  bool Serialize(FILE* fp) const;
70  // Reads from the given file. Returns false in case of error.
71  // If swap is true, assumes a big/little-endian swap is needed.
72  bool DeSerialize(bool swap, FILE* fp);
73 
74  // Reserves unicharset_size spots in spacing_vec.
75  void init_spacing(int unicharset_size) {
77  spacing_vec->init_to_size(unicharset_size, NULL);
78  }
79  // Stores spacing_info in slot uch_id of the spacing_vec member
80  // (FontInfo class takes ownership of the pointer). The slot is overwritten.
81  // Note: init_spacing must be called first; uch_id must lie in [0, size).
82  void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
83  ASSERT_HOST(spacing_vec != NULL && uch_id >= 0 && spacing_vec->size() > uch_id);
84  (*spacing_vec)[uch_id] = spacing_info;
85  }
86 
87  // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID, or NULL if there is no spacing table or uch_id is outside [0, size).
88  const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
89  return (spacing_vec == NULL || uch_id < 0 || spacing_vec->size() <= uch_id) ?
90  NULL : (*spacing_vec)[uch_id];
91  }
92 
93  // Fills *spacing with the x gap expected between the two given UNICHAR_IDs.
94  // Returns false, leaving *spacing untouched, if get_spacing() yields NULL for either id; true otherwise.
95  bool get_spacing(UNICHAR_ID prev_uch_id,
96  UNICHAR_ID uch_id,
97  int *spacing) const {
98  const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
99  const FontSpacingInfo *fsi = this->get_spacing(uch_id);
100  if (prev_fsi == NULL || fsi == NULL) return false;
101  int i = 0;
102  for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {  // Search prev's kerned ids for uch_id.
103  if (prev_fsi->kerned_unichar_ids[i] == uch_id) break;
104  }
105  if (i < prev_fsi->kerned_unichar_ids.size()) {
106  *spacing = prev_fsi->kerned_x_gaps[i];  // Found: use the gap stored at the matching index.
107  } else {
108  *spacing = prev_fsi->x_gap_after + fsi->x_gap_before;  // Not found: prev's x_gap_after plus this char's x_gap_before.
109  }
110  return true;
111  }
112 
113  bool is_italic() const { return (properties & 1) != 0; }  // Each predicate tests one bit of properties; italic is bit 0.
114  bool is_bold() const { return (properties & 2) != 0; }  // Bit 1.
115  bool is_fixed_pitch() const { return (properties & 4) != 0; }  // Bit 2.
116  bool is_serif() const { return (properties & 8) != 0; }  // Bit 3.
117  bool is_fraktur() const { return (properties & 16) != 0; }  // Bit 4.
118 
119  char* name;
121  // The universal_id is a field reserved for the initialization process
122  // to assign a unique id number to all fonts loaded for the current
123  // combination of languages. This id will then be returned by
124  // ResultIterator::WordFontAttributes.
126  // Horizontal spacing between characters (indexed by UNICHAR_ID).
128 };
129 
130 // Every class (character) owns a FontSet that represents all the fonts that can
131 // render this character.
132 // Since almost all the characters from the same script share the same set of
133 // fonts, the sets are shared over multiple classes (see
134 // Classify::fontset_table_). Thus, a class only stores the id of a set.
135 // Because some fonts fail to render just one character of a set, there are a
136 // lot of FontSets that differ by only one font. Rather than storing the
137 // FontInfo directly in the FontSet structure, it's better to share FontInfos
138 // among FontSets (Classify::fontinfo_table_).
139 struct FontSet {
140  int32_t size;  // NOTE(review): presumably the number of entries in configs - confirm.
141  int32_t* configs; // FontInfo ids
142 };
143 
144 // Class that adds a bit of functionality on top of GenericVector to
145 // implement a table of FontInfo that replaces UnicityTable<FontInfo>.
146 // TODO(rays) change all references once all existing traineddata files
147 // are replaced.
148 class FontInfoTable : public GenericVector<FontInfo> {
149  public:
150  FontInfoTable();
151  ~FontInfoTable();
152 
153  // Writes to the given file. Returns false in case of error.
154  bool Serialize(FILE* fp) const;
155  // Reads from the given file. Returns false in case of error.
156  // If swap is true, assumes a big/little-endian swap is needed.
157  bool DeSerialize(bool swap, FILE* fp);
158 
159  // Returns true if the given set of fonts includes one with the same
160  // properties as font_id.
162  int font_id, const GenericVector<ScoredFont>& font_set) const;
163  // Returns true if the given set of fonts includes multiple properties.
165  const GenericVector<ScoredFont>& font_set) const;
166 
167  // Moves any non-empty FontSpacingInfo entries from other to this.
168  void MoveSpacingInfoFrom(FontInfoTable* other);
169  // Moves this to the target unicity table.
170  void MoveTo(UnicityTable<FontInfo>* target);
171 };
172 
173 // Compare FontInfo structures.
174 bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2);
175 // Compare FontSet structures.
176 bool CompareFontSet(const FontSet& fs1, const FontSet& fs2);
177 // Deletion callbacks for GenericVector.
180 
181 // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
182 bool read_info(FILE* f, FontInfo* fi, bool swap);
183 bool write_info(FILE* f, const FontInfo& fi);
184 bool read_spacing_info(FILE *f, FontInfo* fi, bool swap);
185 bool write_spacing_info(FILE* f, const FontInfo& fi);
186 bool read_set(FILE* f, FontSet* fs, bool swap);
187 bool write_set(FILE* f, const FontSet& fs);
188 
189 } // namespace tesseract.
190 
191 #endif /* TESSERACT_CCSTRUCT_FONTINFO_H_ */
int32_t * configs
Definition: fontinfo.h:141
bool is_italic() const
Definition: fontinfo.h:113
void init_spacing(int unicharset_size)
Definition: fontinfo.h:75
bool read_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:152
short inT16
Definition: host.h:33
GenericVector< UNICHAR_ID > kerned_unichar_ids
Definition: fontinfo.h:56
void swap(int index1, int index2)
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:139
bool Serialize(FILE *fp) const
Definition: fontinfo.cpp:49
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:146
bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, int *spacing) const
Definition: fontinfo.h:95
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:211
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:253
void MoveSpacingInfoFrom(FontInfoTable *other)
Definition: fontinfo.cpp:85
void MoveTo(UnicityTable< FontInfo > *target)
Definition: fontinfo.cpp:106
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:128
unsigned short uinT16
Definition: host.h:34
bool is_fixed_pitch() const
Definition: fontinfo.h:115
GenericVector< FontSpacingInfo * > * spacing_vec
Definition: fontinfo.h:127
bool DeSerialize(bool swap, FILE *fp)
Definition: fontinfo.cpp:34
int inT32
Definition: host.h:35
int size() const
Definition: genericvector.h:72
bool SetContainsMultipleFontProperties(const GenericVector< ScoredFont > &font_set) const
Definition: fontinfo.cpp:72
ScoredFont(int font_id, uinT16 classifier_score)
Definition: fontinfo.h:42
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:120
bool DeSerialize(bool swap, FILE *fp)
Definition: fontinfo.cpp:54
unsigned int uinT32
Definition: host.h:36
GenericVector< inT16 > kerned_x_gaps
Definition: fontinfo.h:57
bool SetContainsFontProperties(int font_id, const GenericVector< ScoredFont > &font_set) const
Definition: fontinfo.cpp:61
bool Serialize(FILE *fp) const
Definition: fontinfo.cpp:27
bool is_bold() const
Definition: fontinfo.h:114
bool is_serif() const
Definition: fontinfo.h:116
bool read_spacing_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:177
#define ASSERT_HOST(x)
Definition: errcode.h:84
const FontSpacingInfo * get_spacing(UNICHAR_ID uch_id) const
Definition: fontinfo.h:88
void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info)
Definition: fontinfo.h:82
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:168
bool read_set(FILE *f, FontSet *fs, bool swap)
Definition: fontinfo.cpp:240
bool is_fraktur() const
Definition: fontinfo.h:117
int UNICHAR_ID
Definition: unichar.h:33