tesseract  3.05.02
indexmapbidi.cpp
Go to the documentation of this file.
1 // File: indexmapbidi.cpp
3 // Description: Bi-directional mapping between a sparse and compact space.
4 // Author: rays@google.com (Ray Smith)
5 // Created: Tue Apr 06 11:33:59 PDT 2010
6 //
7 // (C) Copyright 2010, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "indexmapbidi.h"
21 
22 namespace tesseract {
23 
24 // SparseToCompact takes a sparse index to an index in the compact space.
25 // Uses a binary search to find the result. For faster speed use
26 // IndexMapBiDi, but that takes more memory.
27 int IndexMap::SparseToCompact(int sparse_index) const {
28  int result = compact_map_.binary_search(sparse_index);
29  return compact_map_[result] == sparse_index ? result : -1;
30 }
31 
32 // Copy from the input.
33 void IndexMap::CopyFrom(const IndexMap& src) {
36 }
37 void IndexMap::CopyFrom(const IndexMapBiDi& src) {
38  sparse_size_ = src.SparseSize();
40 }
41 
42 // Writes to the given file. Returns false in case of error.
43 bool IndexMap::Serialize(FILE* fp) const {
44  inT32 sparse_size = sparse_size_;
45  if (fwrite(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
46  if (!compact_map_.Serialize(fp)) return false;
47  return true;
48 }
49 
50 // Reads from the given file. Returns false in case of error.
51 // If swap is true, assumes a big/little-endian swap is needed.
52 bool IndexMap::DeSerialize(bool swap, FILE* fp) {
53  inT32 sparse_size;
54  if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
55  if (swap)
56  ReverseN(&sparse_size, sizeof(sparse_size));
57  sparse_size_ = sparse_size;
58  if (!compact_map_.DeSerialize(swap, fp)) return false;
59  return true;
60 }
61 
62 
63 // Top-level init function in a single call to initialize a map to select
64 // a single contiguous subrange [start, end) of the sparse space to be mapped
65 // 1 to 1 to the compact space, with all other elements of the sparse space
66 // left unmapped.
67 // No need to call Setup after this.
68 void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
69  Init(sparse_size, false);
70  for (int i = start; i < end; ++i)
71  SetMap(i, true);
72  Setup();
73 }
74 
75 // Initializes just the sparse_map_ to the given size with either all
76 // forward indices mapped (all_mapped = true) or none (all_mapped = false).
77 // Call Setup immediately after, or make calls to SetMap first to adjust the
78 // mapping and then call Setup before using the map.
79 void IndexMapBiDi::Init(int size, bool all_mapped) {
80  sparse_map_.init_to_size(size, -1);
81  if (all_mapped) {
82  for (int i = 0; i < size; ++i)
83  sparse_map_[i] = i;
84  }
85 }
86 
87 // Sets a given index in the sparse_map_ to be mapped or not.
88 void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
89  sparse_map_[sparse_index] = mapped ? 0 : -1;
90 }
91 
92 // Sets up the sparse_map_ and compact_map_ properly after Init and
93 // some calls to SetMap. Assumes an ordered 1-1 map from set indices
94 // in the forward map to the compact space.
96  int compact_size = 0;
97  for (int i = 0; i < sparse_map_.size(); ++i) {
98  if (sparse_map_[i] >= 0) {
99  sparse_map_[i] = compact_size++;
100  }
101  }
102  compact_map_.init_to_size(compact_size, -1);
103  for (int i = 0; i < sparse_map_.size(); ++i) {
104  if (sparse_map_[i] >= 0) {
105  compact_map_[sparse_map_[i]] = i;
106  }
107  }
108  sparse_size_ = sparse_map_.size();
109 }
110 
111 // Copy from the input.
113  sparse_map_ = src.sparse_map_;
115  sparse_size_ = sparse_map_.size();
116 }
117 
118 // Merges the two compact space indices. May be called many times, but
119 // the merges must be concluded by a call to CompleteMerges.
120 // Returns true if a merge was actually performed.
121 bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
122  // Find the current master index for index1 and index2.
123  compact_index1 = MasterCompactIndex(compact_index1);
124  compact_index2 = MasterCompactIndex(compact_index2);
125  // Be sure that index1 < index2.
126  if (compact_index1 > compact_index2) {
127  int tmp = compact_index1;
128  compact_index1 = compact_index2;
129  compact_index2 = tmp;
130  } else if (compact_index1 == compact_index2) {
131  return false;
132  }
133  // To save iterating over all sparse_map_ entries, simply make the master
134  // entry for index2 point to index1.
135  // This leaves behind a potential chain of parents that needs to be chased,
136  // as above.
137  sparse_map_[compact_map_[compact_index2]] = compact_index1;
138  if (compact_index1 >= 0)
139  compact_map_[compact_index2] = compact_map_[compact_index1];
140  return true;
141 }
142 
143 // Completes one or more Merge operations by further compacting the
144 // compact space. Unused compact space indices are removed, and the used
145 // ones above shuffled down to fill the gaps.
146 // Example:
147 // Input sparse_map_: (x indicates -1)
148 // x x 0 x 2 x x 4 x 0 x 2 x
149 // Output sparse_map_:
150 // x x 0 x 1 x x 2 x 0 x 1 x
151 // Output compact_map_:
152 // 2 4 7.
154  // Ensure each sparse_map_entry contains a master compact_map_ index.
155  int compact_size = 0;
156  for (int i = 0; i < sparse_map_.size(); ++i) {
157  int compact_index = MasterCompactIndex(sparse_map_[i]);
158  sparse_map_[i] = compact_index;
159  if (compact_index >= compact_size)
160  compact_size = compact_index + 1;
161  }
162  // Re-generate the compact_map leaving holes for unused indices.
163  compact_map_.init_to_size(compact_size, -1);
164  for (int i = 0; i < sparse_map_.size(); ++i) {
165  if (sparse_map_[i] >= 0) {
166  if (compact_map_[sparse_map_[i]] == -1)
167  compact_map_[sparse_map_[i]] = i;
168  }
169  }
170  // Compact the compact_map, leaving tmp_compact_map saying where each
171  // index went to in the compacted map.
172  GenericVector<inT32> tmp_compact_map;
173  tmp_compact_map.init_to_size(compact_size, -1);
174  compact_size = 0;
175  for (int i = 0; i < compact_map_.size(); ++i) {
176  if (compact_map_[i] >= 0) {
177  tmp_compact_map[i] = compact_size;
178  compact_map_[compact_size++] = compact_map_[i];
179  }
180  }
181  compact_map_.truncate(compact_size);
182  // Now modify the entries in the sparse map to point to the new locations.
183  for (int i = 0; i < sparse_map_.size(); ++i) {
184  if (sparse_map_[i] >= 0) {
185  sparse_map_[i] = tmp_compact_map[sparse_map_[i]];
186  }
187  }
188 }
189 
190 // Writes to the given file. Returns false in case of error.
191 bool IndexMapBiDi::Serialize(FILE* fp) const {
192  if (!IndexMap::Serialize(fp)) return false;
193  // Make a vector containing the rest of the map. If the map is many-to-one
194  // then each additional sparse entry needs to be stored.
195  // Normally we store only the compact map to save space.
196  GenericVector<inT32> remaining_pairs;
197  for (int i = 0; i < sparse_map_.size(); ++i) {
198  if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) {
199  remaining_pairs.push_back(i);
200  remaining_pairs.push_back(sparse_map_[i]);
201  }
202  }
203  if (!remaining_pairs.Serialize(fp)) return false;
204  return true;
205 }
206 
207 // Reads from the given file. Returns false in case of error.
208 // If swap is true, assumes a big/little-endian swap is needed.
209 bool IndexMapBiDi::DeSerialize(bool swap, FILE* fp) {
210  if (!IndexMap::DeSerialize(swap, fp)) return false;
211  GenericVector<inT32> remaining_pairs;
212  if (!remaining_pairs.DeSerialize(swap, fp)) return false;
213  sparse_map_.init_to_size(sparse_size_, -1);
214  for (int i = 0; i < compact_map_.size(); ++i) {
215  sparse_map_[compact_map_[i]] = i;
216  }
217  for (int i = 0; i < remaining_pairs.size(); ++i) {
218  int sparse_index = remaining_pairs[i++];
219  sparse_map_[sparse_index] = remaining_pairs[i];
220  }
221  return true;
222 }
223 
224 // Bulk calls to SparseToCompact.
225 // Maps the given array of sparse indices to an array of compact indices.
226 // Assumes the input is sorted. The output indices are sorted and uniqued.
227 // Return value is the number of "missed" features, being features that
228 // don't map to the compact feature space.
230  GenericVector<int>* compact) const {
231  compact->truncate(0);
232  int num_features = sparse.size();
233  int missed_features = 0;
234  int prev_good_feature = -1;
235  for (int f = 0; f < num_features; ++f) {
236  int feature = sparse_map_[sparse[f]];
237  if (feature >= 0) {
238  if (feature != prev_good_feature) {
239  compact->push_back(feature);
240  prev_good_feature = feature;
241  }
242  } else {
243  ++missed_features;
244  }
245  }
246  return missed_features;
247 }
248 
249 } // namespace tesseract.
250 
bool Serialize(FILE *fp) const
void CopyFrom(const IndexMap &src)
void InitAndSetupRange(int sparse_size, int start, int end)
bool Serialize(FILE *fp) const
int push_back(T object)
virtual int SparseSize() const
Definition: indexmapbidi.h:142
bool Merge(int compact_index1, int compact_index2)
bool Serialize(FILE *fp) const
void truncate(int size)
int inT32
Definition: host.h:35
void CopyFrom(const IndexMapBiDi &src)
virtual int SparseToCompact(int sparse_index) const
int size() const
Definition: genericvector.h:72
bool DeSerialize(bool swap, FILE *fp)
void Init(int size, bool all_mapped)
GenericVector< inT32 > compact_map_
Definition: indexmapbidi.h:80
bool DeSerialize(bool swap, FILE *fp)
int binary_search(const T &target) const
int MapFeatures(const GenericVector< int > &sparse, GenericVector< int > *compact) const
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:177
void init_to_size(int size, T t)
void SetMap(int sparse_index, bool mapped)
bool DeSerialize(bool swap, FILE *fp)