tesseract  3.05.02
genericvector.h
Go to the documentation of this file.
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 // Created: Mon Jun 23 11:26:43 PDT 2008
6 //
7 // (C) Copyright 2007, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 //
20 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
21 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
22 
23 #include <assert.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include "tesscallback.h"
28 #include "errcode.h"
29 #include "helpers.h"
30 #include "ndminx.h"
31 #include "serialis.h"
32 #include "strngs.h"
33 
34 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
35 // provides automatic deletion of pointers, [De]Serialize that works, and
36 // sort that works.
37 template <typename T>
38 class GenericVector {
39  public:
41  clear_cb_(NULL), compare_cb_(NULL) {}
42 
43  GenericVector(int size, T init_val) {
44  init(size);
45  init_to_size(size, init_val);
46  }
47 
48  // Copy
49  GenericVector(const GenericVector& other) {
50  this->init(other.size());
51  this->operator+=(other);
52  }
55 
57 
58  // Reserve some memory.
59  void reserve(int size);
60  // Double the size of the internal array.
61  void double_the_size();
62 
63  // Resizes to size and sets all values to t.
64  void init_to_size(int size, T t);
65  // Resizes to size without any initialization.
66  void resize_no_init(int size) {
67  reserve(size);
68  size_used_ = size;
69  }
70 
71  // Return the size used.
72  int size() const {
73  return size_used_;
74  }
75  int size_reserved() const {
76  return size_reserved_;
77  }
78 
79  int length() const {
80  return size_used_;
81  }
82 
83  // Return true if empty.
84  bool empty() const {
85  return size_used_ == 0;
86  }
87 
88  // Return the object from an index.
89  T &get(int index) const;
90  T &back() const;
91  T &operator[](int index) const;
92  // Returns the last object and removes it.
93  T pop_back();
94 
95  // Return the index of the T object.
96  // This method NEEDS a compare_callback to be passed to
97  // set_compare_callback.
98  int get_index(T object) const;
99 
100  // Return true if T is in the array
101  bool contains(T object) const;
102 
103  // Return true if the index is valid
104  T contains_index(int index) const;
105 
106  // Push an element in the end of the array
107  int push_back(T object);
108  void operator+=(T t);
109 
110  // Push an element in the end of the array if the same
111  // element is not already contained in the array.
112  int push_back_new(T object);
113 
114  // Push an element in the front of the array
115  // Note: This function is O(n)
116  int push_front(T object);
117 
118  // Set the value at the given index
119  void set(T t, int index);
120 
121  // Insert t at the given index, push other elements to the right.
122  void insert(T t, int index);
123 
124  // Removes an element at the given index and
125  // shifts the remaining elements to the left.
126  void remove(int index);
127 
128  // Truncates the array to the given size by removing the end.
129  // If the current size is less, the array is not expanded.
130  void truncate(int size) {
131  if (size < size_used_)
132  size_used_ = size;
133  }
134 
135  // Add a callback to be called to delete the elements when the array took
136  // their ownership.
138 
139  // Add a callback to be called to compare the elements when needed (contains,
140  // get_id, ...)
142 
143  // Clear the array, calling the clear callback function if any.
144  // All the owned callbacks are also deleted.
145  // If you don't want the callbacks to be deleted, before calling clear, set
146  // the callback to NULL.
147  void clear();
148 
149  // Delete objects pointed to by data_[i]
150  void delete_data_pointers();
151 
152  // This method clears the current object, then, does a shallow copy of
153  // its argument, and finally invalidates its argument.
154  // Callbacks are moved to the current object;
155  void move(GenericVector<T>* from);
156 
157  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
158  // The callback given must be permanent since they will be called more than
159  // once. The given callback will be deleted at the end.
160  // If the callbacks are NULL, then the data is simply read/written using
161  // fread (and swapping)/fwrite.
162  // Returns false on error or if the callback returns false.
163  // DEPRECATED. Use [De]Serialize[Classes] instead.
164  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
165  bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
166  // Writes a vector of simple types to the given file. Assumes that bitwise
167  // read/write of T will work. Returns false in case of error.
168  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
169  bool Serialize(FILE* fp) const;
170  bool Serialize(tesseract::TFile* fp) const;
171  // Reads a vector of simple types from the given file. Assumes that bitwise
172  // read/write will work with ReverseN according to sizeof(T).
173  // Returns false in case of error.
174  // If swap is true, assumes a big/little-endian swap is needed.
175  bool DeSerialize(bool swap, FILE* fp);
176  bool DeSerialize(bool swap, tesseract::TFile* fp);
177  // Skips the deserialization of the vector.
178  static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
179  // Writes a vector of classes to the given file. Assumes the existence of
180  // bool T::Serialize(FILE* fp) const that returns false in case of error.
181  // Returns false in case of error.
182  bool SerializeClasses(FILE* fp) const;
183  bool SerializeClasses(tesseract::TFile* fp) const;
184  // Reads a vector of classes from the given file. Assumes the existence of
185  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
186  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
187  // this function. Returns false in case of error.
188  // If swap is true, assumes a big/little-endian swap is needed.
189  bool DeSerializeClasses(bool swap, FILE* fp);
190  bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
191  // Calls SkipDeSerialize on the elements of the vector.
192  static bool SkipDeSerializeClasses(bool swap, tesseract::TFile* fp);
193 
194  // Allocates a new array of double the current_size, copies over the
195  // information from data to the new location, deletes data and returns
196  // the pointed to the new larger array.
197  // This function uses memcpy to copy the data, instead of invoking
198  // operator=() for each element like double_the_size() does.
199  static T *double_the_size_memcpy(int current_size, T *data) {
200  T *data_new = new T[current_size * 2];
201  memcpy(data_new, data, sizeof(T) * current_size);
202  delete[] data;
203  return data_new;
204  }
205 
206  // Reverses the elements of the vector.
207  void reverse() {
208  for (int i = 0; i < size_used_ / 2; ++i)
209  Swap(&data_[i], &data_[size_used_ - 1 - i]);
210  }
211 
212  // Sorts the members of this vector using the less than comparator (cmp_lt),
213  // which compares the values. Useful for GenericVectors to primitive types.
214  // Will not work so great for pointers (unless you just want to sort some
215  // pointers). You need to provide a specialization to sort_cmp to use
216  // your type.
217  void sort();
218 
219  // Sort the array into the order defined by the qsort function comparator.
220  // The comparator function is as defined by qsort, ie. it receives pointers
221  // to two Ts and returns negative if the first element is to appear earlier
222  // in the result and positive if it is to appear later, with 0 for equal.
223  void sort(int (*comparator)(const void*, const void*)) {
224  qsort(data_, size_used_, sizeof(*data_), comparator);
225  }
226 
227  // Searches the array (assuming sorted in ascending order, using sort()) for
228  // an element equal to target and returns true if it is present.
229  // Use binary_search to get the index of target, or its nearest candidate.
230  bool bool_binary_search(const T& target) const {
231  int index = binary_search(target);
232  if (index >= size_used_)
233  return false;
234  return data_[index] == target;
235  }
236  // Searches the array (assuming sorted in ascending order, using sort()) for
237  // an element equal to target and returns the index of the best candidate.
238  // The return value is conceptually the largest index i such that
239  // data_[i] <= target or 0 if target < the whole vector.
240  // NOTE that this function uses operator> so really the return value is
241  // the largest index i such that data_[i] > target is false.
242  int binary_search(const T& target) const {
243  int bottom = 0;
244  int top = size_used_;
245  while (top - bottom > 1) {
246  int middle = (bottom + top) / 2;
247  if (data_[middle] > target)
248  top = middle;
249  else
250  bottom = middle;
251  }
252  return bottom;
253  }
254 
255  // Compact the vector by deleting elements using operator!= on basic types.
256  // The vector must be sorted.
257  void compact_sorted() {
258  if (size_used_ == 0)
259  return;
260 
261  // First element is in no matter what, hence the i = 1.
262  int last_write = 0;
263  for (int i = 1; i < size_used_; ++i) {
264  // Finds next unique item and writes it.
265  if (data_[last_write] != data_[i])
266  data_[++last_write] = data_[i];
267  }
268  // last_write is the index of a valid data cell, so add 1.
269  size_used_ = last_write + 1;
270  }
271 
272  // Compact the vector by deleting elements for which delete_cb returns
273  // true. delete_cb is a permanent callback and will be deleted.
275  int new_size = 0;
276  int old_index = 0;
277  // Until the callback returns true, the elements stay the same.
278  while (old_index < size_used_ && !delete_cb->Run(old_index++))
279  ++new_size;
280  // Now just copy anything else that gets false from delete_cb.
281  for (; old_index < size_used_; ++old_index) {
282  if (!delete_cb->Run(old_index)) {
283  data_[new_size++] = data_[old_index];
284  }
285  }
286  size_used_ = new_size;
287  delete delete_cb;
288  }
289 
290  T dot_product(const GenericVector<T>& other) const {
291  T result = static_cast<T>(0);
292  for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i)
293  result += data_[i] * other.data_[i];
294  return result;
295  }
296 
297  // Returns the index of what would be the target_index_th item in the array
298  // if the members were sorted, without actually sorting. Members are
299  // shuffled around, but it takes O(n) time.
300  // NOTE: uses operator< and operator== on the members.
301  int choose_nth_item(int target_index) {
302  // Make sure target_index is legal.
303  if (target_index < 0)
304  target_index = 0; // ensure legal
305  else if (target_index >= size_used_)
306  target_index = size_used_ - 1;
307  unsigned int seed = 1;
308  return choose_nth_item(target_index, 0, size_used_, &seed);
309  }
310 
311  // Swaps the elements with the given indices.
312  void swap(int index1, int index2) {
313  if (index1 != index2) {
314  T tmp = data_[index1];
315  data_[index1] = data_[index2];
316  data_[index2] = tmp;
317  }
318  }
319  // Returns true if all elements of *this are within the given range.
320  // Only uses operator<
321  bool WithinBounds(const T& rangemin, const T& rangemax) const {
322  for (int i = 0; i < size_used_; ++i) {
323  if (data_[i] < rangemin || rangemax < data_[i])
324  return false;
325  }
326  return true;
327  }
328 
329  protected:
330  // Internal recursive version of choose_nth_item.
331  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
332 
333  // Init the object, allocating size memory.
334  void init(int size);
335 
336  // We are assuming that the object generally placed in thie
337  // vector are small enough that for efficiency it makes sense
338  // to start with a larger initial size.
339  static const int kDefaultVectorSize = 4;
342  T* data_;
344  // Mutable because Run method is not const
346 };
347 
348 namespace tesseract {
349 
350 // Function to read a GenericVector<char> from a whole file.
351 // Returns false on failure.
352 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
353 // Function to write a GenericVector<char> to a whole file.
354 // Returns false on failure.
355 typedef bool (*FileWriter)(const GenericVector<char>& data,
356  const STRING& filename);
357 // The default FileReader loads the whole file into the vector of char,
358 // returning false on error.
359 inline bool LoadDataFromFile(const STRING& filename,
360  GenericVector<char>* data) {
361  bool result = false;
362  FILE* fp = fopen(filename.string(), "rb");
363  if (fp != NULL) {
364  fseek(fp, 0, SEEK_END);
365  size_t size = ftell(fp);
366  fseek(fp, 0, SEEK_SET);
367  if (size > 0) {
368  // reserve an extra byte in case caller wants to append a '\0' character
369  data->reserve(size + 1);
370  data->resize_no_init(size);
371  result = fread(&(*data)[0], 1, size, fp) == size;
372  }
373  fclose(fp);
374  }
375  return result;
376 }
377 // The default FileWriter writes the vector of char to the filename file,
378 // returning false on error.
379 inline bool SaveDataToFile(const GenericVector<char>& data,
380  const STRING& filename) {
381  FILE* fp = fopen(filename.string(), "wb");
382  if (fp == NULL) return false;
383  bool result =
384  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
385  fclose(fp);
386  return result;
387 }
388 
389 template <typename T>
390 bool cmp_eq(T const & t1, T const & t2) {
391  return t1 == t2;
392 }
393 
394 // Used by sort()
395 // return < 0 if t1 < t2
396 // return 0 if t1 == t2
397 // return > 0 if t1 > t2
398 template <typename T>
399 int sort_cmp(const void* t1, const void* t2) {
400  const T* a = static_cast<const T *> (t1);
401  const T* b = static_cast<const T *> (t2);
402  if (*a < *b) {
403  return -1;
404  } else if (*b < *a) {
405  return 1;
406  } else {
407  return 0;
408  }
409 }
410 
411 // Used by PointerVector::sort()
412 // return < 0 if t1 < t2
413 // return 0 if t1 == t2
414 // return > 0 if t1 > t2
415 template <typename T>
416 int sort_ptr_cmp(const void* t1, const void* t2) {
417  const T* a = *reinterpret_cast<T * const *>(t1);
418  const T* b = *reinterpret_cast<T * const *>(t2);
419  if (*a < *b) {
420  return -1;
421  } else if (*b < *a) {
422  return 1;
423  } else {
424  return 0;
425  }
426 }
427 
428 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
429 // as it provides automatic deletion and correct serialization, with the
430 // corollary that all copy operations are deep copies of the pointed-to objects.
431 template<typename T>
432 class PointerVector : public GenericVector<T*> {
433  public:
435  explicit PointerVector(int size) : GenericVector<T*>(size) { }
437  // Clear must be called here, even though it is called again by the base,
438  // as the base will call the wrong clear.
439  clear();
440  }
441  // Copy must be deep, as the pointers will be automatically deleted on
442  // destruction.
443  PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
444  this->init(other.size());
445  this->operator+=(other);
446  }
448  this->reserve(this->size_used_ + other.size_used_);
449  for (int i = 0; i < other.size(); ++i) {
450  this->push_back(new T(*other.data_[i]));
451  }
452  return *this;
453  }
454 
456  if (&other != this) {
457  this->truncate(0);
458  this->operator+=(other);
459  }
460  return *this;
461  }
462 
463  // Removes an element at the given index and
464  // shifts the remaining elements to the left.
465  void remove(int index) {
466  delete GenericVector<T*>::data_[index];
468  }
469 
470  // Truncates the array to the given size by removing the end.
471  // If the current size is less, the array is not expanded.
472  void truncate(int size) {
473  for (int i = size; i < GenericVector<T*>::size_used_; ++i)
474  delete GenericVector<T*>::data_[i];
476  }
477 
478  // Compact the vector by deleting elements for which delete_cb returns
479  // true. delete_cb is a permanent callback and will be deleted.
481  int new_size = 0;
482  int old_index = 0;
483  // Until the callback returns true, the elements stay the same.
484  while (old_index < GenericVector<T*>::size_used_ &&
485  !delete_cb->Run(GenericVector<T*>::data_[old_index++]))
486  ++new_size;
487  // Now just copy anything else that gets false from delete_cb.
488  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
489  if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
490  GenericVector<T*>::data_[new_size++] =
491  GenericVector<T*>::data_[old_index];
492  } else {
493  delete GenericVector<T*>::data_[old_index];
494  }
495  }
497  delete delete_cb;
498  }
499 
500  // Clear the array, calling the clear callback function if any.
501  // All the owned callbacks are also deleted.
502  // If you don't want the callbacks to be deleted, before calling clear, set
503  // the callback to NULL.
504  void clear() {
507  }
508 
509  // Writes a vector of (pointers to) classes to the given file. Assumes the
510  // existence of bool T::Serialize(FILE*) const that returns false in case of
511  // error. There is no Serialize for simple types, as you would have a
512  // normal GenericVector of those.
513  // Returns false in case of error.
514  bool Serialize(FILE* fp) const {
516  if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
517  for (int i = 0; i < used; ++i) {
518  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
519  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
520  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
521  }
522  return true;
523  }
524  bool Serialize(TFile* fp) const {
526  if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
527  for (int i = 0; i < used; ++i) {
528  inT8 non_null = GenericVector<T*>::data_[i] != NULL;
529  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
530  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
531  }
532  return true;
533  }
534  // Reads a vector of (pointers to) classes to the given file. Assumes the
535  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
536  // case of error. There is no Serialize for simple types, as you would have a
537  // normal GenericVector of those.
538  // If swap is true, assumes a big/little-endian swap is needed.
539  // Also needs T::T(), as new T is used in this function.
540  // Returns false in case of error.
541  bool DeSerialize(bool swap, FILE* fp) {
542  inT32 reserved;
543  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
544  if (swap) Reverse32(&reserved);
545  GenericVector<T*>::reserve(reserved);
546  truncate(0);
547  for (int i = 0; i < reserved; ++i) {
548  inT8 non_null;
549  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
550  T* item = NULL;
551  if (non_null) {
552  item = new T;
553  if (!item->DeSerialize(swap, fp)) {
554  delete item;
555  return false;
556  }
557  this->push_back(item);
558  } else {
559  // Null elements should keep their place in the vector.
560  this->push_back(NULL);
561  }
562  }
563  return true;
564  }
565  bool DeSerialize(bool swap, TFile* fp) {
566  inT32 reserved;
567  if (!DeSerializeSize(swap, fp, &reserved)) return false;
568  GenericVector<T*>::reserve(reserved);
569  truncate(0);
570  for (int i = 0; i < reserved; ++i) {
571  if (!DeSerializeElement(swap, fp)) return false;
572  }
573  return true;
574  }
575  // Enables deserialization of a selection of elements. Note that in order to
576  // retain the integrity of the stream, the caller must call some combination
577  // of DeSerializeElement and DeSerializeSkip of the exact number returned in
578  // *size, assuming a true return.
579  static bool DeSerializeSize(bool swap, TFile* fp, inT32* size) {
580  if (fp->FRead(size, sizeof(*size), 1) != 1) return false;
581  if (swap) Reverse32(size);
582  return true;
583  }
584  // Reads and appends to the vector the next element of the serialization.
585  bool DeSerializeElement(bool swap, TFile* fp) {
586  inT8 non_null;
587  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
588  T* item = NULL;
589  if (non_null) {
590  item = new T;
591  if (!item->DeSerialize(swap, fp)) {
592  delete item;
593  return false;
594  }
595  this->push_back(item);
596  } else {
597  // Null elements should keep their place in the vector.
598  this->push_back(NULL);
599  }
600  return true;
601  }
602  // Skips the next element of the serialization.
603  static bool DeSerializeSkip(bool swap, TFile* fp) {
604  inT8 non_null;
605  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
606  if (non_null) {
607  if (!T::SkipDeSerialize(swap, fp)) return false;
608  }
609  return true;
610  }
611 
612  // Sorts the items pointed to by the members of this vector using
613  // t::operator<().
614  void sort() { this->GenericVector<T*>::sort(&sort_ptr_cmp<T>); }
615 };
616 
617 } // namespace tesseract
618 
619 // A useful vector that uses operator== to do comparisons.
620 template <typename T>
621 class GenericVectorEqEq : public GenericVector<T> {
622  public:
625  NewPermanentTessCallback(tesseract::cmp_eq<T>));
626  }
629  NewPermanentTessCallback(tesseract::cmp_eq<T>));
630  }
631 };
632 
633 template <typename T>
634 void GenericVector<T>::init(int size) {
635  size_used_ = 0;
636  size_reserved_ = 0;
637  data_ = 0;
638  clear_cb_ = 0;
639  compare_cb_ = 0;
640  reserve(size);
641 }
642 
643 template <typename T>
645  clear();
646 }
647 
648 // Reserve some memory. If the internal array contains elements, they are
649 // copied.
650 template <typename T>
652  if (size_reserved_ >= size || size <= 0)
653  return;
654  if (size < kDefaultVectorSize) size = kDefaultVectorSize;
655  T* new_array = new T[size];
656  for (int i = 0; i < size_used_; ++i)
657  new_array[i] = data_[i];
658  delete[] data_;
659  data_ = new_array;
660  size_reserved_ = size;
661 }
662 
663 template <typename T>
665  if (size_reserved_ == 0) {
666  reserve(kDefaultVectorSize);
667  }
668  else {
669  reserve(2 * size_reserved_);
670  }
671 }
672 
673 // Resizes to size and sets all values to t.
674 template <typename T>
675 void GenericVector<T>::init_to_size(int size, T t) {
676  reserve(size);
677  size_used_ = size;
678  for (int i = 0; i < size; ++i)
679  data_[i] = t;
680 }
681 
682 
683 // Return the object from an index.
684 template <typename T>
685 T &GenericVector<T>::get(int index) const {
686  ASSERT_HOST(index >= 0 && index < size_used_);
687  return data_[index];
688 }
689 
690 template <typename T>
691 T &GenericVector<T>::operator[](int index) const {
692  assert(index >= 0 && index < size_used_);
693  return data_[index];
694 }
695 
696 template <typename T>
698  ASSERT_HOST(size_used_ > 0);
699  return data_[size_used_ - 1];
700 }
701 // Returns the last object and removes it.
702 template <typename T>
704  ASSERT_HOST(size_used_ > 0);
705  return data_[--size_used_];
706 }
707 
708 // Return the object from an index.
709 template <typename T>
710 void GenericVector<T>::set(T t, int index) {
711  ASSERT_HOST(index >= 0 && index < size_used_);
712  data_[index] = t;
713 }
714 
715 // Shifts the rest of the elements to the right to make
716 // space for the new elements and inserts the given element
717 // at the specified index.
718 template <typename T>
719 void GenericVector<T>::insert(T t, int index) {
720  ASSERT_HOST(index >= 0 && index <= size_used_);
721  if (size_reserved_ == size_used_)
722  double_the_size();
723  for (int i = size_used_; i > index; --i) {
724  data_[i] = data_[i-1];
725  }
726  data_[index] = t;
727  size_used_++;
728 }
729 
730 // Removes an element at the given index and
731 // shifts the remaining elements to the left.
732 template <typename T>
733 void GenericVector<T>::remove(int index) {
734  ASSERT_HOST(index >= 0 && index < size_used_);
735  for (int i = index; i < size_used_ - 1; ++i) {
736  data_[i] = data_[i+1];
737  }
738  size_used_--;
739 }
740 
741 // Return true if the index is valindex
742 template <typename T>
744  return index >= 0 && index < size_used_;
745 }
746 
747 // Return the index of the T object.
748 template <typename T>
749 int GenericVector<T>::get_index(T object) const {
750  for (int i = 0; i < size_used_; ++i) {
751  ASSERT_HOST(compare_cb_ != NULL);
752  if (compare_cb_->Run(object, data_[i]))
753  return i;
754  }
755  return -1;
756 }
757 
758 // Return true if T is in the array
759 template <typename T>
760 bool GenericVector<T>::contains(T object) const {
761  return get_index(object) != -1;
762 }
763 
764 // Add an element in the array
765 template <typename T>
767  int index = 0;
768  if (size_used_ == size_reserved_)
769  double_the_size();
770  index = size_used_++;
771  data_[index] = object;
772  return index;
773 }
774 
775 template <typename T>
777  int index = get_index(object);
778  if (index >= 0)
779  return index;
780  return push_back(object);
781 }
782 
783 // Add an element in the array (front)
784 template <typename T>
786  if (size_used_ == size_reserved_)
787  double_the_size();
788  for (int i = size_used_; i > 0; --i)
789  data_[i] = data_[i-1];
790  data_[0] = object;
791  ++size_used_;
792  return 0;
793 }
794 
795 template <typename T>
797  push_back(t);
798 }
799 
800 template <typename T>
802  this->reserve(size_used_ + other.size_used_);
803  for (int i = 0; i < other.size(); ++i) {
804  this->operator+=(other.data_[i]);
805  }
806  return *this;
807 }
808 
809 template <typename T>
811  if (&other != this) {
812  this->truncate(0);
813  this->operator+=(other);
814  }
815  return *this;
816 }
817 
818 // Add a callback to be called to delete the elements when the array took
819 // their ownership.
820 template <typename T>
822  clear_cb_ = cb;
823 }
824 
825 // Add a callback to be called to delete the elements when the array took
826 // their ownership.
827 template <typename T>
830  compare_cb_ = cb;
831 }
832 
833 // Clear the array, calling the callback function if any.
834 template <typename T>
836  if (size_reserved_ > 0) {
837  if (clear_cb_ != NULL)
838  for (int i = 0; i < size_used_; ++i)
839  clear_cb_->Run(data_[i]);
840  delete[] data_;
841  data_ = NULL;
842  size_used_ = 0;
843  size_reserved_ = 0;
844  }
845  if (clear_cb_ != NULL) {
846  delete clear_cb_;
847  clear_cb_ = NULL;
848  }
849  if (compare_cb_ != NULL) {
850  delete compare_cb_;
851  compare_cb_ = NULL;
852  }
853 }
854 
855 template <typename T>
857  for (int i = 0; i < size_used_; ++i)
858  if (data_[i]) {
859  delete data_[i];
860  }
861 }
862 
863 
864 template <typename T>
866  FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const {
867  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
868  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
869  if (cb != NULL) {
870  for (int i = 0; i < size_used_; ++i) {
871  if (!cb->Run(f, data_[i])) {
872  delete cb;
873  return false;
874  }
875  }
876  delete cb;
877  } else {
878  if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false;
879  }
880  return true;
881 }
882 
883 template <typename T>
886  bool swap) {
887  inT32 reserved;
888  if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false;
889  if (swap) Reverse32(&reserved);
890  reserve(reserved);
891  if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
892  if (swap) Reverse32(&size_used_);
893  if (cb != NULL) {
894  for (int i = 0; i < size_used_; ++i) {
895  if (!cb->Run(f, data_ + i, swap)) {
896  delete cb;
897  return false;
898  }
899  }
900  delete cb;
901  } else {
902  if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false;
903  if (swap) {
904  for (int i = 0; i < size_used_; ++i)
905  ReverseN(&data_[i], sizeof(T));
906  }
907  }
908  return true;
909 }
910 
911 // Writes a vector of simple types to the given file. Assumes that bitwise
912 // read/write of T will work. Returns false in case of error.
913 template <typename T>
914 bool GenericVector<T>::Serialize(FILE* fp) const {
915  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
916  if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false;
917  return true;
918 }
919 template <typename T>
921  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
922  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
923  return true;
924 }
925 
926 // Reads a vector of simple types from the given file. Assumes that bitwise
927 // read/write will work with ReverseN according to sizeof(T).
928 // Returns false in case of error.
929 // If swap is true, assumes a big/little-endian swap is needed.
930 template <typename T>
931 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
932  inT32 reserved;
933  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
934  if (swap) Reverse32(&reserved);
935  reserve(reserved);
936  size_used_ = reserved;
937  if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false;
938  if (swap) {
939  for (int i = 0; i < size_used_; ++i)
940  ReverseN(&data_[i], sizeof(data_[i]));
941  }
942  return true;
943 }
944 template <typename T>
946  inT32 reserved;
947  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
948  if (swap) Reverse32(&reserved);
949  reserve(reserved);
950  size_used_ = reserved;
951  if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false;
952  if (swap) {
953  for (int i = 0; i < size_used_; ++i)
954  ReverseN(&data_[i], sizeof(data_[i]));
955  }
956  return true;
957 }
958 template <typename T>
960  inT32 reserved;
961  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
962  if (swap) Reverse32(&reserved);
963  return fp->FRead(NULL, sizeof(T), reserved) == reserved;
964 }
965 
966 // Writes a vector of classes to the given file. Assumes the existence of
967 // bool T::Serialize(FILE* fp) const that returns false in case of error.
968 // Returns false in case of error.
969 template <typename T>
971  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
972  for (int i = 0; i < size_used_; ++i) {
973  if (!data_[i].Serialize(fp)) return false;
974  }
975  return true;
976 }
977 template <typename T>
979  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
980  for (int i = 0; i < size_used_; ++i) {
981  if (!data_[i].Serialize(fp)) return false;
982  }
983  return true;
984 }
985 
986 // Reads a vector of classes from the given file. Assumes the existence of
987 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
988 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
989 // this function. Returns false in case of error.
990 // If swap is true, assumes a big/little-endian swap is needed.
991 template <typename T>
992 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
993  uinT32 reserved;
994  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
995  if (swap) Reverse32(&reserved);
996  T empty;
997  init_to_size(reserved, empty);
998  for (int i = 0; i < reserved; ++i) {
999  if (!data_[i].DeSerialize(swap, fp)) return false;
1000  }
1001  return true;
1002 }
1003 template <typename T>
1005  uinT32 reserved;
1006  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
1007  if (swap) Reverse32(&reserved);
1008  T empty;
1009  init_to_size(reserved, empty);
1010  for (int i = 0; i < reserved; ++i) {
1011  if (!data_[i].DeSerialize(swap, fp)) return false;
1012  }
1013  return true;
1014 }
1015 template <typename T>
1017  uinT32 reserved;
1018  if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
1019  if (swap) Reverse32(&reserved);
1020  for (int i = 0; i < reserved; ++i) {
1021  if (!T::SkipDeSerialize(swap, fp)) return false;
1022  }
1023  return true;
1024 }
1025 
1026 // This method clear the current object, then, does a shallow copy of
1027 // its argument, and finally invalidates its argument.
1028 template <typename T>
1030  this->clear();
1031  this->data_ = from->data_;
1032  this->size_reserved_ = from->size_reserved_;
1033  this->size_used_ = from->size_used_;
1034  this->compare_cb_ = from->compare_cb_;
1035  this->clear_cb_ = from->clear_cb_;
1036  from->data_ = NULL;
1037  from->clear_cb_ = NULL;
1038  from->compare_cb_ = NULL;
1039  from->size_used_ = 0;
1040  from->size_reserved_ = 0;
1041 }
1042 
1043 template <typename T>
1045  sort(&tesseract::sort_cmp<T>);
1046 }
1047 
1048 // Internal recursive version of choose_nth_item.
1049 // The algorithm used comes from "Algorithms" by Sedgewick:
1050 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1051 // The principle is to choose a random pivot, and move everything less than
1052 // the pivot to its left, and everything greater than the pivot to the end
1053 // of the array, then recurse on the part that contains the desired index, or
1054 // just return the answer if it is in the equal section in the middle.
1055 // The random pivot guarantees average linear time for the same reason that
1056 // n times vector::push_back takes linear time on average.
1057 // target_index, start and and end are all indices into the full array.
1058 // Seed is a seed for rand_r for thread safety purposes. Its value is
1059 // unimportant as the random numbers do not affect the result except
1060 // between equal answers.
1061 template <typename T>
1062 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1063  unsigned int* seed) {
1064  // Number of elements to process.
1065  int num_elements = end - start;
1066  // Trivial cases.
1067  if (num_elements <= 1)
1068  return start;
1069  if (num_elements == 2) {
1070  if (data_[start] < data_[start + 1]) {
1071  return target_index > start ? start + 1 : start;
1072  } else {
1073  return target_index > start ? start : start + 1;
1074  }
1075  }
1076  // Place the pivot at start.
1077  #ifndef rand_r // _MSC_VER, ANDROID
1078  srand(*seed);
1079  #define rand_r(seed) rand()
1080  #endif // _MSC_VER
1081  int pivot = rand_r(seed) % num_elements + start;
1082  swap(pivot, start);
1083  // The invariant condition here is that items [start, next_lesser) are less
1084  // than the pivot (which is at index next_lesser) and items
1085  // [prev_greater, end) are greater than the pivot, with items
1086  // [next_lesser, prev_greater) being equal to the pivot.
1087  int next_lesser = start;
1088  int prev_greater = end;
1089  for (int next_sample = start + 1; next_sample < prev_greater;) {
1090  if (data_[next_sample] < data_[next_lesser]) {
1091  swap(next_lesser++, next_sample++);
1092  } else if (data_[next_sample] == data_[next_lesser]) {
1093  ++next_sample;
1094  } else {
1095  swap(--prev_greater, next_sample);
1096  }
1097  }
1098  // Now the invariant is set up, we recurse on just the section that contains
1099  // the desired index.
1100  if (target_index < next_lesser)
1101  return choose_nth_item(target_index, start, next_lesser, seed);
1102  else if (target_index < prev_greater)
1103  return next_lesser; // In equal bracket.
1104  else
1105  return choose_nth_item(target_index, prev_greater, end, seed);
1106 }
1107 
1108 
1109 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
int sort_ptr_cmp(const void *t1, const void *t2)
TessResultCallback2< bool, T const &, T const & > * compare_cb_
bool DeSerializeElement(bool swap, TFile *fp)
PointerVector< T > & operator+=(const PointerVector &other)
void init(int size)
int size_reserved() const
Definition: genericvector.h:75
bool DeSerialize(bool swap, TFile *fp)
void compact(TessResultCallback1< bool, const T *> *delete_cb)
GenericVectorEqEq(int size)
void compact_sorted()
void resize_no_init(int size)
Definition: genericvector.h:66
T & get(int index) const
static const int kDefaultVectorSize
static bool DeSerializeSkip(bool swap, TFile *fp)
virtual R Run(A1)=0
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
int FWrite(const void *buffer, int size, int count)
Definition: serialis.cpp:131
void Reverse32(void *ptr)
Definition: helpers.h:193
void insert(T t, int index)
GenericVector< T > & operator+=(const GenericVector &other)
static T * double_the_size_memcpy(int current_size, T *data)
void swap(int index1, int index2)
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:86
#define MIN(x, y)
Definition: ndminx.h:28
PointerVector< T > & operator=(const PointerVector &other)
bool contains(T object) const
void remove(int index)
bool Serialize(TFile *fp) const
GenericVector(int size, T init_val)
Definition: genericvector.h:43
void move(GenericVector< T > *from)
GenericVector(const GenericVector &other)
Definition: genericvector.h:49
int get_index(T object) const
int push_back_new(T object)
T & back() const
bool DeSerializeClasses(bool swap, FILE *fp)
static bool SkipDeSerialize(bool swap, tesseract::TFile *fp)
bool Serialize(FILE *fp) const
void set_compare_callback(TessResultCallback2< bool, T const &, T const &> *cb)
T dot_product(const GenericVector< T > &other) const
void double_the_size()
int push_back(T object)
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:638
#define rand_r(seed)
virtual R Run(A1, A2, A3)=0
bool LoadDataFromFile(const STRING &filename, GenericVector< char > *data)
T & operator[](int index) const
void set_clear_callback(TessCallback1< T > *cb)
PointerVector(const PointerVector &other)
int FRead(void *buffer, int size, int count)
Definition: serialis.cpp:91
virtual R Run(A1, A2)=0
SIGNED char inT8
Definition: host.h:31
int push_front(T object)
TessCallback1< T > * clear_cb_
bool read(FILE *f, TessResultCallback3< bool, FILE *, T *, bool > *cb, bool swap)
void truncate(int size)
void delete_data_pointers()
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const &> *cb) const
static bool SkipDeSerializeClasses(bool swap, tesseract::TFile *fp)
int inT32
Definition: host.h:35
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
Definition: strngs.h:44
int size() const
Definition: genericvector.h:72
bool WithinBounds(const T &rangemin, const T &rangemax) const
int length() const
Definition: genericvector.h:79
void set(T t, int index)
unsigned int uinT32
Definition: host.h:36
int binary_search(const T &target) const
static bool DeSerializeSize(bool swap, TFile *fp, inT32 *size)
bool SerializeClasses(FILE *fp) const
int choose_nth_item(int target_index)
T contains_index(int index) const
GenericVector< T > & operator=(const GenericVector &other)
void reserve(int size)
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
bool cmp_eq(T const &t1, T const &t2)
void sort(int(*comparator)(const void *, const void *))
int sort_cmp(const void *t1, const void *t2)
bool DeSerialize(bool swap, FILE *fp)
bool empty() const
Definition: genericvector.h:84
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:177
void init_to_size(int size, T t)
#define ASSERT_HOST(x)
Definition: errcode.h:84
inT32 size_reserved_
bool(* FileWriter)(const GenericVector< char > &data, const STRING &filename)
void compact(TessResultCallback1< bool, int > *delete_cb)
bool bool_binary_search(const T &target) const
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)
void Swap(T *p1, T *p2)
Definition: helpers.h:90