tesseract  3.05.02
bitvector.h
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: bitvector.h
5 // Description: Class replacement for BITVECTOR.
6 // Author: Ray Smith
7 // Created: Mon Jan 10 17:44:01 PST 2011
8 //
9 // (C) Copyright 2011, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 
23 #ifndef TESSERACT_CCUTIL_BITVECTOR_H__
24 #define TESSERACT_CCUTIL_BITVECTOR_H__
25 
26 #include <assert.h>
27 #include <stdio.h>
28 #include "host.h"
29 
30 namespace tesseract {
31 
32 // Trivial class to encapsulate a fixed-length array of bits, with
33 // Serialize/DeSerialize. Replaces the old macros.
34 class BitVector {
35  public:
36  // Fast lookup table to get the first least significant set bit in a byte.
37  // For zero, the table has 255, but since it is a special case, most code
38  // that uses this table will check for zero before looking up lsb_index_.
39  static const uinT8 lsb_index_[256];
40  // Fast lookup table to get the residual bits after zeroing the least
41  // significant set bit in a byte.
42  static const uinT8 lsb_eroded_[256];
43  // Fast lookup table to give the number of set bits in a byte.
44  static const int hamming_table_[256];
45 
46  BitVector();
47  // Initializes the array to length * false.
48  explicit BitVector(int length);
49  BitVector(const BitVector& src);
50  BitVector& operator=(const BitVector& src);
51  ~BitVector();
52 
53  // Initializes the array to length * false.
54  void Init(int length);
55 
56  // Returns the number of bits that are accessible in the vector.
57  int size() const {
58  return bit_size_;
59  }
60 
61  // Writes to the given file. Returns false in case of error.
62  bool Serialize(FILE* fp) const;
63  // Reads from the given file. Returns false in case of error.
64  // If swap is true, assumes a big/little-endian swap is needed.
65  bool DeSerialize(bool swap, FILE* fp);
66 
67  void SetAllFalse();
68  void SetAllTrue();
69 
70  // Accessors to set/reset/get bits.
71  // The range of index is [0, size()-1].
72  // There is debug-only bounds checking.
73  void SetBit(int index) {
74  array_[WordIndex(index)] |= BitMask(index);
75  }
76  void ResetBit(int index) {
77  array_[WordIndex(index)] &= ~BitMask(index);
78  }
79  void SetValue(int index, bool value) {
80  if (value)
81  SetBit(index);
82  else
83  ResetBit(index);
84  }
85  bool At(int index) const {
86  return (array_[WordIndex(index)] & BitMask(index)) != 0;
87  }
88  bool operator[](int index) const {
89  return (array_[WordIndex(index)] & BitMask(index)) != 0;
90  }
91 
92  // Returns the index of the next set bit after the given index.
93  // Useful for quickly iterating through the set bits in a sparse vector.
94  int NextSetBit(int prev_bit) const;
95 
96  // Returns the number of set bits in the vector.
97  int NumSetBits() const;
98 
99  // Logical in-place operations on whole bit vectors. Tries to do something
100  // sensible if they aren't the same size, but they should be really.
101  void operator|=(const BitVector& other);
102  void operator&=(const BitVector& other);
103  void operator^=(const BitVector& other);
104  // Set subtraction *this = v1 - v2.
105  void SetSubtract(const BitVector& v1, const BitVector& v2);
106 
107  private:
108  // Allocates memory for a vector of the given length.
109  void Alloc(int length);
110 
111  // Computes the index to array_ for the given index, with debug range
112  // checking.
113  int WordIndex(int index) const {
114  assert(0 <= index && index < bit_size_);
115  return index / kBitFactor;
116  }
117  // Returns a mask to select the appropriate bit for the given index.
118  uinT32 BitMask(int index) const {
119  return 1 << (index & (kBitFactor - 1));
120  }
121  // Returns the number of array elements needed to represent the current
122  // bit_size_.
123  int WordLength() const {
124  return (bit_size_ + kBitFactor - 1) / kBitFactor;
125  }
126  // Returns the number of bytes consumed by the array_.
127  int ByteLength() const {
128  return WordLength() * sizeof(*array_);
129  }
130 
131  // Number of bits in this BitVector.
132  inT32 bit_size_;
133  // Array of words used to pack the bits.
134  // Bits are stored little-endian by uinT32 word, ie by word first and then
135  // starting with the least significant bit in each word.
136  uinT32* array_;
137  // Number of bits in an array_ element.
138  static const int kBitFactor = sizeof(uinT32) * 8;
139 };
140 
141 } // namespace tesseract.
142 
143 #endif // TESSERACT_CCUTIL_BITVECTOR_H__
int size() const
Definition: bitvector.h:57
int NumSetBits() const
Definition: bitvector.cpp:212
void Init(int length)
Definition: bitvector.cpp:132
unsigned char uinT8
Definition: host.h:32
void operator|=(const BitVector &other)
Definition: bitvector.cpp:227
bool operator[](int index) const
Definition: bitvector.h:88
bool At(int index) const
Definition: bitvector.h:85
void SetValue(int index, bool value)
Definition: bitvector.h:79
void operator^=(const BitVector &other)
Definition: bitvector.cpp:239
static const uinT8 lsb_index_[256]
Definition: bitvector.h:39
bool Serialize(FILE *fp) const
Definition: bitvector.cpp:138
bool DeSerialize(bool swap, FILE *fp)
Definition: bitvector.cpp:148
int inT32
Definition: host.h:35
void SetBit(int index)
Definition: bitvector.h:73
unsigned int uinT32
Definition: host.h:36
static const uinT8 lsb_eroded_[256]
Definition: bitvector.h:42
BitVector & operator=(const BitVector &src)
Definition: bitvector.cpp:121
void ResetBit(int index)
Definition: bitvector.h:76
int NextSetBit(int prev_bit) const
Definition: bitvector.cpp:174
void SetSubtract(const BitVector &v1, const BitVector &v2)
Definition: bitvector.cpp:245
static const int hamming_table_[256]
Definition: bitvector.h:44
void operator&=(const BitVector &other)