tesseract  3.05.02
strngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: strngs.h (Formerly strings.h)
3  * Description: STRING class definition.
4  * Author: Ray Smith
5  * Created: Fri Feb 15 09:15:01 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef STRNGS_H
21 #define STRNGS_H
22 
23 #include <stdio.h>
24 #include <string.h>
25 #include "platform.h"
26 #include "memry.h"
27 
28 namespace tesseract {
29 class TFile;
30 } // namespace tesseract.
31 
32 // STRING_IS_PROTECTED means that string[index] = X is invalid
33 // because you have to go through the STRING interface to modify it.
34 // This allows the string to ensure internal integrity and maintain
35 // its own string length. Unfortunately this is not possible because
36 // STRINGS are used as direct-manipulation data buffers for things
37 // like length arrays and many places cast away the const on string()
38 // to mutate the string. Turning this off means that internally we
39 // cannot assume we know the strlen.
40 #define STRING_IS_PROTECTED 0
41 
42 template <typename T> class GenericVector;
43 
45 {
46  public:
47  STRING();
48  STRING(const STRING &string);
49  STRING(const char *string);
50  STRING(const char *data, int length);
51  ~STRING ();
52 
53  // Writes to the given file. Returns false in case of error.
54  bool Serialize(FILE* fp) const;
55  // Reads from the given file. Returns false in case of error.
56  // If swap is true, assumes a big/little-endian swap is needed.
57  bool DeSerialize(bool swap, FILE* fp);
58  // Writes to the given file. Returns false in case of error.
59  bool Serialize(tesseract::TFile* fp) const;
60  // Reads from the given file. Returns false in case of error.
61  // If swap is true, assumes a big/little-endian swap is needed.
62  bool DeSerialize(bool swap, tesseract::TFile* fp);
63  // As DeSerialize, but only seeks past the data - hence a static method.
64  static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
65 
66  BOOL8 contains(const char c) const;
67  inT32 length() const;
68  inT32 size() const { return length(); }
69  const char *string() const;
70  const char *c_str() const;
71 
72  inline char* strdup() const {
73  inT32 len = length() + 1;
74  return strncpy(new char[len], GetCStr(), len);
75  }
76 
77 #if STRING_IS_PROTECTED
78  const char &operator[] (inT32 index) const;
79  // len is number of chars in s to insert starting at index in this string
80  void insert_range(inT32 index, const char*s, int len);
81  void erase_range(inT32 index, int len);
82 #else
83  char &operator[] (inT32 index) const;
84 #endif
85  void split(const char c, GenericVector<STRING> *splited);
86  void truncate_at(inT32 index);
87 
88  BOOL8 operator== (const STRING & string) const;
89  BOOL8 operator!= (const STRING & string) const;
90  BOOL8 operator!= (const char *string) const;
91 
92  STRING & operator= (const char *string);
93  STRING & operator= (const STRING & string);
94 
95  STRING operator+ (const STRING & string) const;
96  STRING operator+ (const char ch) const;
97 
98  STRING & operator+= (const char *string);
99  STRING & operator+= (const STRING & string);
100  STRING & operator+= (const char ch);
101 
102  // Assignment for strings which are not null-terminated.
103  void assign(const char *cstr, int len);
104 
105  // Appends the given string and int (as a %d) to this.
106  // += cannot be used for ints as there is a char += operator that would
107  // be ambiguous, and ints usually need a string before or between them
108  // anyway.
109  void add_str_int(const char* str, int number);
110  // Appends the given string and double (as a %.8g) to this.
111  void add_str_double(const char* str, double number);
112 
113  // ensure capacity but keep pointer encapsulated
114  inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }
115 
116  private:
// Bookkeeping record describing the string buffer.
struct STRING_HEADER {
  // Amount of space allocated in the string buffer for char data.
  int capacity_;

  // Portion of capacity_ currently in use, counting the '\0' terminator:
  //   used_ == 0 : the string is NULL (there is not even a '\0').
  //   used_ >  0 : used_ equals strlen() + 1 (the + 1 is the '\0').
  //   used_ <  0 : the string is not NULL (strlen >= 0) but its length has
  //                to be computed. This state is entered when encapsulation
  //                is violated because an API returned a mutable string.
  //
  // capacity_ - used_ is the spare room the string can grow into without
  // reallocating.
  //
  // mutable: may be written even through a const STRING_HEADER.
  mutable int used_;
};
134 
135  // To preserve the behavior of the old serialization, we only have space
136  // for one pointer in this structure. So we are embedding a data structure
137  // at the start of the storage that will hold additional state variables,
138  // then storing the actual string contents immediately after.
139  STRING_HEADER* data_;
140 
141  // returns the header part of the storage
142  inline STRING_HEADER* GetHeader() {
143  return data_;
144  }
145  inline const STRING_HEADER* GetHeader() const {
146  return data_;
147  }
148 
149  // returns the string data part of storage
150  inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); }
151 
152  inline const char* GetCStr() const {
153  return ((const char *)data_) + sizeof(STRING_HEADER);
154  }
155  inline bool InvariantOk() const {
156 #if STRING_IS_PROTECTED
157  return (GetHeader()->used_ == 0) ?
158  (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1));
159 #else
160  return true;
161 #endif
162  }
163 
164  // Ensure string has requested capacity as optimization
165  // to avoid unnecessary reallocations.
166  // The return value is a cstr buffer with at least requested capacity
167  char* ensure_cstr(inT32 min_capacity);
168 
169  void FixHeader() const; // make used_ non-negative, even if const
170 
171  char* AllocData(int used, int capacity);
172  void DiscardData();
173 };
174 #endif
inT32 size() const
Definition: strngs.h:68
char * strdup() const
Definition: strngs.h:72
void ensure(inT32 min_capacity)
Definition: strngs.h:114
#define TESS_API
Definition: platform.h:81
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:86
unsigned char BOOL8
Definition: host.h:46
ICOORD operator+(const ICOORD &op1, const ICOORD &op2)
Definition: ipoints.h:68
int inT32
Definition: host.h:35
Definition: strngs.h:44