tesseract  3.05.02
char_samp_set.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: char_samp_set.cpp
3  * Description: Implementation of a Character Sample Set Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <stdlib.h>
21 #include <string>
22 #include "char_samp_set.h"
23 #include "cached_file.h"
24 
25 namespace tesseract {
26 
28  cnt_ = 0;
29  samp_buff_ = NULL;
30  own_samples_ = false;
31 }
32 
34  Cleanup();
35 }
36 
37 // free buffers and init vars
38 void CharSampSet::Cleanup() {
39  if (samp_buff_ != NULL) {
40  // only free samples if owned by class
41  if (own_samples_ == true) {
42  for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) {
43  delete samp_buff_[samp_idx];
44  }
45  }
46  delete []samp_buff_;
47  }
48  cnt_ = 0;
49  samp_buff_ = NULL;
50 }
51 
52 // add a new sample
53 bool CharSampSet::Add(CharSamp *char_samp) {
54  if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
55  // create an extended buffer
56  CharSamp **new_samp_buff =
57  reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]);
58  // copy old contents
59  if (cnt_ > 0) {
60  memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
61  delete []samp_buff_;
62  }
63  samp_buff_ = new_samp_buff;
64  }
65  samp_buff_[cnt_++] = char_samp;
66  return true;
67 }
68 
69 // load char samples from file
70 bool CharSampSet::LoadCharSamples(FILE *fp) {
71  // free existing
72  Cleanup();
73  // samples are created here and owned by the class
74  own_samples_ = true;
75  // start loading char samples
76  while (feof(fp) == 0) {
77  CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
78  if (new_samp != NULL) {
79  if (Add(new_samp) == false) {
80  return false;
81  }
82  }
83  }
84  return true;
85 }
86 
87 // creates a CharSampSet object from file
89  FILE *fp;
90  unsigned int val32;
91  // open the file
92  fp = fopen(file_name.c_str(), "rb");
93  if (fp == NULL) {
94  return NULL;
95  }
96  // read and verify marker
97  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
98  fclose(fp);
99  return NULL;
100  }
101  if (val32 != 0xfefeabd0) {
102  fclose(fp);
103  return NULL;
104  }
105  // create an object
106  CharSampSet *samp_set = new CharSampSet();
107  if (samp_set->LoadCharSamples(fp) == false) {
108  delete samp_set;
109  samp_set = NULL;
110  }
111  fclose(fp);
112  return samp_set;
113 }
114 
115 // Create a new Char Dump file
116 FILE *CharSampSet::CreateCharDumpFile(string file_name) {
117  FILE *fp;
118  unsigned int val32;
119  // create the file
120  fp = fopen(file_name.c_str(), "wb");
121  if (!fp) {
122  return NULL;
123  }
124  // read and verify marker
125  val32 = 0xfefeabd0;
126  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
127  fclose(fp);
128  return NULL;
129  }
130  return fp;
131 }
132 
133 // Enumerate the Samples in the set one-by-one calling the enumertor's
134  // EnumCharSamp method for each sample
135 bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
136  CachedFile *fp_in;
137  unsigned int val32;
138  long i64_size,
139  i64_pos;
140  // open the file
141  fp_in = new CachedFile(file_name);
142  i64_size = fp_in->Size();
143  if (i64_size < 1) {
144  return false;
145  }
146  // read and verify marker
147  if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) {
148  return false;
149  }
150  if (val32 != 0xfefeabd0) {
151  return false;
152  }
153  // start loading char samples
154  while (fp_in->eof() == false) {
155  CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
156  i64_pos = fp_in->Tell();
157  if (new_samp != NULL) {
158  bool ret_flag = (enum_obj)->EnumCharSamp(new_samp,
159  (100.0f * i64_pos / i64_size));
160  delete new_samp;
161  if (ret_flag == false) {
162  break;
163  }
164  }
165  }
166  delete fp_in;
167  return true;
168 }
169 
170 } // namespace tesseract
static CharSampSet * FromCharDumpFile(string file_name)
static FILE * CreateCharDumpFile(string file_name)
static CharSamp * FromCharDumpFile(CachedFile *fp)
Definition: char_samp.cpp:82
#define SAMP_ALLOC_BLOCK
Definition: char_samp_set.h:38
static bool EnumSamples(string file_name, CharSampEnum *enumerator)
int Read(void *read_buff, int bytes)
Definition: cached_file.cpp:79
bool Add(CharSamp *char_samp)