tesseract  3.05.02
cube_utils.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_utils.h
3  * Description: Declaration of the Cube Utilities Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  *(C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // The CubeUtils class provides miscellaneous utility and helper functions
21 // to the rest of the Cube Engine
22 
23 #ifndef CUBE_UTILS_H
24 #define CUBE_UTILS_H
25 
26 #include <vector>
27 #include <string>
28 
29 #include "allheaders.h"
30 #include "const.h"
31 #include "char_set.h"
32 #include "char_samp.h"
33 
34 namespace tesseract {
// Collection of helper routines used by the Cube engine: cost/probability
// conversion, 32-bit character buffer handling, CharSamp <-> Pix conversion,
// file and string utilities, UTF-8/UTF-32 conversion and case handling.
// Every member function other than the constructor and destructor is static.
35 class CubeUtils {
36  public:
    // Constructor and destructor. All other members declared below are
    // static, so they can be called without creating an instance.
37  CubeUtils();
38  ~CubeUtils();
39 
40  // Converts a probability value to a cost by getting the -log() of the
41  // probability value to a known base
42  static int Prob2Cost(double prob_val);
43  // Converts a cost to probability by getting the exp(-normalized cost)
44  static double Cost2Prob(int cost);
45  // Computes the length of a 32-bit char buffer
46  static int StrLen(const char_32 *str);
47  // Compares two 32-bit char buffers
48  static int StrCmp(const char_32 *str1, const char_32 *str2);
49  // Duplicates a 32-bit char buffer
50  static char_32 *StrDup(const char_32 *str);
51  // Creates a CharSamp from a Pix and a bounding box
    // (left, top, wid, hgt)
52  static CharSamp *CharSampleFromPix(Pix *pix,
53  int left, int top, int wid, int hgt);
54  // Creates a Pix from a CharSamp
55  static Pix *PixFromCharSample(CharSamp *char_samp);
56  // Reads the contents of a file into a string
57  static bool ReadFileToString(const string &file_name, string *str);
58  // Splits a string into a vector of strings using any of the specified
    // delimiters; the pieces are written to str_vec
59  static void SplitStringUsing(const string &str, const string &delims,
60  vector<string> *str_vec);
61  // UTF-8 to UTF-32 (and UTF-32 to UTF-8) conversion functions
62  static void UTF8ToUTF32(const char *utf8_str, string_32 *str32);
63  static void UTF32ToUTF8(const char_32 *utf32_str, string *str);
64  // Returns true if input word has either 1) all-one-case, or 2)
65  // first character upper-case, and remaining characters lower-case.
66  // If char_set is not NULL, uses tesseract's unicharset functions
67  // to determine case properties. Otherwise, uses C-locale-dependent
68  // functions, which may be unreliable on non-ASCII characters.
69  static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set);
70  // Returns char_32 pointer to the lower-case-transformed version of
71  // the input string or NULL on error. If char_set is NULL returns NULL.
72  // Return array must be freed by caller.
73  static char_32 *ToLower(const char_32 *str32, CharSet *char_set);
74  // Returns char_32 pointer to the upper-case-transformed version of
75  // the input string or NULL on error. If char_set is NULL returns NULL.
76  // Return array must be freed by caller.
77  static char_32 *ToUpper(const char_32 *str32, CharSet *char_set);
78  private:
    // NOTE(review): undocumented in this header. Presumably returns the
    // image data of the (left, top, wid, hgt) region of pix; buffer layout
    // and ownership are not visible here -- confirm in cube_utils.cpp.
79  static unsigned char *GetImageData(Pix *pix,
80  int left, int top, int wid, int hgt);
81 };
82 } // namespace tesseract
83 #endif // CUBE_UTILS_H
static char_32 * ToLower(const char_32 *str32, CharSet *char_set)
Definition: cube_utils.cpp:338
static bool ReadFileToString(const string &file_name, string *str)
Definition: cube_utils.cpp:189
static char_32 * ToUpper(const char_32 *str32, CharSet *char_set)
Definition: cube_utils.cpp:369
static double Cost2Prob(int cost)
Definition: cube_utils.cpp:47
static int Prob2Cost(double prob_val)
Definition: cube_utils.cpp:37
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:272
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:54
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
Definition: cube_utils.cpp:220
static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set)
Definition: cube_utils.cpp:284
signed int char_32
Definition: string_32.h:40
basic_string< char_32 > string_32
Definition: string_32.h:41
static CharSamp * CharSampleFromPix(Pix *pix, int left, int top, int wid, int hgt)
Definition: cube_utils.cpp:101
static int StrCmp(const char_32 *str1, const char_32 *str2)
Definition: cube_utils.cpp:66
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
Definition: cube_utils.cpp:256
static Pix * PixFromCharSample(CharSamp *char_samp)
Definition: cube_utils.cpp:120
static char_32 * StrDup(const char_32 *str)
Definition: cube_utils.cpp:90