#include <cube_utils.h>
|
static int | Prob2Cost (double prob_val) |
|
static double | Cost2Prob (int cost) |
|
static int | StrLen (const char_32 *str) |
|
static int | StrCmp (const char_32 *str1, const char_32 *str2) |
|
static char_32 * | StrDup (const char_32 *str) |
|
static CharSamp * | CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt) |
|
static Pix * | PixFromCharSample (CharSamp *char_samp) |
|
static bool | ReadFileToString (const string &file_name, string *str) |
|
static void | SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec) |
|
static void | UTF8ToUTF32 (const char *utf8_str, string_32 *str32) |
|
static void | UTF32ToUTF8 (const char_32 *utf32_str, string *str) |
|
static bool | IsCaseInvariant (const char_32 *str32, CharSet *char_set) |
|
static char_32 * | ToLower (const char_32 *str32, CharSet *char_set) |
|
static char_32 * | ToUpper (const char_32 *str32, CharSet *char_set) |
|
Definition at line 35 of file cube_utils.h.
◆ CubeUtils()
tesseract::CubeUtils::CubeUtils ()
◆ ~CubeUtils()
tesseract::CubeUtils::~CubeUtils ()
◆ CharSampleFromPix()
static CharSamp * tesseract::CubeUtils::CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt)
Creates a character sample (CharSamp) from the specified portion of the image, given by left, top, wid and hgt.
Definition at line 101 of file cube_utils.cpp.
104 unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt);
105 if (temp_buff == NULL) {
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
◆ Cost2Prob()
static double tesseract::CubeUtils::Cost2Prob (int cost)
◆ IsCaseInvariant()
static bool tesseract::CubeUtils::IsCaseInvariant (const char_32 *str32, CharSet *char_set)
Definition at line 284 of file cube_utils.cpp.
285 bool all_one_case =
true;
298 first_upper = isupper(str32[0]);
299 first_lower = islower(str32[0]);
302 prev_upper = first_upper;
303 prev_lower = first_lower;
304 for (
int c = 1; str32[c] != 0; ++c) {
305 cur_upper = isupper(str32[c]);
306 cur_lower = islower(str32[c]);
307 if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
308 all_one_case =
false;
311 prev_upper = cur_upper;
312 prev_lower = cur_lower;
315 UNICHARSET *unicharset = char_set->InternalUnicharset();
317 first_upper = unicharset->
get_isupper(char_set->ClassID(str32[0]));
318 first_lower = unicharset->
get_islower(char_set->ClassID(str32[0]));
321 prev_upper = first_upper;
322 prev_lower = first_lower;
324 for (
int c = 1; c <
StrLen(str32); ++c) {
325 cur_upper = unicharset->
get_isupper(char_set->ClassID(str32[c]));
326 cur_lower = unicharset->
get_islower(char_set->ClassID(str32[c]));
327 if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
328 all_one_case =
false;
331 prev_upper = cur_upper;
332 prev_lower = cur_lower;
335 return all_one_case || capitalized;
static int StrLen(const char_32 *str)
bool get_isupper(UNICHAR_ID unichar_id) const
bool get_islower(UNICHAR_ID unichar_id) const
◆ PixFromCharSample()
static Pix * tesseract::CubeUtils::PixFromCharSample (CharSamp *char_samp)
Creates a black-and-white image from a character sample (CharSamp).
Definition at line 120 of file cube_utils.cpp.
122 if (char_samp == NULL) {
127 int stride = char_samp->Stride();
128 int wid = char_samp->Width();
129 int hgt = char_samp->Height();
131 Pix *pix = pixCreate(wid, hgt, 1);
137 unsigned char *line = char_samp->RawData();
138 for (
int y = 0; y < hgt ; y++, line += stride) {
139 for (
int x = 0; x < wid; x++) {
141 pixSetPixel(pix, x, y, 0);
143 pixSetPixel(pix, x, y, 255);
◆ Prob2Cost()
static int tesseract::CubeUtils::Prob2Cost (double prob_val)
Converts a probability to a cost (the negative log of the probability).
Definition at line 37 of file cube_utils.cpp.
◆ ReadFileToString()
static bool tesseract::CubeUtils::ReadFileToString (const string &file_name, string *str)
Reads the contents of a file into a string.
Definition at line 189 of file cube_utils.cpp.
191 FILE *fp = fopen(file_name.c_str(),
"rb");
197 fseek(fp, 0, SEEK_END);
198 int file_size = ftell(fp);
204 str->reserve(file_size);
207 char *buff =
new char[file_size];
208 int read_bytes = fread(buff, 1, static_cast<int>(file_size), fp);
209 if (read_bytes == file_size) {
210 str->append(buff, file_size);
214 return (read_bytes == file_size);
◆ SplitStringUsing()
static void tesseract::CubeUtils::SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec)
Splits a string into a vector of substrings based on the specified delimiters.
Definition at line 220 of file cube_utils.cpp.
224 if (delims[0] !=
'\0' && delims[1] ==
'\0') {
226 const char* p = str.data();
227 const char* end = p + str.size();
232 const char* start = p;
233 while (++p != end && *p != c);
234 str_vec->push_back(
string(start, p - start));
240 string::size_type begin_index, end_index;
241 begin_index = str.find_first_not_of(delims);
242 while (begin_index != string::npos) {
243 end_index = str.find_first_of(delims, begin_index);
244 if (end_index == string::npos) {
245 str_vec->push_back(str.substr(begin_index));
248 str_vec->push_back(str.substr(begin_index, (end_index - begin_index)));
249 begin_index = str.find_first_not_of(delims, end_index);
◆ StrCmp()
static int tesseract::CubeUtils::StrCmp (const char_32 *str1, const char_32 *str2)
Compares two char_32 strings.
Definition at line 66 of file cube_utils.cpp.
70 for (; (*pch1) != 0 && (*pch2) != 0; pch1++, pch2++) {
71 if ((*pch1) != (*pch2)) {
72 return (*pch1) - (*pch2);
◆ StrDup()
Duplicates a 32-bit char buffer
Definition at line 90 of file cube_utils.cpp.
93 memcpy(new_str, str32, len *
sizeof(*str32));
static int StrLen(const char_32 *str)
◆ StrLen()
static int tesseract::CubeUtils::StrLen (const char_32 *char_32_ptr)
Computes the length of a null-terminated char_32 string.
Definition at line 54 of file cube_utils.cpp.
55 if (char_32_ptr == NULL) {
59 while (char_32_ptr[++len]);
◆ ToLower()
Definition at line 338 of file cube_utils.cpp.
342 UNICHARSET *unicharset = char_set->InternalUnicharset();
345 for (
int i = 0; i < len; ++i) {
347 if (ch == INVALID_UNICHAR_ID) {
352 if (unicharset->
get_isupper(char_set->ClassID(ch))) {
354 const char_32 *str32_lower = char_set->ClassString(uid_lower);
356 if (!str32_lower ||
StrLen(str32_lower) != 1) {
360 lower[i] = str32_lower[0];
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
static int StrLen(const char_32 *str)
bool get_isupper(UNICHAR_ID unichar_id) const
◆ ToUpper()
Definition at line 369 of file cube_utils.cpp.
373 UNICHARSET *unicharset = char_set->InternalUnicharset();
376 for (
int i = 0; i < len; ++i) {
378 if (ch == INVALID_UNICHAR_ID) {
383 if (unicharset->
get_islower(char_set->ClassID(ch))) {
385 const char_32 *str32_upper = char_set->ClassString(uid_upper);
387 if (!str32_upper ||
StrLen(str32_upper) != 1) {
391 upper[i] = str32_upper[0];
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
static int StrLen(const char_32 *str)
bool get_islower(UNICHAR_ID unichar_id) const
◆ UTF32ToUTF8()
static void tesseract::CubeUtils::UTF32ToUTF8 (const char_32 *utf32_str, string *str)
UTF-32 to UTF-8 conversion function: converts the UTF-32 string utf32_str to UTF-8.
Definition at line 272 of file cube_utils.cpp.
274 for (
const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
276 char *utf8 = uni_ch.utf8_str();
◆ UTF8ToUTF32()
static void tesseract::CubeUtils::UTF8ToUTF32 (const char *utf8_str, string_32 *str32)
UTF-8 to UTF-32 conversion function: converts the UTF-8 string utf8_str to UTF-32 and appends the result to str32.
Definition at line 256 of file cube_utils.cpp.
258 int len = strlen(utf8_str);
260 for (
int ch = 0; ch < len; ch += step) {
263 UNICHAR uni_ch(utf8_str + ch, step);
264 (*str32) += uni_ch.first_uni();
static int utf8_step(const char *utf8_str)
The documentation for this class was generated from the following files: