30 #include "unicode/errorcode.h" 31 #include "unicode/normlzr.h" 32 #include "unicode/unistr.h" 33 #include "unicode/utypes.h" 37 static string EncodeAsUTF8(
const char32 ch32) {
39 return string(uni_ch.utf8(), uni_ch.utf8_len());
61 min_norm_length_(0), max_norm_length_(0) {}
68 string lig8 = EncodeAsUTF8(lig);
69 icu::UnicodeString unicode_lig8(static_cast<UChar32>(lig));
70 icu::UnicodeString normed8_result;
71 icu::ErrorCode status;
72 icu::Normalizer::normalize(unicode_lig8, UNORM_NFKC, 0, normed8_result,
75 normed8_result.toUTF8String(normed8);
80 int lig_length = lig8.length();
81 int norm_length = normed8.size();
82 if (normed8 != lig8 && lig_length > 1 && norm_length > 1) {
122 result += lig_it->second;
159 int len = str.size();
165 if (i + liglen <= len) {
166 string lig_cand = str.substr(i, liglen);
169 tlog(3,
"Considering %s -> %s\n", lig_cand.c_str(),
178 result += it->second;
179 tlog(2,
"Substituted %s -> %s\n", lig_cand.c_str(),
190 result += str.substr(i, len - i);
LigHash lig_to_norm_table_
bool CanRenderString(const char *utf8_word, int len, std::vector< string > *graphemes) const
static SmartPtr< LigatureTable > instance_
LigHash norm_to_lig_table_
string RemoveLigatures(const string &str) const
static const_iterator end(const char *utf8_str, const int byte_length)
string AddLigatures(const string &str, const PangoFontInfo *font) const
static const_iterator begin(const char *utf8_str, const int byte_length)
static TESS_API const char * kCustomLigatures[][2]
int get_utf8(char *buf) const
string RemoveCustomLigatures(const string &str) const
static LigatureTable * Get()