30 contextual_ = contextual;
34 for (
int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
47 if (!obj->Init(data_file_path,
lang)) {
// Loads the per-font pair-size statistics for `lang` and rebuilds
// font_pair_size_models_ from them. The stats file name is
// data_file_path + lang + ".cube.size".
// NOTE(review): this listing is a non-contiguous excerpt; the leading numbers
// are the original file's line numbers, and the statements between them
// (closing braces, else branches, local declarations, return statements) are
// not visible here. Comments below describe only what the visible lines show.
54 bool WordSizeModel::Init(
const string &data_file_path,
const string &
lang) {
// Build the stats file name: <data_file_path><lang>.cube.size
55 string stats_file_name;
56 stats_file_name = data_file_path +
lang;
57 stats_file_name +=
".cube.size";
// Token list for the file contents. How it is filled is not visible in this
// excerpt (presumably ReadFileToString + SplitStringUsing -- TODO confirm).
67 vector<string> tokens;
// An empty token list is reported as invalid file contents.
69 if (tokens.size() < 1) {
70 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): invalid " 71 "file contents: %s\n", stats_file_name.c_str());
// Drop any previously loaded font models before parsing.
75 font_pair_size_models_.clear();
// Tokens per record: the contextual format carries 4 more tokens than the
// non-contextual one.
78 int token_cnt = contextual_ ?
79 (kExpectedTokenCount + 4) : kExpectedTokenCount;
// Number of size classes also depends on contextual_ (both alternatives are
// outside this excerpt).
84 int size_class_cnt = contextual_ ?
// Walk the token list one record (token_cnt tokens) at a time.
// NOTE(review): `tok` is int while tokens.size() is unsigned. The body indexes
// up to tokens[tok + 13]; no check that tokens.size() is a multiple of
// token_cnt is visible in this excerpt -- confirm one exists.
88 for (
int tok = 0; tok < tokens.size(); tok += token_cnt) {
// tokens[tok] is the record's font name. Start a new font entry on the first
// record and whenever the name differs from the previous record's.
90 if (tok == 0 || fnt_name != tokens[tok]) {
91 FontPairSizeInfo fnt_info;
// Allocate a size_class_cnt x size_class_cnt table: an array of row pointers
// plus one contiguous, zero-filled block that row 0 points at.
93 fnt_info.pair_size_info =
new PairSizeInfo *[size_class_cnt];
95 fnt_info.pair_size_info[0] =
96 new PairSizeInfo[size_class_cnt * size_class_cnt];
98 memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
99 sizeof(PairSizeInfo));
// Each remaining row pointer is the previous row advanced by one row length.
101 for (
int cls = 1; cls < size_class_cnt; cls++) {
102 fnt_info.pair_size_info[cls] =
103 fnt_info.pair_size_info[cls - 1] + size_class_cnt;
// Font name = token text up to the first '.', then cut at the last '/' or '\'.
107 string stripped_font_name = tokens[tok].substr(0, tokens[tok].find(
'.'));
108 string::size_type strt_pos = stripped_font_name.find_last_of(
"/\\");
// NOTE(review): substr(strt_pos) starts AT the separator, so the stored name
// keeps the leading '/' or '\'. Confirm whether strt_pos + 1 was intended.
109 if (strt_pos != string::npos) {
110 fnt_info.font_name = stripped_font_name.substr(strt_pos);
// No separator found: use the stripped name as is (presumably the else
// branch; the `else` line itself is not visible).
112 fnt_info.font_name = stripped_font_name;
114 font_pair_size_models_.push_back(fnt_info);
// Record variant with start/end flags: parses tokens tok+1..tok+13, except
// tok+4 and tok+10, which are not read here. Every sscanf must convert exactly
// one value and each start/end flag must be 0 or 1; otherwise the record is
// reported as badly formatted. Presumably this is the contextual_ branch (the
// selecting condition is not visible).
141 if (sscanf(tokens[tok + 1].c_str(),
"%d", &cls_0) != 1 ||
142 sscanf(tokens[tok + 2].c_str(),
"%d", &start_0) != 1 ||
143 sscanf(tokens[tok + 3].c_str(),
"%d", &end_0) != 1 ||
144 sscanf(tokens[tok + 5].c_str(),
"%lf", &wid_0) != 1 ||
145 sscanf(tokens[tok + 6].c_str(),
"%lf", &hgt_0) != 1 ||
146 sscanf(tokens[tok + 7].c_str(),
"%d", &cls_1) != 1 ||
147 sscanf(tokens[tok + 8].c_str(),
"%d", &start_1) != 1 ||
148 sscanf(tokens[tok + 9].c_str(),
"%d", &end_1) != 1 ||
149 sscanf(tokens[tok + 11].c_str(),
"%lf", &delta_top) != 1 ||
150 sscanf(tokens[tok + 12].c_str(),
"%lf", &wid_1) != 1 ||
151 sscanf(tokens[tok + 13].c_str(),
"%lf", &hgt_1) != 1 ||
152 (start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) ||
153 (start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) {
// The reported "line" is the 1-based record index, tok / token_cnt + 1.
154 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): bad format at " 155 "line %d\n", 1 + (tok / token_cnt));
// Map each (class, start flag, end flag) triple to a table index.
158 size_code_0 =
SizeCode(cls_0, start_0, end_0);
159 size_code_1 =
SizeCode(cls_1, start_1, end_1);
// Record variant without start/end flags: parses tokens tok+1, tok+3..tok+5
// and tok+7..tok+9; tok+2 and tok+6 are not read here. How size_code_0/1 are
// derived for this variant is not visible in the excerpt.
161 if (sscanf(tokens[tok + 1].c_str(),
"%d", &cls_0) != 1 ||
162 sscanf(tokens[tok + 3].c_str(),
"%lf", &wid_0) != 1 ||
163 sscanf(tokens[tok + 4].c_str(),
"%lf", &hgt_0) != 1 ||
164 sscanf(tokens[tok + 5].c_str(),
"%d", &cls_1) != 1 ||
165 sscanf(tokens[tok + 7].c_str(),
"%lf", &delta_top) != 1 ||
166 sscanf(tokens[tok + 8].c_str(),
"%lf", &wid_1) != 1 ||
167 sscanf(tokens[tok + 9].c_str(),
"%lf", &hgt_1) != 1) {
168 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): bad format at " 169 "line %d\n", 1 + (tok / token_cnt));
// Store the parsed values for the most recently added font, scaled by
// kShapeModelScale and truncated to int.
// NOTE(review): fnt_info is a by-value copy of the vector's last element. The
// writes below go through its pair_size_info pointer, so they land in the same
// table the vector element points to.
177 FontPairSizeInfo fnt_info = font_pair_size_models_.back();
178 fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =
179 static_cast<int>(delta_top * kShapeModelScale);
180 fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =
181 static_cast<int>(wid_0 * kShapeModelScale);
182 fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =
183 static_cast<int>(hgt_0 * kShapeModelScale);
184 fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =
185 static_cast<int>(wid_1 * kShapeModelScale);
186 fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =
187 static_cast<int>(hgt_1 * kShapeModelScale);
// Remember this record's font name so the next record can detect a change.
189 fnt_name = tokens[tok];
// Body fragment, presumably of WordSizeModel::Cost(samp_array, samp_cnt); the
// signature line and several statements are not visible in this excerpt.
// Visible logic: for every loaded font model, compute a mean distance over
// pairs of samples and return the smallest one as an int.
// Start from the worst possible cost.
199 double best_dist =
static_cast<double>(
WORST_COST);
// NOTE(review): int index compared against unsigned size().
201 for (
int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
// Per-font total; the statements that add to it are not visible here.
203 double mean_dist = 0;
// First sample of each pair.
206 for (
int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
207 int cls_0 = char_set_->
ClassID(samp_array[smp_0]->StrLabel());
// FirstChar()/LastChar() are collapsed to 0/1 flags and passed as arguments
// to a call whose head is not visible (presumably SizeCode -- TODO confirm).
215 samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,
216 samp_array[smp_0]->LastChar() == 0 ? 0 : 1);
// Cache the first sample's geometry for use inside the inner loop.
221 int char0_height = samp_array[smp_0]->
Height();
222 int char0_width = samp_array[smp_0]->
Width();
223 int char0_top = samp_array[smp_0]->
Top();
// Second sample of each pair: every later sample, so each unordered pair is
// visited once.
225 for (
int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
226 int cls_1 = char_set_->
ClassID(samp_array[smp_1]->StrLabel());
234 samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,
235 samp_array[smp_1]->LastChar() == 0 ? 0 : 1);
// Arguments pairing both samples' width/height/top, for a call whose head is
// not visible (presumably PairCost -- TODO confirm).
240 char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
241 samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
// Turn the accumulated total into a mean over the counted pairs.
// NOTE(review): no guard against pair_cnt == 0 is visible in this excerpt --
// confirm one exists before this division.
252 mean_dist /= pair_cnt;
// Keep the lowest mean distance seen so far. The update of best_fnt is not
// visible here.
253 if (best_fnt == -1 || mean_dist < best_dist) {
254 best_dist = mean_dist;
// No font produced a usable distance; the handling is not visible here.
258 if (best_fnt == -1) {
// The cost is the best mean distance truncated to int.
261 return static_cast<int>(best_dist);
// Fragment, presumably of WordSizeModel::PairCost; the first line of the
// signature and several body statements are not visible in this excerpt.
// Visible logic: scale the observed pair so that the first character's height
// matches the model's hgt_0, then sum absolute differences between the model
// values and the scaled measurements.
266 int width_1,
int height_1,
int top_1,
// Ratio of the model's first-character height to the observed one.
// NOTE(review): both operands are cast to double, so this is a floating-point
// division; no guard for height_0 == 0 is visible here.
268 double scale_factor =
static_cast<double>(pair_info.
hgt_0) /
269 static_cast<double>(height_0);
// Only a positive scale factor is used for normalization.
271 if (scale_factor > 0) {
272 double norm_width_0 = width_0 * scale_factor;
273 double norm_width_1 = width_1 * scale_factor;
274 double norm_height_1 = height_1 * scale_factor;
275 double norm_delta_top = (top_1 - top_0) * scale_factor;
// Accumulate absolute differences over both widths, the second height and the
// vertical offset. The declaration and initial value of `dist` are not visible.
279 dist += fabs(pair_info.
wid_0 - norm_width_0);
280 dist += fabs(pair_info.
wid_1 - norm_width_1);
281 dist += fabs(pair_info.
hgt_1 - norm_height_1);
282 dist += fabs(pair_info.
delta_top - norm_delta_top);
unsigned short Height() const
static int SizeCode(int cls_id, int start, int end)
static double PairCost(int width_0, int height_0, int top_0, int width_1, int height_1, int top_1, const PairSizeInfo &pair_info)
unsigned short Width() const
static bool ReadFileToString(const string &file_name, string *str)
PairSizeInfo ** pair_size_info
unsigned short Top() const
int Cost(CharSamp **samp_array, int samp_cnt) const
WordSizeModel(CharSet *, bool contextual)
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
static WordSizeModel * Create(const string &data_file_path, const string &lang, CharSet *char_set, bool contextual)
int ClassID(const char_32 *str) const