54 file_name = data_file_path +
lang;
55 file_name +=
".cube.bigrams";
64 CharBigramTable *table = &char_bigrams_obj->bigram_table_;
68 table->char_bigram = NULL;
71 vector<string> str_vec;
74 for (
int big = 0; big < str_vec.size(); big++) {
78 if (sscanf(str_vec[big].c_str(),
"%d %x %x", &cnt, &ch1, &ch2) != 3) {
79 fprintf(stderr,
"Cube ERROR (CharBigrams::Create): invalid format " 80 "reading line: %s\n", str_vec[big].c_str());
81 delete char_bigrams_obj;
86 if (ch1 > table->max_char) {
87 CharBigram *char_bigram =
new CharBigram[ch1 + 1];
89 if (table->char_bigram != NULL && table->max_char >= 0) {
90 memcpy(char_bigram, table->char_bigram,
91 (table->max_char + 1) *
sizeof(*char_bigram));
93 delete []table->char_bigram;
95 table->char_bigram = char_bigram;
98 for (
int new_big = table->max_char + 1; new_big <= ch1; new_big++) {
99 table->char_bigram[new_big].total_cnt = 0;
100 table->char_bigram[new_big].max_char = -1;
101 table->char_bigram[new_big].bigram = NULL;
103 table->max_char = ch1;
106 if (ch2 > table->char_bigram[ch1].max_char) {
107 Bigram *bigram =
new Bigram[ch2 + 1];
109 if (table->char_bigram[ch1].bigram != NULL &&
110 table->char_bigram[ch1].max_char >= 0) {
111 memcpy(bigram, table->char_bigram[ch1].bigram,
112 (table->char_bigram[ch1].max_char + 1) *
sizeof(*bigram));
113 delete []table->char_bigram[ch1].bigram;
115 table->char_bigram[ch1].bigram = bigram;
118 for (
int new_big = table->char_bigram[ch1].max_char + 1;
119 new_big <= ch2; new_big++) {
120 table->char_bigram[ch1].bigram[new_big].cnt = 0;
122 table->char_bigram[ch1].max_char = ch2;
125 table->char_bigram[ch1].bigram[ch2].cnt = cnt;
126 table->char_bigram[ch1].total_cnt += cnt;
127 table->total_cnt += cnt;
131 table->worst_cost =
static_cast<int>(
133 for (
char_32 ch1 = 0; ch1 <= table->max_char; ch1++) {
134 for (
char_32 ch2 = 0; ch2 <= table->char_bigram[ch1].max_char; ch2++) {
135 int cnt = table->char_bigram[ch1].bigram[ch2].cnt;
136 table->char_bigram[ch1].bigram[ch2].cost =
138 log(
MAX(0.5, static_cast<double>(cnt)) /
142 return char_bigrams_obj;
// Signature only; the body is not part of this excerpt.
// Takes a file name by const reference and a non-const string pointer, and
// returns a bool.
// NOTE(review): presumably loads the contents of file_name into *str and
// reports success through the return value -- confirm against the definition.
static bool ReadFileToString(const string &file_name, string *str)
// Signature only; the body is not part of this excerpt.
// Takes a string and a delimiter string by const reference, plus a pointer to
// a vector<string>; returns nothing.
// NOTE(review): presumably splits str at the characters in delims and stores
// the pieces in *str_vec -- confirm against the definition.
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)