30 #define HOTELLING 1 // If true use Hotelling's test to decide where to split. 31 #define FTABLE_X 10 // Size of FTable. 32 #define FTABLE_Y 100 // Size of FTable. 36 {4052.19, 4999.52, 5403.34, 5624.62, 5763.65, 5858.97, 5928.33, 5981.10, 6022.50, 6055.85,},
37 {98.502, 99.000, 99.166, 99.249, 99.300, 99.333, 99.356, 99.374, 99.388, 99.399,},
38 {34.116, 30.816, 29.457, 28.710, 28.237, 27.911, 27.672, 27.489, 27.345, 27.229,},
39 {21.198, 18.000, 16.694, 15.977, 15.522, 15.207, 14.976, 14.799, 14.659, 14.546,},
40 {16.258, 13.274, 12.060, 11.392, 10.967, 10.672, 10.456, 10.289, 10.158, 10.051,},
41 {13.745, 10.925, 9.780, 9.148, 8.746, 8.466, 8.260, 8.102, 7.976, 7.874,},
42 {12.246, 9.547, 8.451, 7.847, 7.460, 7.191, 6.993, 6.840, 6.719, 6.620,},
43 {11.259, 8.649, 7.591, 7.006, 6.632, 6.371, 6.178, 6.029, 5.911, 5.814,},
44 {10.561, 8.022, 6.992, 6.422, 6.057, 5.802, 5.613, 5.467, 5.351, 5.257,},
45 {10.044, 7.559, 6.552, 5.994, 5.636, 5.386, 5.200, 5.057, 4.942, 4.849,},
46 { 9.646, 7.206, 6.217, 5.668, 5.316, 5.069, 4.886, 4.744, 4.632, 4.539,},
47 { 9.330, 6.927, 5.953, 5.412, 5.064, 4.821, 4.640, 4.499, 4.388, 4.296,},
48 { 9.074, 6.701, 5.739, 5.205, 4.862, 4.620, 4.441, 4.302, 4.191, 4.100,},
49 { 8.862, 6.515, 5.564, 5.035, 4.695, 4.456, 4.278, 4.140, 4.030, 3.939,},
50 { 8.683, 6.359, 5.417, 4.893, 4.556, 4.318, 4.142, 4.004, 3.895, 3.805,},
51 { 8.531, 6.226, 5.292, 4.773, 4.437, 4.202, 4.026, 3.890, 3.780, 3.691,},
52 { 8.400, 6.112, 5.185, 4.669, 4.336, 4.102, 3.927, 3.791, 3.682, 3.593,},
53 { 8.285, 6.013, 5.092, 4.579, 4.248, 4.015, 3.841, 3.705, 3.597, 3.508,},
54 { 8.185, 5.926, 5.010, 4.500, 4.171, 3.939, 3.765, 3.631, 3.523, 3.434,},
55 { 8.096, 5.849, 4.938, 4.431, 4.103, 3.871, 3.699, 3.564, 3.457, 3.368,},
56 { 8.017, 5.780, 4.874, 4.369, 4.042, 3.812, 3.640, 3.506, 3.398, 3.310,},
57 { 7.945, 5.719, 4.817, 4.313, 3.988, 3.758, 3.587, 3.453, 3.346, 3.258,},
58 { 7.881, 5.664, 4.765, 4.264, 3.939, 3.710, 3.539, 3.406, 3.299, 3.211,},
59 { 7.823, 5.614, 4.718, 4.218, 3.895, 3.667, 3.496, 3.363, 3.256, 3.168,},
60 { 7.770, 5.568, 4.675, 4.177, 3.855, 3.627, 3.457, 3.324, 3.217, 3.129,},
61 { 7.721, 5.526, 4.637, 4.140, 3.818, 3.591, 3.421, 3.288, 3.182, 3.094,},
62 { 7.677, 5.488, 4.601, 4.106, 3.785, 3.558, 3.388, 3.256, 3.149, 3.062,},
63 { 7.636, 5.453, 4.568, 4.074, 3.754, 3.528, 3.358, 3.226, 3.120, 3.032,},
64 { 7.598, 5.420, 4.538, 4.045, 3.725, 3.499, 3.330, 3.198, 3.092, 3.005,},
65 { 7.562, 5.390, 4.510, 4.018, 3.699, 3.473, 3.305, 3.173, 3.067, 2.979,},
66 { 7.530, 5.362, 4.484, 3.993, 3.675, 3.449, 3.281, 3.149, 3.043, 2.955,},
67 { 7.499, 5.336, 4.459, 3.969, 3.652, 3.427, 3.258, 3.127, 3.021, 2.934,},
68 { 7.471, 5.312, 4.437, 3.948, 3.630, 3.406, 3.238, 3.106, 3.000, 2.913,},
69 { 7.444, 5.289, 4.416, 3.927, 3.611, 3.386, 3.218, 3.087, 2.981, 2.894,},
70 { 7.419, 5.268, 4.396, 3.908, 3.592, 3.368, 3.200, 3.069, 2.963, 2.876,},
71 { 7.396, 5.248, 4.377, 3.890, 3.574, 3.351, 3.183, 3.052, 2.946, 2.859,},
72 { 7.373, 5.229, 4.360, 3.873, 3.558, 3.334, 3.167, 3.036, 2.930, 2.843,},
73 { 7.353, 5.211, 4.343, 3.858, 3.542, 3.319, 3.152, 3.021, 2.915, 2.828,},
74 { 7.333, 5.194, 4.327, 3.843, 3.528, 3.305, 3.137, 3.006, 2.901, 2.814,},
75 { 7.314, 5.179, 4.313, 3.828, 3.514, 3.291, 3.124, 2.993, 2.888, 2.801,},
76 { 7.296, 5.163, 4.299, 3.815, 3.501, 3.278, 3.111, 2.980, 2.875, 2.788,},
77 { 7.280, 5.149, 4.285, 3.802, 3.488, 3.266, 3.099, 2.968, 2.863, 2.776,},
78 { 7.264, 5.136, 4.273, 3.790, 3.476, 3.254, 3.087, 2.957, 2.851, 2.764,},
79 { 7.248, 5.123, 4.261, 3.778, 3.465, 3.243, 3.076, 2.946, 2.840, 2.754,},
80 { 7.234, 5.110, 4.249, 3.767, 3.454, 3.232, 3.066, 2.935, 2.830, 2.743,},
81 { 7.220, 5.099, 4.238, 3.757, 3.444, 3.222, 3.056, 2.925, 2.820, 2.733,},
82 { 7.207, 5.087, 4.228, 3.747, 3.434, 3.213, 3.046, 2.916, 2.811, 2.724,},
83 { 7.194, 5.077, 4.218, 3.737, 3.425, 3.204, 3.037, 2.907, 2.802, 2.715,},
84 { 7.182, 5.066, 4.208, 3.728, 3.416, 3.195, 3.028, 2.898, 2.793, 2.706,},
85 { 7.171, 5.057, 4.199, 3.720, 3.408, 3.186, 3.020, 2.890, 2.785, 2.698,},
86 { 7.159, 5.047, 4.191, 3.711, 3.400, 3.178, 3.012, 2.882, 2.777, 2.690,},
87 { 7.149, 5.038, 4.182, 3.703, 3.392, 3.171, 3.005, 2.874, 2.769, 2.683,},
88 { 7.139, 5.030, 4.174, 3.695, 3.384, 3.163, 2.997, 2.867, 2.762, 2.675,},
89 { 7.129, 5.021, 4.167, 3.688, 3.377, 3.156, 2.990, 2.860, 2.755, 2.668,},
90 { 7.119, 5.013, 4.159, 3.681, 3.370, 3.149, 2.983, 2.853, 2.748, 2.662,},
91 { 7.110, 5.006, 4.152, 3.674, 3.363, 3.143, 2.977, 2.847, 2.742, 2.655,},
92 { 7.102, 4.998, 4.145, 3.667, 3.357, 3.136, 2.971, 2.841, 2.736, 2.649,},
93 { 7.093, 4.991, 4.138, 3.661, 3.351, 3.130, 2.965, 2.835, 2.730, 2.643,},
94 { 7.085, 4.984, 4.132, 3.655, 3.345, 3.124, 2.959, 2.829, 2.724, 2.637,},
95 { 7.077, 4.977, 4.126, 3.649, 3.339, 3.119, 2.953, 2.823, 2.718, 2.632,},
96 { 7.070, 4.971, 4.120, 3.643, 3.333, 3.113, 2.948, 2.818, 2.713, 2.626,},
97 { 7.062, 4.965, 4.114, 3.638, 3.328, 3.108, 2.942, 2.813, 2.708, 2.621,},
98 { 7.055, 4.959, 4.109, 3.632, 3.323, 3.103, 2.937, 2.808, 2.703, 2.616,},
99 { 7.048, 4.953, 4.103, 3.627, 3.318, 3.098, 2.932, 2.803, 2.698, 2.611,},
100 { 7.042, 4.947, 4.098, 3.622, 3.313, 3.093, 2.928, 2.798, 2.693, 2.607,},
101 { 7.035, 4.942, 4.093, 3.618, 3.308, 3.088, 2.923, 2.793, 2.689, 2.602,},
102 { 7.029, 4.937, 4.088, 3.613, 3.304, 3.084, 2.919, 2.789, 2.684, 2.598,},
103 { 7.023, 4.932, 4.083, 3.608, 3.299, 3.080, 2.914, 2.785, 2.680, 2.593,},
104 { 7.017, 4.927, 4.079, 3.604, 3.295, 3.075, 2.910, 2.781, 2.676, 2.589,},
105 { 7.011, 4.922, 4.074, 3.600, 3.291, 3.071, 2.906, 2.777, 2.672, 2.585,},
106 { 7.006, 4.917, 4.070, 3.596, 3.287, 3.067, 2.902, 2.773, 2.668, 2.581,},
107 { 7.001, 4.913, 4.066, 3.591, 3.283, 3.063, 2.898, 2.769, 2.664, 2.578,},
108 { 6.995, 4.908, 4.062, 3.588, 3.279, 3.060, 2.895, 2.765, 2.660, 2.574,},
109 { 6.990, 4.904, 4.058, 3.584, 3.275, 3.056, 2.891, 2.762, 2.657, 2.570,},
110 { 6.985, 4.900, 4.054, 3.580, 3.272, 3.052, 2.887, 2.758, 2.653, 2.567,},
111 { 6.981, 4.896, 4.050, 3.577, 3.268, 3.049, 2.884, 2.755, 2.650, 2.563,},
112 { 6.976, 4.892, 4.047, 3.573, 3.265, 3.046, 2.881, 2.751, 2.647, 2.560,},
113 { 6.971, 4.888, 4.043, 3.570, 3.261, 3.042, 2.877, 2.748, 2.644, 2.557,},
114 { 6.967, 4.884, 4.040, 3.566, 3.258, 3.039, 2.874, 2.745, 2.640, 2.554,},
115 { 6.963, 4.881, 4.036, 3.563, 3.255, 3.036, 2.871, 2.742, 2.637, 2.551,},
116 { 6.958, 4.877, 4.033, 3.560, 3.252, 3.033, 2.868, 2.739, 2.634, 2.548,},
117 { 6.954, 4.874, 4.030, 3.557, 3.249, 3.030, 2.865, 2.736, 2.632, 2.545,},
118 { 6.950, 4.870, 4.027, 3.554, 3.246, 3.027, 2.863, 2.733, 2.629, 2.542,},
119 { 6.947, 4.867, 4.024, 3.551, 3.243, 3.025, 2.860, 2.731, 2.626, 2.539,},
120 { 6.943, 4.864, 4.021, 3.548, 3.240, 3.022, 2.857, 2.728, 2.623, 2.537,},
121 { 6.939, 4.861, 4.018, 3.545, 3.238, 3.019, 2.854, 2.725, 2.621, 2.534,},
122 { 6.935, 4.858, 4.015, 3.543, 3.235, 3.017, 2.852, 2.723, 2.618, 2.532,},
123 { 6.932, 4.855, 4.012, 3.540, 3.233, 3.014, 2.849, 2.720, 2.616, 2.529,},
124 { 6.928, 4.852, 4.010, 3.538, 3.230, 3.012, 2.847, 2.718, 2.613, 2.527,},
125 { 6.925, 4.849, 4.007, 3.535, 3.228, 3.009, 2.845, 2.715, 2.611, 2.524,},
126 { 6.922, 4.846, 4.004, 3.533, 3.225, 3.007, 2.842, 2.713, 2.609, 2.522,},
127 { 6.919, 4.844, 4.002, 3.530, 3.223, 3.004, 2.840, 2.711, 2.606, 2.520,},
128 { 6.915, 4.841, 3.999, 3.528, 3.221, 3.002, 2.838, 2.709, 2.604, 2.518,},
129 { 6.912, 4.838, 3.997, 3.525, 3.218, 3.000, 2.835, 2.706, 2.602, 2.515,},
130 { 6.909, 4.836, 3.995, 3.523, 3.216, 2.998, 2.833, 2.704, 2.600, 2.513,},
131 { 6.906, 4.833, 3.992, 3.521, 3.214, 2.996, 2.831, 2.702, 2.598, 2.511,},
132 { 6.904, 4.831, 3.990, 3.519, 3.212, 2.994, 2.829, 2.700, 2.596, 2.509,},
133 { 6.901, 4.829, 3.988, 3.517, 3.210, 2.992, 2.827, 2.698, 2.594, 2.507,},
134 { 6.898, 4.826, 3.986, 3.515, 3.208, 2.990, 2.825, 2.696, 2.592, 2.505,},
135 { 6.895, 4.824, 3.984, 3.513, 3.206, 2.988, 2.823, 2.694, 2.590, 2.503}
142 #define MINVARIANCE 0.0004 150 #define MINSAMPLESPERBUCKET 5 151 #define MINSAMPLES (MINBUCKETS * MINSAMPLESPERBUCKET) 152 #define MINSAMPLESNEEDED 1 160 #define BUCKETTABLESIZE 1024 161 #define NORMALEXTENT 3.0 206 #define Odd(N) ((N)%2) 207 #define Mirror(N,R) ((R) - (N) - 1) 208 #define Abs(N) ( ( (N) < 0 ) ? ( -(N) ) : (N) ) 218 #define SqrtOf2Pi 2.506628275 220 static const FLOAT64 kNormalVariance =
222 static const FLOAT64 kNormalMagnitude =
228 #define LOOKUPTABLESIZE 8 229 #define MAXDEGREESOFFREEDOM MAXBUCKETS 232 MINSAMPLES, 200, 400, 600, 800, 1000, 1500, 2000
376 void *FunctionParams,
386 double InvertMatrix(
const float* input,
int size,
float* inv);
411 Clusterer->
Root = NULL;
417 for (i = 0; i < SampleSize; i++) {
425 (ParamDesc[i].
Max + ParamDesc[i].
Min) / 2;
462 if (Clusterer->
Root != NULL)
464 "Can't add samples after they have been clustered");
474 Sample->
Right = NULL;
478 Sample->
Mean[i] = Feature[i];
483 if (CharID >= Clusterer->
NumChar)
484 Clusterer->
NumChar = CharID + 1;
515 if (Clusterer->
Root == NULL)
548 if (Clusterer != NULL) {
550 if (Clusterer->
KDTree != NULL)
552 if (Clusterer->
Root != NULL)
592 if (Prototype->
Cluster != NULL)
596 if (Prototype->
Distrib != NULL)
598 if (Prototype->
Mean != NULL)
632 *SearchState =
pop (*SearchState);
634 if (Cluster->
Left == NULL)
636 *SearchState =
push (*SearchState, Cluster->
Right);
637 Cluster = Cluster->
Left;
651 return (Proto->
Mean[Dimension]);
664 switch (Proto->
Style) {
671 switch (Proto->
Distrib[Dimension]) {
719 while (context.
heap->
Pop(&HeapEntry)) {
720 PotentialCluster = HeapEntry.
data;
734 if (PotentialCluster->
Neighbor != NULL) {
746 if (PotentialCluster->
Neighbor != NULL) {
774 int next = context->
next;
805 #define MAXNEIGHBORS 2 806 #define MAXDISTANCE MAX_FLOAT32 810 int NumberOfNeighbors;
816 &NumberOfNeighbors, (
void **)Neighbor, Dist);
821 for (i = 0; i < NumberOfNeighbors; i++) {
822 if ((Dist[i] < *Distance) && (Neighbor[i] != Cluster)) {
824 BestNeighbor = Neighbor[i];
895 for (i = N; i > 0; i--, ParamDesc++, m++, m1++, m2++) {
896 if (ParamDesc->Circular) {
900 if ((*m2 - *m1) > ParamDesc->HalfRange) {
901 *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n;
902 if (*m < ParamDesc->Min)
903 *m += ParamDesc->Range;
905 else if ((*m1 - *m2) > ParamDesc->HalfRange) {
906 *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n;
907 if (*m < ParamDesc->Min)
908 *m += ParamDesc->Range;
911 *m = (n1 * *m1 + n2 * *m2) / n;
914 *m = (n1 * *m1 + n2 * *m2) / n;
937 if (Clusterer->
Root != NULL)
946 ClusterStack =
pop (ClusterStack);
948 if (Prototype != NULL) {
952 ClusterStack =
push (ClusterStack, Cluster->
Right);
953 ClusterStack =
push (ClusterStack, Cluster->
Left);
1009 if (Proto != NULL) {
1120 const double kMagicSampleMargin = 0.0625;
1121 const double kFTableBoostMargin = 2.0;
1126 if (Left == NULL || Right == NULL)
1129 if (TotalDims < N + 1 || TotalDims < 2)
1131 const int kMatrixSize = N * N *
sizeof(
FLOAT32);
1136 for (
int i = 0; i < N; ++i) {
1137 int row_offset = i * N;
1139 for (
int j = 0; j < N; ++j) {
1141 Covariance[j + row_offset] = Statistics->
CoVariance[j + row_offset];
1143 Covariance[j + row_offset] = 0.0f;
1146 for (
int j = 0; j < N; ++j) {
1148 Covariance[j + row_offset] = 1.0f;
1150 Covariance[j + row_offset] = 0.0f;
1156 tprintf(
"Clustering error: Matrix inverse failed with error %g\n", err);
1159 for (
int dim = 0; dim < N; ++dim) {
1161 Delta[dim] = Left->
Mean[dim] - Right->
Mean[dim];
1169 for (
int x = 0; x < N; ++x) {
1171 for (
int y = 0; y < N; ++y) {
1172 temp += Inverse[y + N*x] * Delta[y];
1174 Tsq += Delta[x] * temp;
1183 double F = Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2)*EssentialN);
1184 int Fx = EssentialN;
1188 int Fy = TotalDims - EssentialN - 1;
1192 double FTarget =
FTable[Fy][Fx];
1197 FTarget += kFTableBoostMargin;
1226 for (i = 0; i < Clusterer->
SampleSize; i++) {
1263 for (i = 0; i < Clusterer->
SampleSize; i++) {
1270 CoVariance[i * (Clusterer->
SampleSize + 1)]));
1304 BUCKETS *UniformBuckets = NULL;
1305 BUCKETS *RandomBuckets = NULL;
1311 for (i = 0; i < Clusterer->
SampleSize; i++) {
1321 if (RandomBuckets == NULL)
1330 if (UniformBuckets == NULL)
1341 if (i < Clusterer->SampleSize) {
1385 (Statistics->
Min[i] + Statistics->
Max[i]) / 2;
1387 (Statistics->
Max[i] - Statistics->
Min[i]) / 2;
1425 uinT32 SampleCountAdjustedForBias;
1439 for (i = 0; i < N; i++) {
1440 Statistics->
Min[i] = 0.0;
1441 Statistics->
Max[i] = 0.0;
1442 for (j = 0; j < N; j++, CoVariance++)
1447 while ((Sample =
NextSample (&SearchState)) != NULL) {
1448 for (i = 0; i < N; i++) {
1449 Distance[i] = Sample->
Mean[i] - Cluster->
Mean[i];
1450 if (ParamDesc[i].Circular) {
1451 if (Distance[i] > ParamDesc[i].HalfRange)
1452 Distance[i] -= ParamDesc[i].
Range;
1453 if (Distance[i] < -ParamDesc[i].HalfRange)
1454 Distance[i] += ParamDesc[i].
Range;
1456 if (Distance[i] < Statistics->
Min[i])
1457 Statistics->
Min[i] = Distance[i];
1458 if (Distance[i] > Statistics->
Max[i])
1459 Statistics->
Max[i] = Distance[i];
1462 for (i = 0; i < N; i++)
1463 for (j = 0; j < N; j++, CoVariance++)
1464 *CoVariance += Distance[i] * Distance[j];
1471 SampleCountAdjustedForBias = Cluster->
SampleCount - 1;
1473 SampleCountAdjustedForBias = 1;
1475 for (i = 0; i < N; i++)
1476 for (j = 0; j < N; j++, CoVariance++) {
1477 *CoVariance /= SampleCountAdjustedForBias;
1489 return (Statistics);
1552 for (i = 0; i < N; i++, CoVariance += N + 1) {
1589 for (i = 0; i < N; i++) {
1613 for (i = 0; i < N; i++)
1655 for (i = 0; i < N; i++, VARii += N + 1) {
1656 if (ParamDesc[i].NonEssential)
1659 VARjj = VARii + N + 1;
1660 CoVariance = VARii + 1;
1661 for (j = i + 1; j < N; j++, CoVariance++, VARjj += N + 1) {
1662 if (ParamDesc[j].NonEssential)
1665 if ((*VARii == 0.0) || (*VARjj == 0.0))
1666 CorrelationCoeff = 0.0;
1669 sqrt (sqrt (*CoVariance * *CoVariance / (*VARii * *VARjj)));
1670 if (CorrelationCoeff > Independence)
1704 if (Buckets == NULL) {
1705 Buckets =
MakeBuckets(Distribution, SampleCount, Confidence);
1770 Buckets->
Count[i] = 0;
1786 NextBucketBoundary = BucketProbability / 2;
1788 NextBucketBoundary = BucketProbability;
1794 ProbDensity = (*DensityFunction[(int) Distribution]) (i + 1);
1795 ProbabilityDelta =
Integral (LastProbDensity, ProbDensity, 1.0);
1796 Probability += ProbabilityDelta;
1797 if (Probability > NextBucketBoundary) {
1798 if (CurrentBucket < Buckets->NumberOfBuckets - 1)
1800 NextBucketBoundary += BucketProbability;
1802 Buckets->
Bucket[i] = CurrentBucket;
1804 (
FLOAT32) (ProbabilityDelta * SampleCount);
1805 LastProbDensity = ProbDensity;
1809 (
FLOAT32) ((0.5 - Probability) * SampleCount);
1842 if (SampleCount < kCountTable[0])
1843 return kBucketsTable[0];
1846 if (SampleCount <= kCountTable[Next]) {
1847 Slope = (
FLOAT32) (kBucketsTable[Next] - kBucketsTable[Last]) /
1848 (
FLOAT32) (kCountTable[Next] - kCountTable[Last]);
1849 return ((
uinT16) (kBucketsTable[Last] +
1850 Slope * (SampleCount - kCountTable[Last])));
1853 return kBucketsTable[Last];
1876 #define CHIACCURACY 0.01 1877 #define MINALPHA (1e-200) 1893 SearchKey.
Alpha = Alpha;
1897 if (OldChiSquared == NULL) {
1932 Distance = x - kNormalMean;
1933 return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance);
1949 return UniformDistributionDensity;
1965 return (f1 + f2) * Dx / 2.0;
2003 Buckets->
Count[i] = 0;
2005 if (StdDev == 0.0) {
2014 while ((Sample =
NextSample (&SearchState)) != NULL) {
2021 Buckets->
Count[BucketID] += 1;
2030 while ((Sample =
NextSample (&SearchState)) != NULL) {
2071 x -= ParamDesc->
Range;
2072 else if (x - Mean < -ParamDesc->HalfRange)
2073 x += ParamDesc->
Range;
2076 X = ((x -
Mean) / StdDev) * kNormalStdDev + kNormalMean;
2106 x -= ParamDesc->
Range;
2107 else if (x - Mean < -ParamDesc->HalfRange)
2108 x += ParamDesc->
Range;
2137 TotalDifference = 0.0;
2140 TotalDifference += (FrequencyDifference * FrequencyDifference) /
2190 if (Cluster != NULL) {
2212 static uinT8 DegreeOffsets[] = { 3, 3, 1 };
2214 uinT16 AdjustedNumBuckets;
2216 AdjustedNumBuckets = HistogramBuckets - DegreeOffsets[(int) Distribution];
2217 if (
Odd (AdjustedNumBuckets))
2218 AdjustedNumBuckets++;
2219 return (AdjustedNumBuckets);
2252 return (arg1 == arg2);
2270 AdjustFactor = (((
FLOAT64) NewSampleCount) /
2293 Buckets->
Count[i] = 0;
2317 return (ChiStruct->
Alpha == SearchKey->
Alpha);
2360 #define INITIALDELTA 0.1 2361 #define DELTARATIO 0.1 2375 f = (*Function) ((
CHISTRUCT *) FunctionParams, x);
2376 while (
Abs (LastPosX - LastNegX) > Accuracy) {
2385 ((*Function) ((
CHISTRUCT *) FunctionParams, x + Delta) - f) / Delta;
2394 if (NewDelta < Delta)
2398 f = (*Function) ((
CHISTRUCT *) FunctionParams, x);
2434 for (i = 1; i <= N; i++) {
2435 Denominator *= 2 * i;
2437 SeriesTotal += PowerOfx / Denominator;
2439 return ((SeriesTotal * exp (-0.5 * x)) - ChiParams->
Alpha);
2473 #define ILLEGAL_CHAR 2 2475 static BOOL8 *CharFlags = NULL;
2476 static inT32 NumFlags = 0;
2481 inT32 NumCharInCluster;
2482 inT32 NumIllegalInCluster;
2487 NumIllegalInCluster = 0;
2489 if (Clusterer->
NumChar > NumFlags) {
2490 if (CharFlags != NULL)
2492 NumFlags = Clusterer->
NumChar;
2496 for (i = 0; i < NumFlags; i++)
2497 CharFlags[i] =
FALSE;
2501 while ((Sample =
NextSample (&SearchState)) != NULL) {
2503 if (CharFlags[CharID] ==
FALSE) {
2504 CharFlags[CharID] =
TRUE;
2507 if (CharFlags[CharID] ==
TRUE) {
2508 NumIllegalInCluster++;
2512 PercentIllegal = (
FLOAT32) NumIllegalInCluster / NumCharInCluster;
2513 if (PercentIllegal > MaxIllegal) {
2537 for (row = 0; row < size; row++) {
2538 for (col = 0; col < size; col++) {
2539 U[row][col] = input[row*size + col];
2540 L[row][col] = row == col ? 1.0 : 0.0;
2541 U_inv[row][col] = 0.0;
2546 for (col = 0; col < size; ++col) {
2549 double best_pivot = -1.0;
2550 for (row = col; row < size; ++row) {
2551 if (
Abs(U[row][col]) > best_pivot) {
2552 best_pivot =
Abs(U[row][col]);
2557 if (best_row != col) {
2558 for (
int k = 0; k < size; ++k) {
2559 double tmp = U[best_row][k];
2560 U[best_row][k] = U[col][k];
2562 tmp = L[best_row][k];
2563 L[best_row][k] = L[col][k];
2568 for (row = col + 1; row < size; ++row) {
2569 double ratio = -U[row][col] / U[col][col];
2570 for (
int j = col; j < size; ++j) {
2571 U[row][j] += U[col][j] * ratio;
2573 for (
int k = 0; k < size; ++k) {
2574 L[row][k] += L[col][k] * ratio;
2579 for (col = 0; col < size; ++col) {
2580 U_inv[col][col] = 1.0 / U[col][col];
2581 for (row = col - 1; row >= 0; --row) {
2583 for (
int k = col; k > row; --k) {
2584 total += U[row][k] * U_inv[k][col];
2586 U_inv[row][col] = -total / U[row][row];
2590 for (row = 0; row < size; row++) {
2591 for (col = 0; col < size; col++) {
2593 for (
int k = row; k < size; ++k) {
2594 sum += U_inv[row][k] * L[k][col];
2596 inv[row*size + col] = sum;
2600 double error_sum = 0.0;
2601 for (row = 0; row < size; row++) {
2602 for (col = 0; col < size; col++) {
2604 for (
int k = 0; k < size; ++k) {
2605 sum += input[row*size + k] * inv[k *size + col];
2608 error_sum +=
Abs(sum);
const double FTable[FTABLE_Y][FTABLE_X]
int ListEntryMatch(void *arg1, void *arg2)
FLOAT64(* SOLVEFUNC)(CHISTRUCT *, double)
SAMPLE * MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID)
tesseract::KDPairInc< float, TEMPCLUSTER * > ClusterPair
PROTOTYPE * NewEllipticalProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
BOOL8 MultipleCharSamples(CLUSTERER *Clusterer, CLUSTER *Cluster, FLOAT32 MaxIllegal)
BUCKETS * GetBuckets(CLUSTERER *clusterer, DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence)
CLUSTERER * MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[])
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension)
STATISTICS * ComputeStatistics(inT16 N, PARAM_DESC ParamDesc[], CLUSTER *Cluster)
FLOAT64 UniformDensity(inT32 x)
DISTRIBUTION Distribution
#define MAXDEGREESOFFREEDOM
void KDDelete(KDTREE *Tree, FLOAT32 Key[], void *Data)
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data)
void InitBuckets(BUCKETS *Buckets)
CHISTRUCT * NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha)
PROTOTYPE * MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *NormalBuckets, FLOAT64 Confidence)
uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets)
void FreeCluster(CLUSTER *Cluster)
#define InitSampleSearch(S, C)
KDTREE * MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[])
uinT16 NormalBucket(PARAM_DESC *ParamDesc, FLOAT32 x, FLOAT32 Mean, FLOAT32 StdDev)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void FreeBuckets(BUCKETS *Buckets)
void memfree(void *element)
FLOAT64 ComputeChiSquared(uinT16 DegreesOfFreedom, FLOAT64 Alpha)
void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster, inT32 Level)
PROTOTYPE * NewSimpleProto(inT16 N, CLUSTER *Cluster)
void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc)
PROTOTYPE * NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
PROTOTYPE * TestEllipticalProto(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, CLUSTER *Cluster, STATISTICS *Statistics)
LIST search(LIST list, void *key, int_compare is_equal)
FLOAT64(* DENSITYFUNC)(inT32)
int AlphaMatch(void *arg1, void *arg2)
void KDNearestNeighborSearch(KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[])
void FreeStatistics(STATISTICS *Statistics)
FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension)
void FreeClusterer(CLUSTERER *Clusterer)
void FreePrototype(void *arg)
PROTOTYPE * NewSphericalProto(uinT16 N, CLUSTER *Cluster, STATISTICS *Statistics)
CLUSTER * FindNearestNeighbor(KDTREE *Tree, CLUSTER *Cluster, FLOAT32 *Distance)
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
uinT16 OptimumNumberOfBuckets(uinT32 SampleCount)
void DoError(int Error, const char *Message)
void destroy_nodes(LIST list, void_dest destructor)
LIST push(LIST list, void *element)
void CreateClusterTree(CLUSTERER *Clusterer)
void FreeKDTree(KDTREE *Tree)
BOOL8 DistributionOK(BUCKETS *Buckets)
void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
uinT16 Bucket[BUCKETTABLESIZE]
BUCKETS * bucket_cache[DISTRIBUTION_COUNT][MAXBUCKETS+1 - MINBUCKETS]
CLUSTER * NextSample(LIST *SearchState)
BUCKETS * MakeBuckets(DISTRIBUTION Distribution, uinT32 SampleCount, FLOAT64 Confidence)
PROTOTYPE * MakeEllipticalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets)
double InvertMatrix(const float *input, int size, float *inv)
PROTOTYPE * MakeDegenerateProto(uinT16 N, CLUSTER *Cluster, STATISTICS *Statistics, PROTOSTYLE Style, inT32 MinSamples)
FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x)
FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx)
PROTOTYPE * MakeSphericalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets)
int NumBucketsMatch(void *arg1, void *arg2)
CLUSTER * MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster)
BOOL8 Independent(PARAM_DESC ParamDesc[], inT16 N, FLOAT32 *CoVariance, FLOAT32 Independence)
void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount)
tesseract::GenericHeap< ClusterPair > ClusterHeap
void FillBuckets(BUCKETS *Buckets, CLUSTER *Cluster, uinT16 Dim, PARAM_DESC *ParamDesc, FLOAT32 Mean, FLOAT32 StdDev)
void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics)
FLOAT64 Solve(SOLVEFUNC Function, void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy)
uinT16 UniformBucket(PARAM_DESC *ParamDesc, FLOAT32 x, FLOAT32 Mean, FLOAT32 StdDev)
inT32 MergeClusters(inT16 N, register PARAM_DESC ParamDesc[], register inT32 n1, register inT32 n2, register FLOAT32 m[], register FLOAT32 m1[], register FLOAT32 m2[])
FLOAT64 NormalDensity(inT32 x)
void FreeProtoList(LIST *ProtoList)
PROTOTYPE * MakePrototype(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, CLUSTER *Cluster)
void KDWalk(KDTREE *Tree, void_proc action, void *context)