tesseract  3.05.02
statistc.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: statistc.c (Formerly stats.c)
3  * Description: Simple statistical package for integer values.
4  * Author: Ray Smith
5  * Created: Mon Feb 04 16:56:05 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "statistc.h"
26 #include <string.h>
27 #include <math.h>
28 #include <stdlib.h>
29 #include "helpers.h"
30 #include "scrollview.h"
31 #include "tprintf.h"
32 
34 
35 /**********************************************************************
36  * STATS::STATS
37  *
38  * Construct a new stats element by allocating and zeroing the memory.
39  **********************************************************************/
40 STATS::STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
41  if (max_bucket_value_plus_1 <= min_bucket_value) {
42  min_bucket_value = 0;
43  max_bucket_value_plus_1 = 1;
44  }
45  rangemin_ = min_bucket_value; // setup
46  rangemax_ = max_bucket_value_plus_1;
47  buckets_ = new inT32[rangemax_ - rangemin_];
48  clear();
49 }
50 
52  rangemax_ = 0;
53  rangemin_ = 0;
54  buckets_ = NULL;
55 }
56 
57 /**********************************************************************
58  * STATS::set_range
59  *
60  * Alter the range on an existing stats element.
61  **********************************************************************/
62 bool STATS::set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) {
63  if (max_bucket_value_plus_1 <= min_bucket_value) {
64  return false;
65  }
66  if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
67  delete [] buckets_;
68  buckets_ = new inT32[max_bucket_value_plus_1 - min_bucket_value];
69  }
70  rangemin_ = min_bucket_value; // setup
71  rangemax_ = max_bucket_value_plus_1;
72  clear(); // zero it
73  return true;
74 }
75 
76 /**********************************************************************
77  * STATS::clear
78  *
79  * Clear out the STATS class by zeroing all the buckets.
80  **********************************************************************/
81 void STATS::clear() { // clear out buckets
82  total_count_ = 0;
83  if (buckets_ != NULL)
84  memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
85 }
86 
87 /**********************************************************************
88  * STATS::~STATS
89  *
90  * Destructor for a stats class.
91  **********************************************************************/
93  delete [] buckets_;
94 }
95 
96 /**********************************************************************
97  * STATS::add
98  *
99  * Add a set of samples to (or delete from) a pile.
100  **********************************************************************/
101 void STATS::add(inT32 value, inT32 count) {
102  if (buckets_ == NULL) {
103  return;
104  }
105  value = ClipToRange(value, rangemin_, rangemax_ - 1);
106  buckets_[value - rangemin_] += count;
107  total_count_ += count; // keep count of total
108 }
109 
110 /**********************************************************************
111  * STATS::mode
112  *
113  * Find the mode of a stats class.
114  **********************************************************************/
115 inT32 STATS::mode() const { // get mode of samples
116  if (buckets_ == NULL) {
117  return rangemin_;
118  }
119  inT32 max = buckets_[0]; // max cell count
120  inT32 maxindex = 0; // index of max
121  for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
122  if (buckets_[index] > max) {
123  max = buckets_[index]; // find biggest
124  maxindex = index;
125  }
126  }
127  return maxindex + rangemin_; // index of biggest
128 }
129 
130 /**********************************************************************
131  * STATS::mean
132  *
133  * Find the mean of a stats class.
134  **********************************************************************/
135 double STATS::mean() const { //get mean of samples
136  if (buckets_ == NULL || total_count_ <= 0) {
137  return static_cast<double>(rangemin_);
138  }
139  inT64 sum = 0;
140  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
141  sum += static_cast<inT64>(index) * buckets_[index];
142  }
143  return static_cast<double>(sum) / total_count_ + rangemin_;
144 }
145 
146 /**********************************************************************
147  * STATS::sd
148  *
149  * Find the standard deviation of a stats class.
150  **********************************************************************/
151 double STATS::sd() const { //standard deviation
152  if (buckets_ == NULL || total_count_ <= 0) {
153  return 0.0;
154  }
155  inT64 sum = 0;
156  double sqsum = 0.0;
157  for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
158  sum += static_cast<inT64>(index) * buckets_[index];
159  sqsum += static_cast<double>(index) * index * buckets_[index];
160  }
161  double variance = static_cast<double>(sum) / total_count_;
162  variance = sqsum / total_count_ - variance * variance;
163  if (variance > 0.0)
164  return sqrt(variance);
165  return 0.0;
166 }
167 
168 /**********************************************************************
169  * STATS::ile
170  *
171  * Returns the fractile value such that frac fraction (in [0,1]) of samples
172  * has a value less than the return value.
173  **********************************************************************/
174 double STATS::ile(double frac) const {
175  if (buckets_ == NULL || total_count_ == 0) {
176  return static_cast<double>(rangemin_);
177  }
178 #if 0
179  // TODO(rays) The existing code doesn't seem to be doing the right thing
180  // with target a double but this substitute crashes the code that uses it.
181  // Investigate and fix properly.
182  int target = IntCastRounded(frac * total_count_);
183  target = ClipToRange(target, 1, total_count_);
184 #else
185  double target = frac * total_count_;
186  target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
187 #endif
188  int sum = 0;
189  int index = 0;
190  for (index = 0; index < rangemax_ - rangemin_ && sum < target;
191  sum += buckets_[index++]);
192  if (index > 0) {
193  ASSERT_HOST(buckets_[index - 1] > 0);
194  return rangemin_ + index -
195  static_cast<double>(sum - target) / buckets_[index - 1];
196  } else {
197  return static_cast<double>(rangemin_);
198  }
199 }
200 
201 /**********************************************************************
202  * STATS::min_bucket
203  *
204  * Find REAL minimum bucket - ile(0.0) isn't necessarily correct
205  **********************************************************************/
206 inT32 STATS::min_bucket() const { // Find min
207  if (buckets_ == NULL || total_count_ == 0) {
208  return rangemin_;
209  }
210  inT32 min = 0;
211  for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
212  return rangemin_ + min;
213 }
214 
215 /**********************************************************************
216  * STATS::max_bucket
217  *
218  * Find REAL maximum bucket - ile(1.0) isn't necessarily correct
219  **********************************************************************/
220 
221 inT32 STATS::max_bucket() const { // Find max
222  if (buckets_ == NULL || total_count_ == 0) {
223  return rangemin_;
224  }
225  inT32 max;
226  for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
227  return rangemin_ + max;
228 }
229 
230 /**********************************************************************
231  * STATS::median
232  *
233  * Finds a more useful estimate of median than ile(0.5).
234  *
235  * Overcomes a problem with ile() - if the samples are, for example,
236  * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
237  * between 6 and 13 = 9.5
238  **********************************************************************/
239 double STATS::median() const { //get median
240  if (buckets_ == NULL) {
241  return static_cast<double>(rangemin_);
242  }
243  double median = ile(0.5);
244  int median_pile = static_cast<int>(floor(median));
245  if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
246  inT32 min_pile;
247  inT32 max_pile;
248  /* Find preceding non zero pile */
249  for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--);
250  /* Find following non zero pile */
251  for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++);
252  median = (min_pile + max_pile) / 2.0;
253  }
254  return median;
255 }
256 
257 /**********************************************************************
258  * STATS::local_min
259  *
260  * Return TRUE if this point is a local min.
261  **********************************************************************/
262 bool STATS::local_min(inT32 x) const {
263  if (buckets_ == NULL) {
264  return false;
265  }
266  x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
267  if (buckets_[x] == 0)
268  return true;
269  inT32 index; // table index
270  for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
271  if (index >= 0 && buckets_[index] < buckets_[x])
272  return false;
273  for (index = x + 1; index < rangemax_ - rangemin_ &&
274  buckets_[index] == buckets_[x]; ++index);
275  if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
276  return false;
277  else
278  return true;
279 }
280 
281 /**********************************************************************
282  * STATS::smooth
283  *
284  * Apply a triangular smoothing filter to the stats.
285  * This makes the modes a bit more useful.
286  * The factor gives the height of the triangle, i.e. the weight of the
287  * centre.
288  **********************************************************************/
289 void STATS::smooth(inT32 factor) {
290  if (buckets_ == NULL || factor < 2) {
291  return;
292  }
293  STATS result(rangemin_, rangemax_);
294  int entrycount = rangemax_ - rangemin_;
295  for (int entry = 0; entry < entrycount; entry++) {
296  //centre weight
297  int count = buckets_[entry] * factor;
298  for (int offset = 1; offset < factor; offset++) {
299  if (entry - offset >= 0)
300  count += buckets_[entry - offset] * (factor - offset);
301  if (entry + offset < entrycount)
302  count += buckets_[entry + offset] * (factor - offset);
303  }
304  result.add(entry + rangemin_, count);
305  }
306  total_count_ = result.total_count_;
307  memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
308 }
309 
310 /**********************************************************************
311  * STATS::cluster
312  *
313  * Cluster the samples into max_cluster clusters.
314  * Each call runs one iteration. The array of clusters must be
315  * max_clusters+1 in size as cluster 0 is used to indicate which samples
316  * have been used.
317  * The return value is the current number of clusters.
318  **********************************************************************/
319 
320 inT32 STATS::cluster(float lower, // thresholds
321  float upper,
322  float multiple, // distance threshold
323  inT32 max_clusters, // max no to make
324  STATS *clusters) { // array of clusters
325  BOOL8 new_cluster; // added one
326  float *centres; // cluster centres
327  inT32 entry; // bucket index
328  inT32 cluster; // cluster index
329  inT32 best_cluster; // one to assign to
330  inT32 new_centre = 0; // residual mode
331  inT32 new_mode; // pile count of new_centre
332  inT32 count; // pile to place
333  float dist; // from cluster
334  float min_dist; // from best_cluster
335  inT32 cluster_count; // no of clusters
336 
337  if (buckets_ == NULL || max_clusters < 1)
338  return 0;
339  centres = new float[max_clusters + 1];
340  for (cluster_count = 1; cluster_count <= max_clusters
341  && clusters[cluster_count].buckets_ != NULL
342  && clusters[cluster_count].total_count_ > 0;
343  cluster_count++) {
344  centres[cluster_count] =
345  static_cast<float>(clusters[cluster_count].ile(0.5));
346  new_centre = clusters[cluster_count].mode();
347  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
348  && entry >= rangemin_
349  && pile_count(entry) <= pile_count(entry + 1);
350  entry--) {
351  count = pile_count(entry) - clusters[0].pile_count(entry);
352  if (count > 0) {
353  clusters[cluster_count].add(entry, count);
354  clusters[0].add (entry, count);
355  }
356  }
357  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
358  && entry < rangemax_
359  && pile_count(entry) <= pile_count(entry - 1);
360  entry++) {
361  count = pile_count(entry) - clusters[0].pile_count(entry);
362  if (count > 0) {
363  clusters[cluster_count].add(entry, count);
364  clusters[0].add(entry, count);
365  }
366  }
367  }
368  cluster_count--;
369 
370  if (cluster_count == 0) {
371  clusters[0].set_range(rangemin_, rangemax_);
372  }
373  do {
374  new_cluster = FALSE;
375  new_mode = 0;
376  for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
377  count = buckets_[entry] - clusters[0].buckets_[entry];
378  //remaining pile
379  if (count > 0) { //any to handle
380  min_dist = static_cast<float>(MAX_INT32);
381  best_cluster = 0;
382  for (cluster = 1; cluster <= cluster_count; cluster++) {
383  dist = entry + rangemin_ - centres[cluster];
384  //find distance
385  if (dist < 0)
386  dist = -dist;
387  if (dist < min_dist) {
388  min_dist = dist; //find least
389  best_cluster = cluster;
390  }
391  }
392  if (min_dist > upper //far enough for new
393  && (best_cluster == 0
394  || entry + rangemin_ > centres[best_cluster] * multiple
395  || entry + rangemin_ < centres[best_cluster] / multiple)) {
396  if (count > new_mode) {
397  new_mode = count;
398  new_centre = entry + rangemin_;
399  }
400  }
401  }
402  }
403  // need new and room
404  if (new_mode > 0 && cluster_count < max_clusters) {
405  cluster_count++;
406  new_cluster = TRUE;
407  if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
408  delete [] centres;
409  return 0;
410  }
411  centres[cluster_count] = static_cast<float>(new_centre);
412  clusters[cluster_count].add(new_centre, new_mode);
413  clusters[0].add(new_centre, new_mode);
414  for (entry = new_centre - 1; centres[cluster_count] - entry < lower
415  && entry >= rangemin_
416  && pile_count (entry) <= pile_count(entry + 1); entry--) {
417  count = pile_count(entry) - clusters[0].pile_count(entry);
418  if (count > 0) {
419  clusters[cluster_count].add(entry, count);
420  clusters[0].add(entry, count);
421  }
422  }
423  for (entry = new_centre + 1; entry - centres[cluster_count] < lower
424  && entry < rangemax_
425  && pile_count (entry) <= pile_count(entry - 1); entry++) {
426  count = pile_count(entry) - clusters[0].pile_count(entry);
427  if (count > 0) {
428  clusters[cluster_count].add(entry, count);
429  clusters[0].add (entry, count);
430  }
431  }
432  centres[cluster_count] =
433  static_cast<float>(clusters[cluster_count].ile(0.5));
434  }
435  } while (new_cluster && cluster_count < max_clusters);
436  delete [] centres;
437  return cluster_count;
438 }
439 
440 // Helper tests that the current index is still part of the peak and gathers
441 // the data into the peak, returning false when the peak is ended.
442 // src_buckets[index] - used_buckets[index] is the unused part of the histogram.
443 // prev_count is the histogram count of the previous index on entry and is
444 // updated to the current index on return.
445 // total_count and total_value are accumulating the mean of the peak.
446 static bool GatherPeak(int index, const int* src_buckets, int* used_buckets,
447  int* prev_count, int* total_count, double* total_value) {
448  int pile_count = src_buckets[index] - used_buckets[index];
449  if (pile_count <= *prev_count && pile_count > 0) {
450  // Accumulate count and index.count product.
451  *total_count += pile_count;
452  *total_value += index * pile_count;
453  // Mark this index as used
454  used_buckets[index] = src_buckets[index];
455  *prev_count = pile_count;
456  return true;
457  } else {
458  return false;
459  }
460 }
461 
462 // Finds (at most) the top max_modes modes, well actually the whole peak around
463 // each mode, returning them in the given modes vector as a <mean of peak,
464 // total count of peak> pair in order of decreasing total count.
465 // Since the mean is the key and the count the data in the pair, a single call
466 // to sort on the output will re-sort by increasing mean of peak if that is
467 // more useful than decreasing total count.
468 // Returns the actual number of modes found.
469 int STATS::top_n_modes(int max_modes,
470  GenericVector<KDPairInc<float, int> >* modes) const {
471  if (max_modes <= 0) return 0;
472  int src_count = rangemax_ - rangemin_;
473  // Used copies the counts in buckets_ as they get used.
474  STATS used(rangemin_, rangemax_);
475  modes->truncate(0);
476  // Total count of the smallest peak found so far.
477  int least_count = 1;
478  // Mode that is used as a seed for each peak
479  int max_count = 0;
480  do {
481  // Find an unused mode.
482  max_count = 0;
483  int max_index = 0;
484  for (int src_index = 0; src_index < src_count; src_index++) {
485  int pile_count = buckets_[src_index] - used.buckets_[src_index];
486  if (pile_count > max_count) {
487  max_count = pile_count;
488  max_index = src_index;
489  }
490  }
491  if (max_count > 0) {
492  // Copy the bucket count to used so it doesn't get found again.
493  used.buckets_[max_index] = max_count;
494  // Get the entire peak.
495  double total_value = max_index * max_count;
496  int total_count = max_count;
497  int prev_pile = max_count;
498  for (int offset = 1; max_index + offset < src_count; ++offset) {
499  if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
500  &prev_pile, &total_count, &total_value))
501  break;
502  }
503  prev_pile = buckets_[max_index];
504  for (int offset = 1; max_index - offset >= 0; ++offset) {
505  if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
506  &prev_pile, &total_count, &total_value))
507  break;
508  }
509  if (total_count > least_count || modes->size() < max_modes) {
510  // We definitely want this mode, so if we have enough discard the least.
511  if (modes->size() == max_modes)
512  modes->truncate(max_modes - 1);
513  int target_index = 0;
514  // Linear search for the target insertion point.
515  while (target_index < modes->size() &&
516  (*modes)[target_index].data >= total_count)
517  ++target_index;
518  float peak_mean =
519  static_cast<float>(total_value / total_count + rangemin_);
520  modes->insert(KDPairInc<float, int>(peak_mean, total_count),
521  target_index);
522  least_count = modes->back().data;
523  }
524  }
525  } while (max_count > 0);
526  return modes->size();
527 }
528 
529 /**********************************************************************
530  * STATS::print
531  *
532  * Prints a summary and table of the histogram.
533  **********************************************************************/
534 void STATS::print() const {
535  if (buckets_ == NULL) {
536  return;
537  }
538  inT32 min = min_bucket() - rangemin_;
539  inT32 max = max_bucket() - rangemin_;
540 
541  int num_printed = 0;
542  for (int index = min; index <= max; index++) {
543  if (buckets_[index] != 0) {
544  tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
545  if (++num_printed % 8 == 0)
546  tprintf ("\n");
547  }
548  }
549  tprintf ("\n");
550  print_summary();
551 }
552 
553 
554 
555 /**********************************************************************
556  * STATS::print_summary
557  *
558  * Print a summary of the stats.
559  **********************************************************************/
560 void STATS::print_summary() const {
561  if (buckets_ == NULL) {
562  return;
563  }
564  inT32 min = min_bucket();
565  inT32 max = max_bucket();
566  tprintf("Total count=%d\n", total_count_);
567  tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
568  tprintf("Lower quartile=%.2f\n", ile(0.25));
569  tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
570  tprintf("Upper quartile=%.2f\n", ile(0.75));
571  tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
572  tprintf("Range=%d\n", max + 1 - min);
573  tprintf("Mean= %.2f\n", mean());
574  tprintf("SD= %.2f\n", sd());
575 }
576 
577 
578 /**********************************************************************
579  * STATS::plot
580  *
581  * Draw a histogram of the stats table.
582  **********************************************************************/
583 
584 #ifndef GRAPHICS_DISABLED
585 void STATS::plot(ScrollView* window, // to draw in
586  float xorigin, // bottom left
587  float yorigin,
588  float xscale, // one x unit
589  float yscale, // one y unit
590  ScrollView::Color colour) const { // colour to draw in
591  if (buckets_ == NULL) {
592  return;
593  }
594  window->Pen(colour);
595 
596  for (int index = 0; index < rangemax_ - rangemin_; index++) {
597  window->Rectangle( xorigin + xscale * index, yorigin,
598  xorigin + xscale * (index + 1),
599  yorigin + yscale * buckets_[index]);
600  }
601 }
602 #endif
603 
604 
605 /**********************************************************************
606  * STATS::plotline
607  *
608  * Draw a histogram of the stats table. (Line only)
609  **********************************************************************/
610 
611 #ifndef GRAPHICS_DISABLED
612 void STATS::plotline(ScrollView* window, // to draw in
613  float xorigin, // bottom left
614  float yorigin,
615  float xscale, // one x unit
616  float yscale, // one y unit
617  ScrollView::Color colour) const { // colour to draw in
618  if (buckets_ == NULL) {
619  return;
620  }
621  window->Pen(colour);
622  window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
623  for (int index = 0; index < rangemax_ - rangemin_; index++) {
624  window->DrawTo(xorigin + xscale * index,
625  yorigin + yscale * buckets_[index]);
626  }
627 }
628 #endif
629 
630 
631 /**********************************************************************
632  * choose_nth_item
633  *
634  * Returns the index of what would b the nth item in the array
635  * if the members were sorted, without actually sorting.
636  **********************************************************************/
637 
638 inT32 choose_nth_item(inT32 index, float *array, inT32 count) {
639  inT32 next_sample; // next one to do
640  inT32 next_lesser; // space for new
641  inT32 prev_greater; // last one saved
642  inT32 equal_count; // no of equal ones
643  float pivot; // proposed median
644  float sample; // current sample
645 
646  if (count <= 1)
647  return 0;
648  if (count == 2) {
649  if (array[0] < array[1]) {
650  return index >= 1 ? 1 : 0;
651  }
652  else {
653  return index >= 1 ? 0 : 1;
654  }
655  }
656  else {
657  if (index < 0)
658  index = 0; // ensure legal
659  else if (index >= count)
660  index = count - 1;
661  equal_count = (inT32) (rand() % count);
662  pivot = array[equal_count];
663  // fill gap
664  array[equal_count] = array[0];
665  next_lesser = 0;
666  prev_greater = count;
667  equal_count = 1;
668  for (next_sample = 1; next_sample < prev_greater;) {
669  sample = array[next_sample];
670  if (sample < pivot) {
671  // shuffle
672  array[next_lesser++] = sample;
673  next_sample++;
674  }
675  else if (sample > pivot) {
676  prev_greater--;
677  // juggle
678  array[next_sample] = array[prev_greater];
679  array[prev_greater] = sample;
680  }
681  else {
682  equal_count++;
683  next_sample++;
684  }
685  }
686  for (next_sample = next_lesser; next_sample < prev_greater;)
687  array[next_sample++] = pivot;
688  if (index < next_lesser)
689  return choose_nth_item (index, array, next_lesser);
690  else if (index < prev_greater)
691  return next_lesser; // in equal bracket
692  else
693  return choose_nth_item (index - prev_greater,
694  array + prev_greater,
695  count - prev_greater) + prev_greater;
696  }
697 }
698 
699 /**********************************************************************
700  * choose_nth_item
701  *
702  * Returns the index of what would be the nth item in the array
703  * if the members were sorted, without actually sorting.
704  **********************************************************************/
705 inT32 choose_nth_item(inT32 index, void *array, inT32 count, size_t size,
706  int (*compar)(const void*, const void*)) {
707  int result; // of compar
708  inT32 next_sample; // next one to do
709  inT32 next_lesser; // space for new
710  inT32 prev_greater; // last one saved
711  inT32 equal_count; // no of equal ones
712  inT32 pivot; // proposed median
713 
714  if (count <= 1)
715  return 0;
716  if (count == 2) {
717  if (compar (array, (char *) array + size) < 0) {
718  return index >= 1 ? 1 : 0;
719  }
720  else {
721  return index >= 1 ? 0 : 1;
722  }
723  }
724  if (index < 0)
725  index = 0; // ensure legal
726  else if (index >= count)
727  index = count - 1;
728  pivot = (inT32) (rand () % count);
729  swap_entries (array, size, pivot, 0);
730  next_lesser = 0;
731  prev_greater = count;
732  equal_count = 1;
733  for (next_sample = 1; next_sample < prev_greater;) {
734  result =
735  compar ((char *) array + size * next_sample,
736  (char *) array + size * next_lesser);
737  if (result < 0) {
738  swap_entries (array, size, next_lesser++, next_sample++);
739  // shuffle
740  }
741  else if (result > 0) {
742  prev_greater--;
743  swap_entries(array, size, prev_greater, next_sample);
744  }
745  else {
746  equal_count++;
747  next_sample++;
748  }
749  }
750  if (index < next_lesser)
751  return choose_nth_item (index, array, next_lesser, size, compar);
752  else if (index < prev_greater)
753  return next_lesser; // in equal bracket
754  else
755  return choose_nth_item (index - prev_greater,
756  (char *) array + size * prev_greater,
757  count - prev_greater, size,
758  compar) + prev_greater;
759 }
760 
761 /**********************************************************************
762  * swap_entries
763  *
764  * Swap 2 entries of arbitrary size in-place in a table.
765  **********************************************************************/
766 void swap_entries(void *array, // array of entries
767  size_t size, // size of entry
768  inT32 index1, // entries to swap
769  inT32 index2) {
770  char tmp;
771  char *ptr1; // to entries
772  char *ptr2;
773  size_t count; // of bytes
774 
775  ptr1 = reinterpret_cast<char*>(array) + index1 * size;
776  ptr2 = reinterpret_cast<char*>(array) + index2 * size;
777  for (count = 0; count < size; count++) {
778  tmp = *ptr1;
779  *ptr1++ = *ptr2;
780  *ptr2++ = tmp; // tedious!
781  }
782 }
void DrawTo(int x, int y)
Definition: scrollview.cpp:531
double sd() const
Definition: statistc.cpp:151
int count(LIST var_list)
Definition: oldlist.cpp:103
inT32 min_bucket() const
Definition: statistc.cpp:206
inT32 mode() const
Definition: statistc.cpp:115
#define TRUE
Definition: capi.h:45
int IntCastRounded(double x)
Definition: helpers.h:172
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:612
void clear()
Definition: statistc.cpp:81
void SetCursor(int x, int y)
Definition: scrollview.cpp:525
inT32 max_bucket() const
Definition: statistc.cpp:221
void swap_entries(void *array, size_t size, inT32 index1, inT32 index2)
Definition: statistc.cpp:766
void add(inT32 value, inT32 count)
Definition: statistc.cpp:101
bool local_min(inT32 x) const
Definition: statistc.cpp:262
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
~STATS()
Definition: statistc.cpp:92
void print_summary() const
Definition: statistc.cpp:560
STATS()
Definition: statistc.cpp:51
unsigned char BOOL8
Definition: host.h:46
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:638
void print() const
Definition: statistc.cpp:534
#define FALSE
Definition: capi.h:46
void smooth(inT32 factor)
Definition: statistc.cpp:289
long long int inT64
Definition: host.h:41
void Pen(Color color)
Definition: scrollview.cpp:726
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const
Definition: statistc.cpp:469
int inT32
Definition: host.h:35
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
#define tprintf(...)
Definition: tprintf.h:31
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
Definition: statistc.cpp:320
#define MAX_INT32
Definition: host.h:53
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
Definition: cluster.h:32
double ile(double frac) const
Definition: statistc.cpp:174
double median() const
Definition: statistc.cpp:239
double mean() const
Definition: statistc.cpp:135
Definition: statistc.h:33
#define ASSERT_HOST(x)
Definition: errcode.h:84
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:585
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
Definition: statistc.cpp:62