function table_merge ( input_file_name, tolerance )

%*****************************************************************************80
%
%% TABLE_MERGE merges nearby points of a dataset stored in a file.
%
%  Discussion:
%
%    The dataset is presumed to be an M by N array of real numbers,
%    where M is the spatial dimension, and N is the number of sample points.
%
%    The dataset is presumed to be stored in a file, with N records,
%    one per each sample point.  (Comment records may be included,
%    which begin with '#'.)
%
%    The program reads the data file and a tolerance, merges those
%    points that are no farther apart than the tolerance, and writes
%    the merged points to a new file whose name is the input file name
%    with its extension replaced by 'merge.txt'.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    18 March 2008
%
%  Author:
%
%    John Burkardt
%
%  Usage:
%
%    table_merge ( 'input_file_name', tolerance )
%
%  Parameters:
%
%    Input, string INPUT_FILE_NAME, the name of the data file.
%    If omitted, the user is prompted for it.
%
%    Input, real TOLERANCE, the merging distance.
%    If omitted, the user is prompted for it.
%
  timestamp ( );

  fprintf ( 1, '\n' );
  fprintf ( 1, 'TABLE_MERGE\n' );
  fprintf ( 1, ' MATLAB version\n' );
  fprintf ( 1, '\n' );
  fprintf ( 1, ' Read a dataset of N points in M dimensions,\n' );
  fprintf ( 1, ' and a tolerance TOL,\n' );
  fprintf ( 1, ' write the modified dataset to a file.\n' );
%
%  Without a first argument, ask for the input file name.
%  INPUT evaluates what is typed, so the name must be entered in quotes.
%
  if ( nargin < 1 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'TABLE_MERGE:\n' );
    input_file_name = input ( ' Enter the name of the input file.' );
  end
%
%  Without a second argument, ask for the tolerance.
%
  if ( nargin < 2 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'TABLE_MERGE:\n' );
    tolerance = input ( ' Enter the tolerance.' );
  end
%
%  Create the output file name from the input file name.
%
  output_file_name = file_name_ext_swap ( input_file_name, 'merge.txt' );
%
%  Read the dimensions, then the data.
%
  [ m, input_n ] = r8mat_header_read ( input_file_name );

  fprintf ( 1, '\n' );
  fprintf ( 1, ' Read the header of "%s".\n', input_file_name );
  fprintf ( 1, '\n' );
  fprintf ( 1, ' Spatial dimension M = %d\n', m );
  fprintf ( 1, ' Number of points N = %d\n', input_n );

  input_table = r8mat_data_read ( input_file_name, m, input_n );

  fprintf ( 1, '\n' );
  fprintf ( 1, ' Read the data in "%s".\n', input_file_name );

  r8mat_transpose_print_some ( m, input_n, input_table, 1, 1, 5, 5, ...
    ' 5 by 5 portion of data read from file:' );
%
%  Merge the points.  The first OUTPUT_N columns of OUTPUT_TABLE hold
%  the cluster representatives.
%
  [ output_table, output_n ] = node_cluster_epsilon ( m, input_n, ...
    input_table, tolerance );
%
%  Display the output values.
%
  fprintf ( 1, '\n' );
  fprintf ( 1, ' Number of input points = %d\n', input_n );
  fprintf ( 1, ' Number of output points = %d\n', output_n );
  fprintf ( 1, ' Number of eliminated points = %d\n', input_n - output_n );

  r8mat_transpose_print_some ( m, output_n, output_table, 1, 1, 5, 5, ...
    ' 5 by 5 portion of merged data:' );
%
%  Write the MERGED values (not the original input table) to a file.
%
  r8mat_write ( output_file_name, m, output_n, output_table );

  fprintf ( 1, '\n' );
  fprintf ( 1, ' Wrote the merged data to "%s".\n', output_file_name );

  fprintf ( 1, '\n' );
  fprintf ( 1, 'TABLE_MERGE\n' );
  fprintf ( 1, ' Normal end of execution.\n' );

  fprintf ( 1, '\n' );
  timestamp ( );

end
function column_num = file_column_count ( input_file_name )

%*****************************************************************************80
%
%% FILE_COLUMN_COUNT counts the columns in the first data line of a file.
%
%  Discussion:
%
%    The file is assumed to be a simple text file.
%
%    Most lines of the file are presumed to consist of COLUMN_NUM words,
%    separated by spaces.  There may also be some blank lines, and some
%    comment lines, which have a "#" in column 1.
%
%    The routine finds the first non-comment non-blank line and
%    counts the number of words in that line.  If there is no such
%    line, a warning is printed and -1 is returned.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    21 February 2004
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, string INPUT_FILE_NAME, the name of the file.
%
%    Output, integer COLUMN_NUM, the number of columns in the file,
%    or -1 if the file contains no data line.
%
%  Errors:
%
%    Raises an error if the file cannot be opened.
%
  input_unit = fopen ( input_file_name );

  if ( input_unit < 0 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'FILE_COLUMN_COUNT - Error!\n' );
    fprintf ( 1, ' Could not open the file "%s".\n', input_file_name );
    error ( 'FILE_COLUMN_COUNT - Error!' );
  end
%
%  Read lines until a data line is found.
%  FGETL drops the newline character, and returns a non-character
%  value (-1) at end of file, which is what ISCHAR detects.
%
  got_one = false;

  while ( true )

    line = fgetl ( input_unit );

    if ( ~ischar ( line ) )
      break
    end

    if ( s_len_trim ( line ) == 0 )
      continue
    end

    if ( line(1) == '#' )
      continue
    end

    got_one = true;
    break

  end

  fclose ( input_unit );

  if ( ~got_one )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'FILE_COLUMN_COUNT - Warning!\n' );
    fprintf ( 1, ' The file does not seem to contain any data.\n' );
    column_num = -1;
    return
  end

  column_num = s_word_count ( line );

end
function file_name_new = file_name_ext_swap ( file_name, ext )

%*****************************************************************************80
%
%% FILE_NAME_EXT_SWAP replaces the current "extension" of a file name.
%
%  Discussion:
%
%    The "extension" of a filename is the string of characters
%    that appears after the LAST period in the name.  A file
%    with no period, or with a period as the last character
%    in the name, has a "null" extension.
%
%  Example:
%
%          Input           Output
%    ================     =============
%    FILE_NAME    EXT     FILE_NAME_NEW
%
%    bob.for      obj     bob.obj
%    bob.bob.bob  txt     bob.bob.txt
%    bob          yak     bob.yak
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    15 August 2005
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, character FILE_NAME(*), a file name.
%
%    Input, character EXT(*), the extension to be used on the output
%    copy of FILE_NAME, replacing the current extension if any.
%
%    Output, character FILE_NAME_NEW(*), a copy of the input file name,
%    with the new extension.
%

%
%  Locate the last period; if there is none, the new extension is
%  appended after the whole name.
%
  period = find ( file_name == '.', 1, 'last' );

  if ( isempty ( period ) )
    period = length ( file_name ) + 1;
  end

  file_name_new = [ file_name(1:period-1), '.', ext ];

end
function row_num = file_row_count ( input_file_name )

%*****************************************************************************80
%
%% FILE_ROW_COUNT counts the number of data records in a file.
%
%  Discussion:
%
%    Each input line is a "RECORD".
%
%    The records are divided into three groups:
%
%    * BLANK LINES (nothing but blanks)
%    * COMMENT LINES (begin with a '#')
%    * DATA RECORDS (anything else)
%
%    The value returned by the function is the number of data records.
%
%    By the way, if the MATLAB routine FGETS is used, instead of
%    FGETL, then the variable LINE will include line termination
%    characters, which means that a blank line would not actually
%    have zero characters.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    31 December 2006
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, string INPUT_FILE_NAME, the name of the input file.
%
%    Output, integer ROW_NUM, the number of data records found.
%
%  Errors:
%
%    Raises an error if the file cannot be opened.
%
  input_unit = fopen ( input_file_name );

  if ( input_unit < 0 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'FILE_ROW_COUNT - Error!\n' );
    fprintf ( 1, ' Could not open the file "%s".\n', input_file_name );
    error ( 'FILE_ROW_COUNT - Error!' );
  end

  row_num = 0;

  while ( true )

    line = fgetl ( input_unit );
%
%  FGETL returns a non-character value (-1) at end of file.
%
    if ( ~ischar ( line ) )
      break
    end
%
%  Only lines that are neither blank nor comments are counted.
%
    if ( 0 < s_len_trim ( line ) )
      if ( line(1) ~= '#' )
        row_num = row_num + 1;
      end
    end

  end

  fclose ( input_unit );

end
function [ node_coord, cluster_num ] = node_cluster_epsilon ( dim_num, ...
  node_num, node_coord, tolerance )

%*****************************************************************************80
%
%% NODE_CLUSTER_EPSILON clusters points with a tolerance.
%
%  Discussion:
%
%    Initially, each point is in its own cluster.  If two clusters
%    are separated by a distance of no more than the tolerance, then
%    they are replaced by one cluster, whose representative point
%    is the weighted average of the two cluster representatives.
%
%    The clusters are repeatedly processed until all clusters are
%    separated by a distance greater than the tolerance.
%
%    This algorithm may be useful when a dataset containing a number
%    of points is given, and
%
%    1) duplicate points must be removed (in this case, a value of
%       TOLERANCE = 0 will work)
%    OR
%    2) points that are very close to each other should be discarded.
%       In this case, a "small" value of TOLERANCE is appropriate.
%
%    Note that this procedure depends on the ordering of the points.
%    If you have three points A < B < C separated by slightly
%    less than the tolerance, then A and B will be merged or B and C
%    will be merged, depending on the labels of the nodes.  Thus
%    this process is not a strictly geometric procedure.
%
%    Secondly, it should be obvious that the procedure may return
%    points that were not input points, because of the use of centroids.
%
%    Third, this procedure does guarantee that the output points are
%    separated by more than TOLERANCE units of distance.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    18 March 2008
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, integer DIM_NUM, the spatial dimension.
%
%    Input, integer NODE_NUM, the number of nodes.
%
%    Input, real NODE_COORD(DIM_NUM,NODE_NUM), the node coordinates.
%
%    Input, real TOLERANCE, the distance tolerance.
%
%    Output, real NODE_COORD(DIM_NUM,NODE_NUM); the first CLUSTER_NUM
%    columns hold the coordinates of the cluster representatives.
%    The remaining columns are leftover values and should be ignored.
%
%    Output, integer CLUSTER_NUM, the number of clusters.
%

%
%  WEIGHT(I) is the number of original points represented by cluster I;
%  a weight of zero marks a cluster that has been absorbed by another.
%
  weight(1:node_num) = 1.0;

  while ( true )

    found = false;

    for cluster1 = 1 : node_num

      if ( weight(cluster1) == 0.0 )
        continue
      end

      for cluster2 = 1 : cluster1 - 1

        if ( weight(cluster2) == 0.0 )
          continue
        end

        dist = norm ( node_coord(1:dim_num,cluster1) ...
                    - node_coord(1:dim_num,cluster2) );

        if ( dist <= tolerance )

          found = true;
%
%  Absorb CLUSTER1 into the lower-indexed CLUSTER2.
%
          node_coord(1:dim_num,cluster2) = ...
            ( weight(cluster1) * node_coord(1:dim_num,cluster1)   ...
            + weight(cluster2) * node_coord(1:dim_num,cluster2) ) ...
            / ( weight(cluster1) + weight(cluster2) );

          weight(cluster2) = weight(cluster1) + weight(cluster2);
          weight(cluster1) = 0.0;
%
%  CLUSTER1 no longer exists, so stop comparing its stale coordinates
%  against the remaining clusters.
%
          break

        end

      end

    end
%
%  A sweep with no merges means every pair of surviving clusters is
%  farther apart than the tolerance.
%
    if ( ~found )
      break
    end

  end
%
%  Pack the surviving representatives into the leading columns.
%
  cluster_num = 0;

  for node = 1 : node_num
    if ( 0.0 < weight(node) )
      cluster_num = cluster_num + 1;
      node_coord(1:dim_num,cluster_num) = node_coord(1:dim_num,node);
    end
  end

end
function table = r8mat_data_read ( input_filename, m, n )

%*****************************************************************************80
%
%% R8MAT_DATA_READ reads data from an R8MAT file.
%
%  Discussion:
%
%    Blank lines and lines beginning with '#' are skipped.  A remaining
%    line is accepted only if exactly M numbers can be read from it.
%    Reading stops once N lines have been accepted or the file ends, so
%    TABLE may have fewer than N columns if the file is short or holds
%    malformed lines.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    27 January 2006
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, string INPUT_FILENAME, the name of the input file.
%
%    Input, integer M, N, the number of rows and columns of data.
%
%    Output, real TABLE(M,N), the point coordinates.
%
%  Errors:
%
%    Raises an error if the file cannot be opened.
%
  table = [];
%
%  Build up the format string for reading M real numbers.
%
  string = repmat ( ' %f', 1, m );

  input_unit = fopen ( input_filename );

  if ( input_unit < 0 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'R8MAT_DATA_READ - Error!\n' );
    fprintf ( 1, ' Could not open the file.\n' );
    error ( 'R8MAT_DATA_READ - Error!' );
  end

  i = 0;

  while ( i < n )

    line = fgetl ( input_unit );
%
%  FGETL returns a non-character value (-1) at end of file.
%
    if ( ~ischar ( line ) )
      break
    end
%
%  Test for a blank line first, so that LINE(1) is never taken of an
%  empty string.
%
    if ( s_len_trim ( line ) == 0 )
      continue
    end

    if ( line(1) == '#' )
      continue
    end

    [ x, count ] = sscanf ( line, string );
%
%  SSCANF reapplies the format while data remains, so COUNT exceeds M
%  for a line with extra numbers; such lines are rejected.
%
    if ( count == m )
      i = i + 1;
      table(1:m,i) = x(1:m);
    end

  end

  fclose ( input_unit );

end
function [ m, n ] = r8mat_header_read ( input_filename )

%*****************************************************************************80
%
%% R8MAT_HEADER_READ reads the header from an R8MAT file.
%
%  Discussion:
%
%    The "header" is the pair of dimensions deduced from the file:
%    the number of words on the first data line, and the number of
%    data lines.
%
%    If either count is not positive, a message is printed, but
%    execution continues and the value is returned to the caller.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    22 October 2004
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, string INPUT_FILENAME, the name of the input file.
%
%    Output, integer M, the spatial dimension.
%
%    Output, integer N, the number of points.
%
  m = file_column_count ( input_filename );

  if ( m <= 0 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'R8MAT_HEADER_READ - Fatal error!\n' );
    fprintf ( 1, ' There was some kind of I/O problem while trying\n' );
    fprintf ( 1, ' to count the number of data columns in\n' );
    fprintf ( 1, ' the file %s.\n', input_filename );
  end

  n = file_row_count ( input_filename );

  if ( n <= 0 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'R8MAT_HEADER_READ - Fatal error!\n' );
    fprintf ( 1, ' There was some kind of I/O problem while trying\n' );
    fprintf ( 1, ' to count the number of data rows in\n' );
    fprintf ( 1, ' the file %s\n', input_filename );
  end

end
function r8mat_transpose_print ( m, n, a, title )

%*****************************************************************************80
%
%% R8MAT_TRANSPOSE_PRINT prints an R8MAT, transposed.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    10 August 2004
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, integer M, N, the number of rows and columns.
%
%    Input, real A(M,N), an M by N matrix to be printed.
%
%    Input, string TITLE, an optional title.
%
  r8mat_transpose_print_some ( m, n, a, 1, 1, m, n, title );

end
function r8mat_transpose_print_some ( m, n, a, ilo, jlo, ihi, jhi, title )

%*****************************************************************************80
%
%% R8MAT_TRANSPOSE_PRINT_SOME prints some of an R8MAT, transposed.
%
%  Discussion:
%
%    The requested rows ILO:IHI of A are printed across the page, in
%    groups of at most 5, and the requested columns JLO:JHI of A are
%    printed down the page.  The requested ranges are clipped to the
%    matrix dimensions M and N.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    23 May 2005
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, integer M, N, the number of rows and columns.
%
%    Input, real A(M,N), an M by N matrix to be printed.
%
%    Input, integer ILO, JLO, the first row and column to print.
%
%    Input, integer IHI, JHI, the last row and column to print.
%
%    Input, string TITLE, an optional title.
%
  incx = 5;

  if ( 0 < s_len_trim ( title ) )
    fprintf ( 1, '\n' );
    fprintf ( 1, '%s\n', title );
  end

  for i2lo = max ( ilo, 1 ) : incx : min ( ihi, m )
%
%  This group covers rows I2LO through I2HI of A.
%
    i2hi = min ( [ i2lo + incx - 1, m, ihi ] );

    fprintf ( 1, '\n' );
    fprintf ( 1, ' Row: ' );
    for i = i2lo : i2hi
      fprintf ( 1, '%7d ', i );
    end
    fprintf ( 1, '\n' );
    fprintf ( 1, ' Col\n' );

    j2lo = max ( jlo, 1 );
    j2hi = min ( jhi, n );

    for j = j2lo : j2hi

      fprintf ( 1, '%5d ', j );
      for i = i2lo : i2hi
        fprintf ( 1, '%12f', a(i,j) );
      end
      fprintf ( 1, '\n' );

    end

  end

end
function r8mat_write ( output_filename, m, n, table )

%*****************************************************************************80
%
%% R8MAT_WRITE writes an R8MAT file.
%
%  Discussion:
%
%    Each of the first N columns of TABLE is written as one text line
%    holding M values.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    11 August 2009
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, string OUTPUT_FILENAME, the output filename.
%
%    Input, integer M, the spatial dimension.
%
%    Input, integer N, the number of points.
%
%    Input, real TABLE(M,N), the points.
%
%  Errors:
%
%    Raises an error if the output file cannot be opened.
%

%
%  Open the file.
%
  output_unit = fopen ( output_filename, 'wt' );

  if ( output_unit < 0 )
    fprintf ( 1, '\n' );
    fprintf ( 1, 'R8MAT_WRITE - Error!\n' );
    fprintf ( 1, ' Could not open the output file.\n' );
    error ( 'R8MAT_WRITE - Error!' );
  end
%
%  Write the data.
%
%  For smaller data files, and less precision, try:
%
%     fprintf ( output_unit, ' %14.6f', table(i,j) );
%
  for j = 1 : n
    for i = 1 : m
      fprintf ( output_unit, ' %24.16f', table(i,j) );
    end
    fprintf ( output_unit, '\n' );
  end
%
%  Close the file.
%
  fclose ( output_unit );

end
function len = s_len_trim ( s )

%*****************************************************************************80
%
%% S_LEN_TRIM returns the length of a character string to the last nonblank.
%
%  Discussion:
%
%    Only the blank character ' ' is treated as trailing padding; other
%    whitespace such as tabs or line terminators counts as nonblank.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    14 June 2003
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, string S, the string to be measured.
%
%    Output, integer LEN, the length of the string up to the last nonblank.
%
  len = length ( s );

  while ( 0 < len )
    if ( s(len) ~= ' ' )
      break
    end
    len = len - 1;
  end

end
function word_num = s_word_count ( s )

%*****************************************************************************80
%
%% S_WORD_COUNT counts the number of "words" in a string.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    30 January 2006
%
%  Author:
%
%    John Burkardt
%
%  Parameters:
%
%    Input, string S, the string to be examined.
%
%    Output, integer WORD_NUM, the number of "words" in the string.
%    Words are presumed to be separated by one or more blanks.
%
  word_num = 0;
%
%  A word starts at every nonblank character that follows a blank
%  (or that begins the string).
%
  after_blank = true;

  for i = 1 : length ( s )
    if ( s(i) == ' ' )
      after_blank = true;
    elseif ( after_blank )
      word_num = word_num + 1;
      after_blank = false;
    end
  end

end
function timestamp ( )

%*****************************************************************************80
%
%% TIMESTAMP prints the current YMDHMS date as a timestamp.
%
%  Licensing:
%
%    This code is distributed under the GNU LGPL license.
%
%  Modified:
%
%    14 February 2003
%
%  Author:
%
%    John Burkardt
%
  s = datestr ( now, 0 );

  fprintf ( 1, '%s\n', s );

end