30 # ifndef WIN32_LEAN_AND_MEAN
31 # define WIN32_LEAN_AND_MEAN
32 # endif // WIN32_LEAN_AND_MEAN
52 template<class T> int SparseMatrix<T>::UseAlloc=0;
60 internalAllocator.set(blockSize);
72 _maxEntriesPerRow = 0;
84 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
86 for( int i=0 ; i<rows ; i++ )
96 for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
102 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
103 else Resize( M.rows );
104 for( int i=0 ; i<rows ; i++ )
118 FILE* fp = fopen( fileName , "wb" );
119 if( !fp ) return false;
120 bool ret = write( fp );
127 FILE* fp = fopen( fileName , "rb" );
128 if( !fp ) return false;
129 bool ret = read( fp );
136 if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
137 if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
138 for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
145 if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
147 if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
148 for( int i=0 ; i<rows ; i++ )
153 if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
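// Added note: write()/read() above store a raw binary dump — the row count, the
// rowSizes[] array, then each row's MatrixEntry<T> records — so a file is only
// portable between builds that agree on sizeof(int) and sizeof(MatrixEntry<T>).
// Minimal round-trip sketch (hypothetical file name; assumes M has been filled):
//   SparseMatrix< float > M , M2;
//   /* ... fill M ... */
//   if( !M.write( "matrix.bin" ) ) fprintf( stderr , "[ERROR] write failed\n" );
//   if( !M2.read ( "matrix.bin" ) ) fprintf( stderr , "[ERROR] read failed\n" );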
166 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
167 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
168 free( m_ppElements );
174 rowSizes = ( int* )malloc( sizeof( int ) * r );
175 memset( rowSizes , 0 , sizeof( int ) * r );
179 _maxEntriesPerRow = 0;
187 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
188 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
189 free( m_ppElements );
195 rowSizes = ( int* )malloc( sizeof( int ) * r );
196 memset( rowSizes , 0 , sizeof( int ) * r );
199 for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
202 _maxEntriesPerRow = e;
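// Added note: in the contiguous Resize above all rows live in one allocation and
// row i starts at m_ppElements[0] + i*e, so every row gets the same capacity
// e (= _maxEntriesPerRow). SetRowSize() below therefore only records the used
// count in rowSizes[] for a contiguous matrix and never reallocates per row.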
210 if (count > _maxEntriesPerRow)
212 POISSON_THROW_EXCEPTION (pcl::poisson::PoissonBadArgumentException, "Attempted to set row size on contiguous matrix larger than max row size: (requested)" << count << " > (maximum)" << _maxEntriesPerRow );
214 rowSizes[row] = count;
216 else if( row>=0 && row<rows )
218 if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
221 if( rowSizes[row] ) free( m_ppElements[row] );
231 Resize(this->m_N, this->m_M);
238 for( int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
239 (*this)(ij,ij) = T(1);
253 for (int i=0; i<this->Rows(); i++)
255 for( int ii=0 ; ii<rowSizes[i] ; ii++ ){ m_ppElements[i][ii].Value *= V; }
264 for( int i=0; i<R.Rows(); i++){
265 for( int ii=0 ; ii<rowSizes[i] ; ii++ ){
266 int N=m_ppElements[i][ii].N;
267 T Value=m_ppElements[i][ii].Value;
282 for (int i=0; i<rows; i++)
285 for( int ii=0 ; ii<rowSizes[i] ; ii++ ){
286 temp += m_ppElements[i][ii].Value * V.m_pV[ m_ppElements[i][ii].N ];
297 #pragma omp parallel for num_threads( threads ) schedule( static )
298 for( int i=0 ; i<rows ; i++ )
302 for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[ m_ppElements[i][j].N ];
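// Added note: the value accumulated in temp above is the sparse dot product of
// row i with the input vector, i.e. out[i] = sum_j Value(i,j) * In[ N(i,j) ],
// which is the usual row-by-row sparse matrix-vector product.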
324 for (int i=0; i<this->Rows(); i++)
326 for( int ii=0 ; ii<rowSizes[i] ; ii++ ){
327 M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
347 double delta_new = 0 , delta_0;
348 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
350 if( delta_new<eps ) return 0;
354 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
357 double dDotQ = 0 , alpha = 0;
359 alpha = delta_new / dDotQ;
360 #pragma omp parallel for num_threads( threads ) schedule( static )
361 for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
365 M.Multiply( solution , r , threads );
369 #pragma omp parallel for num_threads( threads ) schedule( static )
370 for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
372 double delta_old = delta_new , beta;
374 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
375 beta = delta_new / delta_old;
376 #pragma omp parallel for num_threads( threads ) schedule( static )
377 for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
391 solution.Resize(M.Columns());
396 for(i=0;i<iters && rDotR>eps;i++){
399 alpha=rDotR/d.Dot(Md);
425 for( int i=0; i<SparseMatrix<T>::rows; i++){
426 for( int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
440 const T2* in = &In[0];
445 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
452 const T2& in_i_ = in[i];
454 for( ; temp!=end ; temp++ )
463 if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
470 const T2* in = &In[0];
471 int threads = OutScratch.threads();
475 #pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
476 for( int t=0 ; t<threads ; t++ )
478 T2* out = OutScratch[t];
479 memset( out , 0 , sizeof( T2 ) * dim );
482 const T2& in_i_ = in[i];
497 #pragma omp parallel for num_threads( threads ) schedule( static )
498 for( int i=0 ; i<dim ; i++ )
501 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
507 #pragma omp parallel for num_threads( threads )
508 for( int t=0 ; t<threads ; t++ )
510 T2* out = OutScratch[t];
511 memset( out , 0 , sizeof( T2 ) * dim );
514 const T2& in_i_ = in[i];
527 #pragma omp parallel for num_threads( threads ) schedule( static )
528 for( int i=0 ; i<dim ; i++ )
531 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
541 const T2* in = &In[0];
542 int threads = OutScratch.size();
543 #pragma omp parallel for num_threads( threads )
544 for( int t=0 ; t<threads ; t++ )
545 for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
546 #pragma omp parallel for num_threads( threads )
547 for( int t=0 ; t<threads ; t++ )
549 T2* out = OutScratch[t];
550 for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
554 const T2& in_i_ = in[i];
556 for( ; temp!=end ; temp++ )
566 #pragma omp parallel for num_threads( threads ) schedule( static )
571 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
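// Added note: the scratch-based multiplies above all follow the same pattern —
// each thread t accumulates its partial products into a private dim-length buffer
// OutScratch[t] (cleared with memset / T2(0) first), and only afterwards are the
// per-thread buffers summed component-wise into the shared output. This avoids
// concurrent writes to the output vector without using atomics.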
574 #if defined _WIN32 && !defined __MINGW32__
575 #ifndef _AtomicIncrement_
576 #define _AtomicIncrement_
579 float newValue = *ptr;
580 LONG& _newValue = *( (LONG*)&newValue );
584 _oldValue = _newValue;
586 _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
587 if( _newValue==_oldValue ) break;
593 double newValue = *ptr;
594 LONGLONG& _newValue = *( (LONGLONG*)&newValue );
598 _oldValue = _newValue;
600 _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
602 while( _newValue!=_oldValue );
604 #endif // _AtomicIncrement_
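// Added portability note: the AtomicIncrement() helpers above are Win32-only
// (InterlockedCompareExchange / InterlockedCompareExchange64 retry loops). A
// minimal sketch of an equivalent on other toolchains — an assumption, not part
// of the original file — could use an OpenMP atomic update:
//   inline void AtomicIncrement( volatile float* ptr , float addend )
//   {
//     #pragma omp atomic
//     *ptr += addend;
//   }
// (and likewise for double); both forms serialize the read-modify-write of a
// single scalar, which is all the multiply routines below require.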
609 const float* in = &In[0];
610 float* out = &Out[0];
612 #pragma omp parallel for num_threads( threads )
613 for( int t=0 ; t<threads ; t++ )
614 for( int i=partition[t] ; i<partition[t+1] ; i++ )
618 const float& in_i = in[i];
620 for( ; temp!=end ; temp++ )
623 float v = temp->Value;
630 #pragma omp parallel for num_threads( threads )
631 for( int i=0 ; i<A.rows ; i++ )
635 const float& in_i = in[i];
637 for( ; temp!=end ; temp++ )
640 float v = temp->Value;
651 const double* in = &In[0];
652 double* out = &Out[0];
655 #pragma omp parallel for num_threads( threads )
656 for( int t=0 ; t<threads ; t++ )
657 for( int i=partition[t] ; i<partition[t+1] ; i++ )
661 const double& in_i = in[i];
663 for( ; temp!=end ; temp++ )
673 #pragma omp parallel for num_threads( threads )
674 for( int i=0 ; i<A.rows ; i++ )
678 const double& in_i = in[i];
680 for( ; temp!=end ; temp++ )
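// Added note: the float and double variants above split the rows so that thread t
// handles the range [ partition[t] , partition[t+1] ). Because entries of the
// symmetric matrix can scatter contributions into rows owned by other threads,
// the accumulations presumably go through the AtomicIncrement() helpers defined
// earlier rather than plain stores.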
704 if( solveNormal ) temp.Resize( dim );
705 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
706 const T2* _b = &b[0];
708 std::vector< int > partition( threads+1 );
711 for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
713 #pragma omp parallel for num_threads( threads )
714 for( int t=0 ; t<threads ; t++ )
717 for( int i=0 ; i<A.rows ; i++ )
720 if( _eCount*threads>=eCount*(t+1) )
727 partition[threads] = A.rows;
734 #pragma omp parallel for num_threads( threads ) schedule( static )
735 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
740 #pragma omp parallel for num_threads( threads ) schedule( static )
741 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
743 double delta_new = 0 , delta_0;
744 for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
748 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
752 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
757 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
758 T2 alpha = T2( delta_new / dDotQ );
759 #pragma omp parallel for num_threads( threads ) schedule( static )
760 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
761 if( (ii%50)==(50-1) )
766 #pragma omp parallel for num_threads( threads ) schedule( static )
767 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
770 #pragma omp parallel for num_threads( threads ) schedule( static )
771 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
773 double delta_old = delta_new;
775 for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
776 T2 beta = T2( delta_new / delta_old );
777 #pragma omp parallel for num_threads( threads ) schedule( static )
778 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
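// Added note: the loop starting at line 752 is a standard conjugate-gradient
// iteration on the symmetric system A x = b:
//   q = A d ,  alpha = delta_new / (d . q) ,  x += alpha d ,  r -= alpha q ,
//   beta = delta_new / delta_old ,  d = r + beta d ,  with delta_new = r . r.
// Every 50 iterations the residual is recomputed directly as r = b - A x to limit
// accumulated round-off, and the solveNormal path applies A twice, effectively
// solving A.A x = A.b.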
782 #endif // _WIN32 && !__MINGW32__
787 int threads = scratch.threads();
791 if( reset ) x.Resize( dim );
792 if( solveNormal ) temp.Resize( dim );
793 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
794 const T2* _b = &b[0];
796 double delta_new = 0 , delta_0;
799 A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
800 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
801 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
805 A.Multiply( x , r , scratch , addDCTerm );
806 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
807 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
812 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
816 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
818 if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
819 else A.Multiply( d , q , scratch , addDCTerm );
821 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
822 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
823 T2 alpha = T2( delta_new / dDotQ );
824 double delta_old = delta_new;
826 if( (ii%50)==(50-1) )
828 #pragma omp parallel for num_threads( threads )
829 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
831 if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
832 else A.Multiply( x , r , scratch , addDCTerm );
833 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
834 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
837 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
838 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
840 T2 beta = T2( delta_new / delta_old );
841 #pragma omp parallel for num_threads( threads )
842 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
853 if( threads<1 ) threads = 1;
854 if( threads>1 ) outScratch.resize( threads , dim );
855 if( reset ) x.Resize( dim );
858 if( solveNormal ) temp.Resize( dim );
859 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
860 const T2* _b = &b[0];
862 double delta_new = 0 , delta_0;
866 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
868 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
869 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
873 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
874 else A.Multiply( x , r , addDCTerm );
875 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
876 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
882 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
886 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
890 if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
891 else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
895 if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
896 else A.Multiply( d , q , addDCTerm );
899 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
900 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
901 T2 alpha = T2( delta_new / dDotQ );
902 double delta_old = delta_new;
905 if( (ii%50)==(50-1) )
907 #pragma omp parallel for num_threads( threads )
908 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
912 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
913 else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
917 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
918 else A.Multiply( x , r , addDCTerm );
920 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
921 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
925 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
926 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
929 T2 beta = T2( delta_new / delta_old );
930 #pragma omp parallel for num_threads( threads )
931 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
948 for( int i=0 ; i<iters ; i++ )
955 for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
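// Added note: this last solver is a plain Jacobi relaxation rather than conjugate
// gradient — each of the `iters` sweeps presumably forms Md = M * solution and
// then updates every component as
//   solution[j] += ( b[j] - Md[j] ) / diagonal[j] ,
// i.e. it drives each residual component toward zero using only the diagonal of M.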
964 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )