////////////////////////////////////////////////////////////////////// // // NxM Matrix // // File: matrixnm.cpp // Description : matrixnm template class implementation for float and real // // History: // -:Created by Anton Knyazev // ////////////////////////////////////////////////////////////////////// #include "stdafx.h" #include "utils.h" #ifdef USE_MATRIX_POOLS DECLARE_MTXNxM_POOL(float,512) DECLARE_VECTORN_POOL(float,256) #ifndef REAL_IS_FLOAT DECLARE_MTXNxM_POOL(real,512) DECLARE_VECTORN_POOL(real,256) #endif #endif int g_bHasSSE; inline float getlothresh(float) { return 1E-10f; } inline float gethithresh(float) { return 1E10f; } inline double getlothresh(double) { return 1E-20; } inline double gethithresh(double) { return 1E20; } template int matrix_tpl::jacobi_transformation(matrix_tpl &evec, ftype *eval, ftype prec) const { if (!(flags & mtx_symmetric) || nCols!=nRows) return 0; matrix_tpl a(*this); int n = nRows, p,q,r,iter,pmax,qmax,sz=nRows*nCols; ftype theta,t,s,c,apr,aqr,arp,arq,thresh=prec,amax; evec.identity(); evec.flags = (evec.flags & mtx_foreign_data) | mtx_orthogonal | mtx_normal; for(iter=0; iteramax) { amax=sqr(a[p][q]); pmax=p; qmax=q; } if (pmax==-1) goto exitjacobi; p=pmax; q=qmax; theta = (ftype)0.5*(a[q][q]-a[p][p])/a[p][q]; if (fabs_tpl(theta)0) t = -theta-t; else t -= theta; c = 1/sqrt_tpl(1+t*t); s = t*c; for(r=0;r int matrix_tpl::conjugate_gradient(ftype *startx,ftype *rightside, ftype minlen,ftype minel) const { ftype a,b,r2,r2new,denom,maxel; int i,iter=nRows*3; minlen*=minlen; ftype buf[24],*pbuf = nRows>8 ? new ftype[nRows*3]:buf; vectorn_tpl x(nRows,startx),rh(nRows,rightside),r(nRows,pbuf),p(nRows,pbuf+nRows),Ap(nRows,pbuf+nRows*2); (r=rh)-=*this*x; p=r; r2=r.len2(); do { Ap = *this*p; denom = p*Ap; if (sqr(denom)<1E-30) break; a = r2/denom; #if defined(LINUX) r = ::operator-=(r, (const vectorn_tpl&)(Ap*a)); #else r -= Ap*a; #endif r2new=r.len2(); if (r2new>r2*500) break; #if defined(LINUX) x = ::operator+=(x, (const vectorn_tpl&)(p*a)); #else x += p*a; #endif b = r2new/r2; r2=r2new; (p*=b)+=r; for(i=0,maxel=0;iminlen || maxel>minel)); if (pbuf!=buf) delete pbuf; return nRows-iter; } template int matrix_tpl::biconjugate_gradient(ftype *startx,ftype *rightside, ftype minlen,ftype minel) const { ftype a,b,r2,r2new,err,denom,maxel; int i,iter=nRows*3; minlen*=minlen; ftype buf[40],*pbuf = nRows>8 ? new ftype[nRows*5]:buf; vectorn_tpl x(nRows,startx),rh(nRows,rightside),r(nRows,pbuf),rc(nRows,pbuf+nRows),p(nRows,pbuf+nRows*2), pc(nRows,pbuf+nRows*3),Ap(nRows,pbuf+nRows*4); (r=rh)-=*this*x; rc=r; p=r; pc=rc; r2 = r.len2(); do { Ap = *this*p; denom = pc*Ap; if (sqr(denom)<1E-30) break; a = r2/denom; #if defined(LINUX) r = ::operator-=(r, (const vectorn_tpl&)(Ap*a)); rc = ::operator-=(rc, (const vectorn_tpl&)((pc**this)*a)); x = ::operator+=(x, (const vectorn_tpl&)(p*a)); #else r -= Ap*a; rc -= (pc**this)*a; x += p*a; #endif r2new = rc*r; b = r2new/r2; r2 = r2new; (p*=b)+=r; (pc*=b)+=rc; for(err=maxel=0,i=0;iminlen || maxel>minel) && sqr(r2)>1E-30); if (pbuf!=buf) delete pbuf; return nRows-iter; } template int matrix_tpl::minimum_residual(ftype *startx,ftype *rightside, ftype minlen,ftype minel) const { ftype a,b,r2,r2new,err,denom,maxel; int i,iter=nRows*3; minlen*=minlen; ftype buf[40],*pbuf = nRows>8 ? new ftype[nRows*5]:buf; vectorn_tpl x(nRows,startx),rh(nRows,rightside),r(nRows,pbuf),rc(nRows,pbuf+nRows),p(nRows,pbuf+nRows*2), Ap(nRows,pbuf+nRows*4); (r=rh)-=*this*x; rc=*this*r; p=r; r2 = rc*r; do { Ap = *this*p; denom = Ap.len2(); if (sqr(denom)<1E-30) break; a = r2/denom; #if defined(LINUX) r = ::operator-=(r, (const vectorn_tpl&)(Ap*a)); #else r -= Ap*a; #endif rc = *this*r; #if defined(LINUX) x = ::operator+=(x, (const vectorn_tpl&)(p*a)); #else x += p*a; #endif r2new = rc*r; b = r2new/r2; r2 = r2new; (p*=b)+=r; for(err=maxel=0,i=0;iminlen || maxel>minel) && sqr(r2)>1E-30); if (pbuf!=buf) delete pbuf; return nRows-iter; } template int matrix_tpl::LUdecomposition(ftype *&LUdata,int *&LUidx) const { if (nRows!=nCols) return 0; int i,j,k,imax,alloc=(LUdata==0 | (LUidx==0)<<1); ftype aij,bij,maxaij,t; if (alloc & 1) LUdata = new ftype[nRows*nRows]; if (alloc & 2) LUidx = new int[nRows]; matrix_tpl LU(nRows,nRows,0,LUdata); LU = *this; for(j=0;j maxaij*maxaij) { maxaij=aij; imax=i; } } LUidx[j] = imax; if (j==nRows-1 && LU[j][j]!=0) break; // no aij in this case if (maxaij==0) { // the matrix is singular if (alloc & 1) delete[] LUdata; if (alloc & 2) delete[] LUidx; return 0; } if (imax!=j) for(k=0;k int matrix_tpl::solveAx_b(ftype *x,ftype *b, ftype *LUdata,int *LUidx) const { int LUidx_buf[16],alloc=0; if (!LUdata) { if (nRows*nRows*2 sizeof(mtx_pool)/sizeof(mtx_pool[0])) mtx_pool_pos = 0; LUdata = mtx_pool+mtx_pool_pos; mtx_pool_pos += nRows*nRows; } if (nRows<=sizeof(LUidx_buf)/sizeof(LUidx_buf[0])) LUidx = LUidx_buf; alloc = LUdata==0 | (LUidx==0)<<1; if (!LUdecomposition(LUdata,LUidx)) return 0; } int i,j; ftype xi; matrix_tpl LU(nRows,nRows,0,LUdata); for(i=0;i=0;i--) { for(j=i+1;j matrix_tpl& matrix_tpl::invert() { if (flags & mtx_orthogonal) return transpose(); if (nRows!=nCols) return *this; int i,j; ftype det=0; if (nRows==1) data[0]=(ftype)1.0/data[0]; else if (nRows==2) { det = data[0]*data[3]-data[1]*data[2]; if (det==0) return *this; det = (ftype)1.0/det; ftype oldata[4]; for(i=0;i<4;i++) oldata[i]=data[i]; data[0]=oldata[3]*det; data[1]=-oldata[1]*det; data[2]=-oldata[2]*det; data[3]=oldata[0]*det; } else if (nRows==3) { for(i=0;i<3;i++) det+=data[i]*data[inc_mod3[i]+3]*data[dec_mod3[i]+6]; for(i=0;i<3;i++) det-=data[dec_mod3[i]]*data[inc_mod3[i]+3]*data[i+6]; if (det==0) return *this; det = (ftype)1.0/det; ftype oldata[9]; for(i=0;i<9;i++) oldata[i]=data[i]; for(i=0;i<3;i++) for(j=0;j<3;j++) data[i+j*3] = (oldata[dec_mod3[i]*3+dec_mod3[j]]*oldata[inc_mod3[i]*3+inc_mod3[j]]- oldata[dec_mod3[i]*3+inc_mod3[j]]*oldata[inc_mod3[i]*3+dec_mod3[j]])*det; } else { ftype *LUdata=0,*colmark; int *LUidx=0,LUidx_buf[32], alloc=0; if (nRows*nRows*2 sizeof(mtx_pool)/sizeof(mtx_pool[0])) mtx_pool_pos = 0; LUdata = mtx_pool+mtx_pool_pos; mtx_pool_pos += nRows*nRows; } else alloc = 1; if (nRows<=sizeof(LUidx_buf)/sizeof(LUidx_buf[0])) LUidx = LUidx_buf; else alloc |= 2; if (!LUdecomposition(LUdata,LUidx)) return *this; if (nRows*2 sizeof(mtx_pool)/sizeof(mtx_pool[0])) mtx_pool_pos=0; colmark=mtx_pool+mtx_pool_pos; mtx_pool_pos+=nRows; } else { colmark = new ftype[nRows]; alloc |= 4; } for(i=0;i ftype matrix_tpl::determinant(ftype *LUdata,int *LUidx) const { if (nRows!=nCols) return 0; int i,j; ftype det=0; if (nRows==1) det = data[0]; else if (nRows==2) det = data[0]*data[3]-data[1]*data[2]; else if (nRows==3) { for(i=0;i<3;i++) det+=data[i]*data[inc_mod3[i]+3]*data[dec_mod3[i]+6]; for(i=0;i<3;i++) det-=data[dec_mod3[i]]*data[inc_mod3[i]+3]*data[i+6]; } else if (LUdecomposition(LUdata,LUidx)) { ftype *LUdata=0; int *LUidx=0,LUidx_buf[32], alloc=0; if (nRows*nRows*2 sizeof(mtx_pool)/sizeof(mtx_pool[0])) mtx_pool_pos = 0; LUdata = mtx_pool+mtx_pool_pos; mtx_pool_pos += nRows*nRows; } else alloc = 1; if (nRows<=sizeof(LUidx_buf)/sizeof(LUidx_buf[0])) LUidx = LUidx_buf; else alloc |= 2; if (!LUdecomposition(LUdata,LUidx)) return 0; for(i=j=0,det=1;i int matrix_tpl::LPsimplex(int m1,int m2, ftype &objfunout,ftype *xout, int nvars, ftype e) const { int M=nRows-2,N=nCols-1,i,j,imax,jmax,iobjfun=M, res=0,iter=(M+N)*8; ftype rpivot,t; const int imask=0x7FFFFFFF; int *irow=new int[M],*icol=new int[N]; #define a (*this) if (e<0) { for(i=nRows*nCols-1,e=0;i>=0;i--) e+=data[i]; e *= (ftype)1E-6/(nRows*nCols); } if (nvars<0) nvars = N+m1+m2; if (m1t) { jmax=j; t=a[iobjfun][j]; } if (jmax<0) { res=1; break; } for(imax=0; imax0; imax++); if (imax==M) { res=2; break; } for(i=imax+1;i=N+m1 && (irow[imax]&imask)M+m1+m2) { for(jmax=0;jmax=nvars;jmax++); imax=i; goto varexchange; } else if ((irow[i]&imask)>N+m1 && (irow[i]&~imask)!=0) { for(j=0;j