From cf8e39e5a552e74ea12770b1c7d13c5bd0f647b5 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 11:21:07 +0200 Subject: [PATCH 01/25] Add files via upload --- Biopool/APPS/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Biopool/APPS/Makefile b/Biopool/APPS/Makefile index cb26b82..7b38bcc 100644 --- a/Biopool/APPS/Makefile +++ b/Biopool/APPS/Makefile @@ -27,16 +27,16 @@ INC_PATH = -I. -I../../tools/ -I../../Biopool/Sources -I../../Energy/Sources -I. # SOURCES = PdbCorrector.cc PdbSecondary.cc PdbEditor.cc Pdb2Seq.cc pdb2secondary.cc pdbshifter.cc \ - pdbMover.cc + pdbMover.cc CEmain.cc OBJECTS = PdbCorrector.o PdbSecondary.o PdbEditor.o Pdb2Seq.o pdb2secondary.o pdbshifter.o \ - pdbMover.o + pdbMover.o CEmain.o TARGETS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ - pdbMover + pdbMover CEmain EXECS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ - pdbMover + pdbMover CEmain LIBRARY = APPSlibBiopool.a From 9032a9b97a2a709a6507e34b0503e1ad87fa5c8c Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 11:38:27 +0200 Subject: [PATCH 02/25] Add files via upload --- Biopool/APPS/CEmain.cc | 189 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 Biopool/APPS/CEmain.cc diff --git a/Biopool/APPS/CEmain.cc b/Biopool/APPS/CEmain.cc new file mode 100644 index 0000000..fd6af09 --- /dev/null +++ b/Biopool/APPS/CEmain.cc @@ -0,0 +1,189 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ +/* + * Author: Alberto Tilocca + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "XyzLoader.h" +#include +#include + +#include "CEFunctions.h" + +using namespace Victor;using namespace Victor::Biopool; + + +void sShowHelp() { + cout << "\n\nCE - Combinatiorial Extension -- calculate the protein structure alignment by incremental combinatorial extension of the optimal path\n" + << "Options: \n" + << "\t -i first pdb inputFile , \t \t(mandatory)\n" + << "\t -j second pdb inputFile, \t \t(mandatory)\n" + << "\t -m AFP size (defaul 8)\n" + << "\t -c select the chain of the first pdb file (default: first chain)\n" + << "\t -d select the chain of the second pdb file (default: first chain)\n" + << "\t -l select the model of the first pdb file\n" + << "\t -n select the model of the first pdb file\n" + << "\t -o outputFile (deafult: chainID1_chainID2.pdb)\n" + << "\t -v set verbose\n" + << "\t -h this message\n" + << "Usage exemple: \n" + << "\t CEmain -i input_1cpc.pdb -j input_1col.pdb -c L -d A \n"; +} + +int main(int argc, char** argv) { + + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + } + + string inputFile1; + string inputFile2; + string chainID1; + string chainID2; + int m; + unsigned int modelNum1; + unsigned int modelNum2; + + // Output + string outputFile; + string outputFile2="onlyModifiedChain.pdb"; + + bool verbose; + + // Input + getArg("i", inputFile1, argc, argv, "!"); + getArg("j", inputFile2, argc, argv, "!"); + getArg("c", chainID1, argc, argv, "!"); + getArg("d", chainID2, argc, argv, "!"); + getArg("m", m, argc, argv, 8); + getArg("l", modelNum1, argc, argv, 999); + getArg("n", modelNum2, argc, argv, 999); + verbose = getArg("v", argc, argv); + // Output + string outputFileDefault="chains"+chainID1+"_"+chainID2+".pdb"; + getArg("o", outputFile, argc, argv, outputFileDefault); + + // ---------------------------------------------------------------------- // + + + if (inputFile1 == "!" || inputFile2 == "!" ) { + cout << "Missing input files specification. Aborting. " << endl; + return -1; + } + + ifstream inFile1(inputFile1.c_str()); + ifstream inFile2(inputFile2.c_str()); + // Initialize PdbLoaders + PdbLoader pl1(inFile1); + PdbLoader pl2(inFile2); + + pl1.setModel(modelNum1); + pl2.setModel(modelNum2); + + // Set chain(s) 1 + + if (chainID1 != "!") + pl1.setChain(chainID1[0]); + else + pl1.setChain(pl1.getAllChains()[0]); + + // Set chain(s) 2 + if (chainID2 != "!") + pl2.setChain(chainID2[0]); + else + pl2.setChain(pl2.getAllChains()[0]); + + if (!verbose){ + pl1.setNoVerbose(); // Set PdbLoader verbosity + pl2.setNoVerbose(); + } + + if (verbose) + cout << "PdbLoader set" << endl; + + // Create and load the protein object + Protein* prot1 = new Protein ; + Protein* prot2 = new Protein ; + + prot1->load(pl1); + prot2->load(pl2); + + + inFile1.close(); + inFile2.close(); + //Prendo gli spacer delle due catene + + Spacer* sp1 = prot1->getSpacer(chainID1[0]); // Get spacer + Spacer* sp2 = prot2->getSpacer(chainID2[0]); // Get spacer + + int dimSpacer1=(int)sp1->sizeAmino(); + int dimSpacer2=(int)sp2->sizeAmino(); + int combination=(m-1)*(m-2)/2; //to exclude the neighbors + int smaller = ( dimSpacer1 < dimSpacer2 ) ? dimSpacer1 : dimSpacer2; + + //calculate the inner distances for each spacer. + vector > distA(dimSpacer1);//inner distance matrix chain1 + vector > distB(dimSpacer2);//inner distance matrix chain1 + + for (int i = 0; i < dimSpacer1; i++) + distA[i].resize(dimSpacer1); + + for (int i = 0; i< dimSpacer2; i++) + distB[i].resize(dimSpacer2); + + CEFunctions::innerDistance(sp1,distA); + CEFunctions::innerDistance(sp2,distB); + + //Calculate the score Matrix + vector > score(dimSpacer1); + for (int i = 0; i< dimSpacer1; i++) + score[i].resize(dimSpacer2); + + CEFunctions::createSMatrix(sp1,sp2, combination, m,score,distA,distB); + + + //search of the path + int MAXPATH=30; + vector scoreBuffer(MAXPATH); + vector > tempPathBuffer(smaller); + for (int i = 0; i< smaller; i++) + tempPathBuffer[i].resize(smaller); + vector lenBuffer(MAXPATH); + + CEFunctions::CESearch(sp1,sp2,tempPathBuffer, scoreBuffer,score,smaller,combination,m,distA, distB,lenBuffer); + + //here I have the path + + // superimposition with kabash algorithm and take the path who have the lower RMSD + + int indexFinalPath = CEFunctions::finalSteps(sp1,sp2,tempPathBuffer,smaller,lenBuffer ,m,outputFile,outputFile2); + + cout << "scoreCE: "< Date: Thu, 8 Sep 2016 11:39:21 +0200 Subject: [PATCH 03/25] Add files via upload --- Biopool/Sources/CEFunctions.cc | 557 +++++++++++++++++++++++++++++++++ Biopool/Sources/CEFunctions.h | 50 +++ 2 files changed, 607 insertions(+) create mode 100644 Biopool/Sources/CEFunctions.cc create mode 100644 Biopool/Sources/CEFunctions.h diff --git a/Biopool/Sources/CEFunctions.cc b/Biopool/Sources/CEFunctions.cc new file mode 100644 index 0000000..95bb03b --- /dev/null +++ b/Biopool/Sources/CEFunctions.cc @@ -0,0 +1,557 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ +/* + * Author: Alberto Tilocca + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "XyzLoader.h" +#include +#include +#include "CEFunctions.h" + + +using namespace Victor;using namespace Victor::Biopool; +/** + * Constructor + * @param none + */ +CEFunctions::CEFunctions() { + +} + +/** + * Destructor + * @param none + */ +CEFunctions::~CEFunctions() { + +} +/** + * calculate the distances of CA atoms of a spacer. + * @param Spacer* vector >& + */ +void CEFunctions::innerDistance(Spacer*sp ,vector >&dist ){ + + int len=sp->sizeAmino(); + for (int row=0;rowgetAmino(row)[CA].distance(sp->getAmino(col)[CA]); + + +} +/** + * calculate the similarity matrix using a full set of inter-residue distaces, where all possibile distances except those for neighboring residues are evaluated. + * @param Spacer* + * @param Spacer* + * @param vector >& + * @param vector >& + * @param vector >& + * */ +void CEFunctions::createSMatrix(Spacer *sp1,Spacer *sp2, int combination,int m,vector > &score,vector > &distA ,vector > &distB) + { + + int dimSpacer1=sp1->sizeAmino(); + int dimSpacer2=sp2->sizeAmino(); + + for (int i=0;i >& + * @param vector >& + * @param vector >& + * @param int + * @param int + * @param int + * @param vector > + * @param vector > + * @param vector + * */ + void CEFunctions::CESearch(Spacer *sp1,Spacer *sp2, vector >&tempPathBuffer,vector &scoreBuffer,vector >&score,int smaller,int combination,int m,vector > &distA,vector > &distB,vector &lenBuffer){ + + int D0=3; //threshold + int D1=4; //threshold + double bestScore= 1e6; + int MAXPATH=30; + int gapMax=30; + double bestScoreGap =1e6; + int bestPositionGap=-1; + vector bestPath(smaller); + vector pathCorr(smaller); + int bufferIndex=0; + int bufferSize=0; + + int dimSpacer1=sp1->sizeAmino(); + int dimSpacer2=sp2->sizeAmino(); + + + for ( int i = 0; i < smaller; i++ ) { + bestPath[i].first = -1; + bestPath[i].second = -1; + } + for ( int i = 0; i < MAXPATH; i++ ) { + for(int j=0;j winCache(smaller); + for (int i=0;i index(smaller); + //in this matrix there will be the score releted to the gaps + vector > partialScoreGap(smaller); + + for (int i = 0; i< smaller; i++) + partialScoreGap[i].resize(gapMax*2+1); + + //inizialize + for ( int i = 0; i < smaller; i++ ) + for ( int j = 0; j < gapMax*2+1; j++ ) + partialScoreGap[i][j] = 1e6; + + int bestLength=0; + + //here start the searching + + for(int i=0;idimSpacer1-m*(bestLength-1)){ + + break; + } + for(int j=0;j=D0 ) + continue; + + if(score[i][j]==-1 ) + continue; + + if(j> dimSpacer2 - m*(bestLength-1)) + break; + + //reset corrent path + for (int z=0;z dimSpacer1-m-1 || succ2 > dimSpacer2-m-1 ){ + continue; + } + //se non รจ buono con i gap. + if ( score[succ1][succ2] > D0 ) + continue; + // sicurezza nel caso non abbia modificato tutta la mtrice + if ( score[succ1][succ2] == -1.0 ) + continue; + + double scoreCorr=0.0; + + //calculate formula 6 + for(int s=0;s= D1){ + continue; + } + //keep the best score with gaps + + if( scoreCorr < bestScoreGap ){ + pathCorr[pathLengthCorr].first =succ1; + pathCorr[pathLengthCorr].second=succ2; + bestScoreGap = scoreCorr; + bestPositionGap = g; + partialScoreGap[pathLengthCorr -1][g] = scoreCorr; + } + }//end of gapping + allScoreCorr =0.0; + double score1=0.0; + double score2=0.0; + + + int addGap; + int gap1,gap2; + if (bestPositionGap != -1){ + addGap=(bestPositionGap +1)/2; + if ((bestPositionGap +1)%2==0){ //gap nel primo + gap1=pathCorr[pathLengthCorr-1].first + m + addGap; + gap2=pathCorr[pathLengthCorr-1].second + m; + } + else{ //gap nel secondo + gap1=pathCorr[pathLengthCorr-1].first + m ; + gap2=pathCorr[pathLengthCorr-1].second + m + addGap; + } + + score1= (partialScoreGap[pathLengthCorr-1][bestPositionGap] + *m*pathLengthCorr + score[gap1][gap2]*combination)/ + (m*pathLengthCorr+combination); + score2= ((pathLengthCorr>1 ? (partialScoreGap[pathLengthCorr-2][index[pathLengthCorr-1]]) + :score[i][j])*winCache[pathLengthCorr-1]+ score1 + * (winCache[pathLengthCorr]-winCache[pathLengthCorr-1])) + /winCache[pathLengthCorr]; + + allScoreCorr=score2; + + if(allScoreCorr >D1){ + ok=true; + bestPositionGap = -1; + break; + + } + else { + partialScoreGap[pathLengthCorr-1][bestPositionGap] = allScoreCorr; + index[pathLengthCorr]=bestPositionGap; + pathLengthCorr++; + } + } + else{ + //no good path with gaps + ok=true; + pathLengthCorr--; + break; + } + + //if the gapped path is longer or same size but higher score keep the the gapped one. + if(pathLengthCorr > bestLength || (pathLengthCorr == bestLength && allScoreCorr lenBuffer[bufferIndex] || + ( bestLength == lenBuffer[bufferIndex] && + bestScore < scoreBuffer[bufferIndex] )){ + bufferIndex = ( bufferIndex == MAXPATH-1 ) ? 0 : bufferIndex+1; + bufferSize = ( bufferSize < MAXPATH ) ? bufferSize+1 : MAXPATH; + + vector pathCopy(smaller); + for (int i=0;i >& + * @param int + * @param vector + * @param int + * @param string + * @param int + * */ +int CEFunctions::finalSteps(Spacer *sp1,Spacer *sp2,vector >&tempPathBuffer,int smaller,vector &lenBuffer,int m,string outputFile,string outputFile2 ){ + + int MAXPATH=30; + Eigen::Matrix3Xd Matrix1; + Eigen::Matrix3Xd Matrix2; + Matrix1.resize(3,smaller); + Matrix2.resize(3,smaller); + double minRMSD=1e06; + int indexFinalPath; + Spacer *bestSp1; + Spacer *bestSp2; + + for (int p=0;p=(int)(sp1->sizeAmino()-1)) + end1=true; + if (lremoveComponentFromIndex(cnt*m); + iter++; + l++; + } + else{ + l=l+m+cnt; + cnt++; + iter++; + end1=true; + } + } + + //chain 2 + end2 =false; + while(end2==false) + { + if (iter2>=(int)(sp2->sizeAmino()-1)) + end2=true; + if (l2removeComponentFromIndex(cnt2*m); + iter2++; + l2++; + } + else{ + l2=l2+m+cnt2; + cnt2++; + iter2++; + end2=true; + } + } + } + double RMSD=0.0; + vgVector3CaCoordsSp1; + vgVector3CaCoordsSp2; + + for (int c=0; c< smaller;c++){ + CaCoordsSp1 = copia1->getAmino(c)[CA].getCoords(); + CaCoordsSp2 = copia2->getAmino(c)[CA].getCoords(); + Matrix1(0,c)=CaCoordsSp1.x; + Matrix1(1,c)=CaCoordsSp1.y; + Matrix1(2,c)=CaCoordsSp1.z; + Matrix2(0,c)=CaCoordsSp2.x; + Matrix2(1,c)=CaCoordsSp2.y; + Matrix2(2,c)=CaCoordsSp2.z; + } + + vgMatrix3 vgMat; + vgVector3vgVet; + //apply kabasch for the first time + Eigen::Affine3d A= Find3DAffineTransform(Matrix2,Matrix1); + //coordinates of rototaion matrix in victor format + vgMat.x.x=A.rotation().coeff(0,0); + vgMat.x.y=A.rotation().coeff(0,1); + vgMat.x.z=A.rotation().coeff(0,2); + vgMat.y.x=A.rotation().coeff(1,0); + vgMat.y.y=A.rotation().coeff(1,1); + vgMat.y.z=A.rotation().coeff(1,2); + vgMat.z.x=A.rotation().coeff(2,0); + vgMat.z.y=A.rotation().coeff(2,1); + vgMat.z.z=A.rotation().coeff(2,2); + + //coordinates of rototaion matrix in victor format + vgVet.x=A.translation().coeff(0); + vgVet.y=A.translation().coeff(1); + vgVet.z=A.translation().coeff(2); + + //apply rotation + copia2->getAmino(0)[CA].addRot(vgMat); + copia2->sync(); + + //new coordinates after ONLY rotation + for (int c=0; c< smaller;c++) + { + CaCoordsSp2 = copia2->getAmino(c)[CA].getCoords(); + Matrix2(0,c)=CaCoordsSp2.x; + Matrix2(1,c)=CaCoordsSp2.y; + Matrix2(2,c)=CaCoordsSp2.z; + } + + //again Kabash after applied rotation + A= Find3DAffineTransform(Matrix2,Matrix1); + + //Victor format + vgVet.x=A.translation().coeff(0); + vgVet.y=A.translation().coeff(1); + vgVet.z=A.translation().coeff(2); + + //apply translation + copia2->getAmino(0)[CA].addTrans(vgVet); + copia2->sync(); + //calculate RMSD + for (int i=0; igetAmino(i)[CA].distance(copia2->getAmino(i)[CA]),2); + RMSD= sqrt((RMSD/smaller)); + + if (RMSDsave(ps); + outFile <<"ENDMDL"<save(ps); + outFile <<"ENDMDL"<save(ps2); + outFile.close(); + cout<<"The pdb file has been created."<. + */ +/* + * Author: Alberto Tilocca + * + */ +#ifndef CEFUNCTIONS_H +#define CEFUNCTIONS_H + +typedef struct { + int first; + int second; +} afp; +namespace Victor { + namespace Biopool { + class CEFunctions { + public: + //CONSTRUCTOR/DESTRUCTOR + CEFunctions(); + ~CEFunctions(); + static void stampa(); + static void innerDistance(Spacer* ,vector >& ); + static void createSMatrix(Spacer* ,Spacer*, int ,int ,vector >&,vector >& ,vector >&); + static void CESearch(Spacer*,Spacer*, vector >&,vector&,vector >&,int ,int,int,vector >&,vector >&,vector&); + static int finalSteps(Spacer*,Spacer*,vector >&,int ,vector&,int ,string ,string ); + + + protected: + + private: + + }; + } +} + +#endif /* CEFUNCTIONS_H */ + From 06cb87d760e4a7eeec379382e8c2682fcfbe90a7 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 11:47:06 +0200 Subject: [PATCH 04/25] Create Eigen --- tools/Eigen | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/Eigen diff --git a/tools/Eigen b/tools/Eigen new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tools/Eigen @@ -0,0 +1 @@ + From 7286e338aae99b5216386a94a7b48b149d6469f6 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 11:49:00 +0200 Subject: [PATCH 05/25] Delete Eigen --- tools/Eigen | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tools/Eigen diff --git a/tools/Eigen b/tools/Eigen deleted file mode 100644 index 8b13789..0000000 --- a/tools/Eigen +++ /dev/null @@ -1 +0,0 @@ - From 0afc95f626af164837c6ef03fe38d7d773274323 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 11:49:33 +0200 Subject: [PATCH 06/25] Create a --- tools/Eigen/a | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/Eigen/a diff --git a/tools/Eigen/a b/tools/Eigen/a new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tools/Eigen/a @@ -0,0 +1 @@ + From 5bf730a9d80449cd0879c271ef3b8c0b14277597 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 11:51:42 +0200 Subject: [PATCH 07/25] Delete a --- tools/Eigen/a | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tools/Eigen/a diff --git a/tools/Eigen/a b/tools/Eigen/a deleted file mode 100644 index 8b13789..0000000 --- a/tools/Eigen/a +++ /dev/null @@ -1 +0,0 @@ - From c80fe92bf761bb780efff4250ba8b7c2f87c2770 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 13:58:57 +0200 Subject: [PATCH 08/25] add Eigen folders --- tools/Eigen/Array | 11 + tools/Eigen/CMakeLists.txt | 19 ++ tools/Eigen/Cholesky | 32 ++ tools/Eigen/CholmodSupport | 45 +++ tools/Eigen/Core | 376 ++++++++++++++++++++++++ tools/Eigen/Dense | 7 + tools/Eigen/Eigen | 2 + tools/Eigen/Eigen2Support | 95 ++++++ tools/Eigen/Eigenvalues | 48 +++ tools/Eigen/Geometry | 63 ++++ tools/Eigen/Householder | 23 ++ tools/Eigen/IterativeLinearSolvers | 40 +++ tools/Eigen/Jacobi | 26 ++ tools/Eigen/LU | 41 +++ tools/Eigen/LeastSquares | 32 ++ tools/Eigen/MetisSupport | 28 ++ tools/Eigen/OrderingMethods | 66 +++++ tools/Eigen/PaStiXSupport | 46 +++ tools/Eigen/PardisoSupport | 30 ++ tools/Eigen/QR | 45 +++ tools/Eigen/QtAlignedMalloc | 34 +++ tools/Eigen/SPQRSupport | 29 ++ tools/Eigen/Sparse | 27 ++ tools/Eigen/SparseCholesky | 47 +++ tools/Eigen/SparseCore | 64 ++++ tools/Eigen/SparseLU | 49 +++ tools/Eigen/SparseQR | 33 +++ tools/Eigen/src/Cholesky/CMakeLists.txt | 6 + 28 files changed, 1364 insertions(+) create mode 100644 tools/Eigen/Array create mode 100644 tools/Eigen/CMakeLists.txt create mode 100644 tools/Eigen/Cholesky create mode 100644 tools/Eigen/CholmodSupport create mode 100644 tools/Eigen/Core create mode 100644 tools/Eigen/Dense create mode 100644 tools/Eigen/Eigen create mode 100644 tools/Eigen/Eigen2Support create mode 100644 tools/Eigen/Eigenvalues create mode 100644 tools/Eigen/Geometry create mode 100644 tools/Eigen/Householder create mode 100644 tools/Eigen/IterativeLinearSolvers create mode 100644 tools/Eigen/Jacobi create mode 100644 tools/Eigen/LU create mode 100644 tools/Eigen/LeastSquares create mode 100644 tools/Eigen/MetisSupport create mode 100644 tools/Eigen/OrderingMethods create mode 100644 tools/Eigen/PaStiXSupport create mode 100644 tools/Eigen/PardisoSupport create mode 100644 tools/Eigen/QR create mode 100644 tools/Eigen/QtAlignedMalloc create mode 100644 tools/Eigen/SPQRSupport create mode 100644 tools/Eigen/Sparse create mode 100644 tools/Eigen/SparseCholesky create mode 100644 tools/Eigen/SparseCore create mode 100644 tools/Eigen/SparseLU create mode 100644 tools/Eigen/SparseQR create mode 100644 tools/Eigen/src/Cholesky/CMakeLists.txt diff --git a/tools/Eigen/Array b/tools/Eigen/Array new file mode 100644 index 0000000..3d004fb --- /dev/null +++ b/tools/Eigen/Array @@ -0,0 +1,11 @@ +#ifndef EIGEN_ARRAY_MODULE_H +#define EIGEN_ARRAY_MODULE_H + +// include Core first to handle Eigen2 support macros +#include "Core" + +#ifndef EIGEN2_SUPPORT + #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core. +#endif + +#endif // EIGEN_ARRAY_MODULE_H diff --git a/tools/Eigen/CMakeLists.txt b/tools/Eigen/CMakeLists.txt new file mode 100644 index 0000000..a92dd6f --- /dev/null +++ b/tools/Eigen/CMakeLists.txt @@ -0,0 +1,19 @@ +include(RegexUtils) +test_escape_string_as_regex() + +file(GLOB Eigen_directory_files "*") + +escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + +foreach(f ${Eigen_directory_files}) + if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src") + list(APPEND Eigen_directory_files_to_install ${f}) + endif() +endforeach(f ${Eigen_directory_files}) + +install(FILES + ${Eigen_directory_files_to_install} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel + ) + +add_subdirectory(src) diff --git a/tools/Eigen/Cholesky b/tools/Eigen/Cholesky new file mode 100644 index 0000000..f727f5d --- /dev/null +++ b/tools/Eigen/Cholesky @@ -0,0 +1,32 @@ +#ifndef EIGEN_CHOLESKY_MODULE_H +#define EIGEN_CHOLESKY_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Cholesky_Module Cholesky module + * + * + * + * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. + * Those decompositions are accessible via the following MatrixBase methods: + * - MatrixBase::llt(), + * - MatrixBase::ldlt() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/Solve.h" +#include "src/Cholesky/LLT.h" +#include "src/Cholesky/LDLT.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/Cholesky/LLT_MKL.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLESKY_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/tools/Eigen/CholmodSupport b/tools/Eigen/CholmodSupport new file mode 100644 index 0000000..88c29a6 --- /dev/null +++ b/tools/Eigen/CholmodSupport @@ -0,0 +1,45 @@ +#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H +#define EIGEN_CHOLMODSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { + #include +} + +/** \ingroup Support_modules + * \defgroup CholmodSupport_Module CholmodSupport module + * + * This module provides an interface to the Cholmod library which is part of the suitesparse package. + * It provides the two following main factorization classes: + * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. + * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial). + * + * For the sake of completeness, this module also propose the two following classes: + * - class CholmodSimplicialLLT + * - class CholmodSimplicialLDLT + * Note that these classes does not bring any particular advantage compared to the built-in + * SimplicialLLT and SimplicialLDLT factorization classes. + * + * \code + * #include + * \endcode + * + * In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be linked to the cholmod library and its dependencies. + * The dependencies depend on how cholmod has been compiled. + * For a cmake based project, you can use our FindCholmod.cmake module to help you in this task. + * + */ + +#include "src/misc/Solve.h" +#include "src/misc/SparseSolve.h" + +#include "src/CholmodSupport/CholmodSupport.h" + + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLMODSUPPORT_MODULE_H + diff --git a/tools/Eigen/Core b/tools/Eigen/Core new file mode 100644 index 0000000..509c529 --- /dev/null +++ b/tools/Eigen/Core @@ -0,0 +1,376 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CORE_H +#define EIGEN_CORE_H + +// first thing Eigen does: stop the compiler from committing suicide +#include "src/Core/util/DisableStupidWarnings.h" + +// then include this file where all our macros are defined. It's really important to do it first because +// it's where we do all the alignment settings (platform detection and honoring the user's will if he +// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. +#include "src/Core/util/Macros.h" + +// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) +// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. +#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6) + #pragma GCC optimize ("-fno-ipa-cp-clone") +#endif + +#include + +// this include file manages BLAS and MKL related macros +// and inclusion of their respective header files +#include "src/Core/util/MKL_support.h" + +// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into +// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks +#if !EIGEN_ALIGN + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif +#endif + +#ifdef _MSC_VER + #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled + #if (_MSC_VER >= 1500) // 2008 or later + // Remember that usage of defined() in a #define is undefined by the standard. + // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64) + #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER + #endif + #endif +#else + // Remember that usage of defined() in a #define is undefined by the standard + #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) ) + #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC + #endif +#endif + +#ifndef EIGEN_DONT_VECTORIZE + + #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) + + // Defines symbols for compile-time detection of which instructions are + // used. + // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_SSE + #define EIGEN_VECTORIZE_SSE2 + + // Detect sse3/ssse3/sse4: + // gcc and icc defines __SSE3__, ... + // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you + // want to force the use of those instructions with msvc. + #ifdef __SSE3__ + #define EIGEN_VECTORIZE_SSE3 + #endif + #ifdef __SSSE3__ + #define EIGEN_VECTORIZE_SSSE3 + #endif + #ifdef __SSE4_1__ + #define EIGEN_VECTORIZE_SSE4_1 + #endif + #ifdef __SSE4_2__ + #define EIGEN_VECTORIZE_SSE4_2 + #endif + + // include files + + // This extern "C" works around a MINGW-w64 compilation issue + // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 + // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). + // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations + // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; + // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. + // notice that since these are C headers, the extern "C" is theoretically needed anyways. + extern "C" { + // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. + // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: + #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110 + #include + #else + #include + #include + #ifdef EIGEN_VECTORIZE_SSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 + #include + #endif + #endif + } // end extern "C" + #elif defined __ALTIVEC__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ALTIVEC + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + #elif defined __ARM_NEON + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_NEON + #include + #endif +#endif + +#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) + #define EIGEN_HAS_OPENMP +#endif + +#ifdef EIGEN_HAS_OPENMP +#include +#endif + +// MSVC for windows mobile does not have the errno.h file +#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION) +#define EIGEN_HAS_ERRNO +#endif + +#ifdef EIGEN_HAS_ERRNO +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +// for min/max: +#include + +// for outputting debug info +#ifdef EIGEN_DEBUG_ASSIGN +#include +#endif + +// required for __cpuid, needs to be included after cmath +#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE)) + #include +#endif + +#if defined(_CPPUNWIND) || defined(__EXCEPTIONS) + #define EIGEN_EXCEPTIONS +#endif + +#ifdef EIGEN_EXCEPTIONS + #include +#endif + +/** \brief Namespace containing all symbols from the %Eigen library. */ +namespace Eigen { + +inline static const char *SimdInstructionSetsInUse(void) { +#if defined(EIGEN_VECTORIZE_SSE4_2) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_1) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; +#elif defined(EIGEN_VECTORIZE_SSSE3) + return "SSE, SSE2, SSE3, SSSE3"; +#elif defined(EIGEN_VECTORIZE_SSE3) + return "SSE, SSE2, SSE3"; +#elif defined(EIGEN_VECTORIZE_SSE2) + return "SSE, SSE2"; +#elif defined(EIGEN_VECTORIZE_ALTIVEC) + return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_NEON) + return "ARM NEON"; +#else + return "None"; +#endif +} + +} // end namespace Eigen + +#define STAGE10_FULL_EIGEN2_API 10 +#define STAGE20_RESOLVE_API_CONFLICTS 20 +#define STAGE30_FULL_EIGEN3_API 30 +#define STAGE40_FULL_EIGEN3_STRICTNESS 40 +#define STAGE99_NO_EIGEN2_SUPPORT 99 + +#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS + #define EIGEN2_SUPPORT + #define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS +#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API + #define EIGEN2_SUPPORT + #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API +#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS + #define EIGEN2_SUPPORT + #define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS +#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API + #define EIGEN2_SUPPORT + #define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API +#elif defined EIGEN2_SUPPORT + // default to stage 3, that's what it's always meant + #define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API + #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API +#else + #define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT +#endif + +#ifdef EIGEN2_SUPPORT +#undef minor +#endif + +// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to +// ensure QNX/QCC support +using std::size_t; +// gcc 4.6.0 wants std:: for ptrdiff_t +using std::ptrdiff_t; + +/** \defgroup Core_Module Core module + * This is the main module of Eigen providing dense matrix and vector support + * (both fixed and dynamic size) with all the features corresponding to a BLAS library + * and much more... + * + * \code + * #include + * \endcode + */ + +#include "src/Core/util/Constants.h" +#include "src/Core/util/ForwardDeclarations.h" +#include "src/Core/util/Meta.h" +#include "src/Core/util/StaticAssert.h" +#include "src/Core/util/XprHelper.h" +#include "src/Core/util/Memory.h" + +#include "src/Core/NumTraits.h" +#include "src/Core/MathFunctions.h" +#include "src/Core/GenericPacketMath.h" + +#if defined EIGEN_VECTORIZE_SSE + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/SSE/Complex.h" +#elif defined EIGEN_VECTORIZE_ALTIVEC + #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/Complex.h" +#elif defined EIGEN_VECTORIZE_NEON + #include "src/Core/arch/NEON/PacketMath.h" + #include "src/Core/arch/NEON/Complex.h" +#endif + +#include "src/Core/arch/Default/Settings.h" + +#include "src/Core/Functors.h" +#include "src/Core/DenseCoeffsBase.h" +#include "src/Core/DenseBase.h" +#include "src/Core/MatrixBase.h" +#include "src/Core/EigenBase.h" + +#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 + // at least confirmed with Doxygen 1.5.5 and 1.5.6 + #include "src/Core/Assign.h" +#endif + +#include "src/Core/util/BlasUtil.h" +#include "src/Core/DenseStorage.h" +#include "src/Core/NestByValue.h" +#include "src/Core/ForceAlignedAccess.h" +#include "src/Core/ReturnByValue.h" +#include "src/Core/NoAlias.h" +#include "src/Core/PlainObjectBase.h" +#include "src/Core/Matrix.h" +#include "src/Core/Array.h" +#include "src/Core/CwiseBinaryOp.h" +#include "src/Core/CwiseUnaryOp.h" +#include "src/Core/CwiseNullaryOp.h" +#include "src/Core/CwiseUnaryView.h" +#include "src/Core/SelfCwiseBinaryOp.h" +#include "src/Core/Dot.h" +#include "src/Core/StableNorm.h" +#include "src/Core/MapBase.h" +#include "src/Core/Stride.h" +#include "src/Core/Map.h" +#include "src/Core/Block.h" +#include "src/Core/VectorBlock.h" +#include "src/Core/Ref.h" +#include "src/Core/Transpose.h" +#include "src/Core/DiagonalMatrix.h" +#include "src/Core/Diagonal.h" +#include "src/Core/DiagonalProduct.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" +#include "src/Core/Redux.h" +#include "src/Core/Visitor.h" +#include "src/Core/Fuzzy.h" +#include "src/Core/IO.h" +#include "src/Core/Swap.h" +#include "src/Core/CommaInitializer.h" +#include "src/Core/Flagged.h" +#include "src/Core/ProductBase.h" +#include "src/Core/GeneralProduct.h" +#include "src/Core/TriangularMatrix.h" +#include "src/Core/SelfAdjointView.h" +#include "src/Core/products/GeneralBlockPanelKernel.h" +#include "src/Core/products/Parallelizer.h" +#include "src/Core/products/CoeffBasedProduct.h" +#include "src/Core/products/GeneralMatrixVector.h" +#include "src/Core/products/GeneralMatrixMatrix.h" +#include "src/Core/SolveTriangular.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular.h" +#include "src/Core/products/SelfadjointMatrixVector.h" +#include "src/Core/products/SelfadjointMatrixMatrix.h" +#include "src/Core/products/SelfadjointProduct.h" +#include "src/Core/products/SelfadjointRank2Update.h" +#include "src/Core/products/TriangularMatrixVector.h" +#include "src/Core/products/TriangularMatrixMatrix.h" +#include "src/Core/products/TriangularSolverMatrix.h" +#include "src/Core/products/TriangularSolverVector.h" +#include "src/Core/BandMatrix.h" +#include "src/Core/CoreIterators.h" + +#include "src/Core/BooleanRedux.h" +#include "src/Core/Select.h" +#include "src/Core/VectorwiseOp.h" +#include "src/Core/Random.h" +#include "src/Core/Replicate.h" +#include "src/Core/Reverse.h" +#include "src/Core/ArrayBase.h" +#include "src/Core/ArrayWrapper.h" + +#ifdef EIGEN_USE_BLAS +#include "src/Core/products/GeneralMatrixMatrix_MKL.h" +#include "src/Core/products/GeneralMatrixVector_MKL.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h" +#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h" +#include "src/Core/products/SelfadjointMatrixVector_MKL.h" +#include "src/Core/products/TriangularMatrixMatrix_MKL.h" +#include "src/Core/products/TriangularMatrixVector_MKL.h" +#include "src/Core/products/TriangularSolverMatrix_MKL.h" +#endif // EIGEN_USE_BLAS + +#ifdef EIGEN_USE_MKL_VML +#include "src/Core/Assign_MKL.h" +#endif + +#include "src/Core/GlobalFunctions.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#ifdef EIGEN2_SUPPORT +#include "Eigen2Support" +#endif + +#endif // EIGEN_CORE_H diff --git a/tools/Eigen/Dense b/tools/Eigen/Dense new file mode 100644 index 0000000..5768910 --- /dev/null +++ b/tools/Eigen/Dense @@ -0,0 +1,7 @@ +#include "Core" +#include "LU" +#include "Cholesky" +#include "QR" +#include "SVD" +#include "Geometry" +#include "Eigenvalues" diff --git a/tools/Eigen/Eigen b/tools/Eigen/Eigen new file mode 100644 index 0000000..19b40ea --- /dev/null +++ b/tools/Eigen/Eigen @@ -0,0 +1,2 @@ +#include "Dense" +//#include "Sparse" diff --git a/tools/Eigen/Eigen2Support b/tools/Eigen/Eigen2Support new file mode 100644 index 0000000..6aa009d --- /dev/null +++ b/tools/Eigen/Eigen2Support @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN2SUPPORT_H +#define EIGEN2SUPPORT_H + +#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H)) +#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header +#endif + +#ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) +#warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)" +#else +#pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)") +#endif + +#endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \ingroup Support_modules + * \defgroup Eigen2Support_Module Eigen2 support module + * + * \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. + * + * This module provides a couple of deprecated functions improving the compatibility with Eigen2. + * + * To use it, define EIGEN2_SUPPORT before including any Eigen header + * \code + * #define EIGEN2_SUPPORT + * \endcode + * + */ + +#include "src/Eigen2Support/Macros.h" +#include "src/Eigen2Support/Memory.h" +#include "src/Eigen2Support/Meta.h" +#include "src/Eigen2Support/Lazy.h" +#include "src/Eigen2Support/Cwise.h" +#include "src/Eigen2Support/CwiseOperators.h" +#include "src/Eigen2Support/TriangularSolver.h" +#include "src/Eigen2Support/Block.h" +#include "src/Eigen2Support/VectorBlock.h" +#include "src/Eigen2Support/Minor.h" +#include "src/Eigen2Support/MathFunctions.h" + + +#include "src/Core/util/ReenableStupidWarnings.h" + +// Eigen2 used to include iostream +#include + +#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ +using Eigen::Matrix##SizeSuffix##TypeSuffix; \ +using Eigen::Vector##SizeSuffix##TypeSuffix; \ +using Eigen::RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ + +#define EIGEN_USING_MATRIX_TYPEDEFS \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \ +EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd) + +#define USING_PART_OF_NAMESPACE_EIGEN \ +EIGEN_USING_MATRIX_TYPEDEFS \ +using Eigen::Matrix; \ +using Eigen::MatrixBase; \ +using Eigen::ei_random; \ +using Eigen::ei_real; \ +using Eigen::ei_imag; \ +using Eigen::ei_conj; \ +using Eigen::ei_abs; \ +using Eigen::ei_abs2; \ +using Eigen::ei_sqrt; \ +using Eigen::ei_exp; \ +using Eigen::ei_log; \ +using Eigen::ei_sin; \ +using Eigen::ei_cos; + +#endif // EIGEN2SUPPORT_H diff --git a/tools/Eigen/Eigenvalues b/tools/Eigen/Eigenvalues new file mode 100644 index 0000000..53c5a73 --- /dev/null +++ b/tools/Eigen/Eigenvalues @@ -0,0 +1,48 @@ +#ifndef EIGEN_EIGENVALUES_MODULE_H +#define EIGEN_EIGENVALUES_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" +#include "LU" +#include "Geometry" + +/** \defgroup Eigenvalues_Module Eigenvalues module + * + * + * + * This module mainly provides various eigenvalue solvers. + * This module also provides some MatrixBase methods, including: + * - MatrixBase::eigenvalues(), + * - MatrixBase::operatorNorm() + * + * \code + * #include + * \endcode + */ + +#include "src/Eigenvalues/Tridiagonalization.h" +#include "src/Eigenvalues/RealSchur.h" +#include "src/Eigenvalues/EigenSolver.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver.h" +#include "src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h" +#include "src/Eigenvalues/HessenbergDecomposition.h" +#include "src/Eigenvalues/ComplexSchur.h" +#include "src/Eigenvalues/ComplexEigenSolver.h" +#include "src/Eigenvalues/RealQZ.h" +#include "src/Eigenvalues/GeneralizedEigenSolver.h" +#include "src/Eigenvalues/MatrixBaseEigenvalues.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/Eigenvalues/RealSchur_MKL.h" +#include "src/Eigenvalues/ComplexSchur_MKL.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_EIGENVALUES_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/tools/Eigen/Geometry b/tools/Eigen/Geometry new file mode 100644 index 0000000..efd9d45 --- /dev/null +++ b/tools/Eigen/Geometry @@ -0,0 +1,63 @@ +#ifndef EIGEN_GEOMETRY_MODULE_H +#define EIGEN_GEOMETRY_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "SVD" +#include "LU" +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +/** \defgroup Geometry_Module Geometry module + * + * + * + * This module provides support for: + * - fixed-size homogeneous transformations + * - translation, scaling, 2D and 3D rotations + * - quaternions + * - \ref MatrixBase::cross() "cross product" + * - \ref MatrixBase::unitOrthogonal() "orthognal vector generation" + * - some linear components: parametrized-lines and hyperplanes + * + * \code + * #include + * \endcode + */ + +#include "src/Geometry/OrthoMethods.h" +#include "src/Geometry/EulerAngles.h" + +#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS + #include "src/Geometry/Homogeneous.h" + #include "src/Geometry/RotationBase.h" + #include "src/Geometry/Rotation2D.h" + #include "src/Geometry/Quaternion.h" + #include "src/Geometry/AngleAxis.h" + #include "src/Geometry/Transform.h" + #include "src/Geometry/Translation.h" + #include "src/Geometry/Scaling.h" + #include "src/Geometry/Hyperplane.h" + #include "src/Geometry/ParametrizedLine.h" + #include "src/Geometry/AlignedBox.h" + #include "src/Geometry/Umeyama.h" + + #if defined EIGEN_VECTORIZE_SSE + #include "src/Geometry/arch/Geometry_SSE.h" + #endif +#endif + +#ifdef EIGEN2_SUPPORT +#include "src/Eigen2Support/Geometry/All.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_GEOMETRY_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/tools/Eigen/Householder b/tools/Eigen/Householder new file mode 100644 index 0000000..6e348db --- /dev/null +++ b/tools/Eigen/Householder @@ -0,0 +1,23 @@ +#ifndef EIGEN_HOUSEHOLDER_MODULE_H +#define EIGEN_HOUSEHOLDER_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Householder_Module Householder module + * This module provides Householder transformations. + * + * \code + * #include + * \endcode + */ + +#include "src/Householder/Householder.h" +#include "src/Householder/HouseholderSequence.h" +#include "src/Householder/BlockHouseholder.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_HOUSEHOLDER_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/tools/Eigen/IterativeLinearSolvers b/tools/Eigen/IterativeLinearSolvers new file mode 100644 index 0000000..0f4159d --- /dev/null +++ b/tools/Eigen/IterativeLinearSolvers @@ -0,0 +1,40 @@ +#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H +#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module + * + * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. + * Those solvers are accessible via the following classes: + * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - BiCGSTAB for general square matrices. + * + * These iterative solvers are associated with some preconditioners: + * - IdentityPreconditioner - not really useful + * - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices. + * - IncompleteILUT - incomplete LU factorization with dual thresholding + * + * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. + * + * \code + * #include + * \endcode + */ + +#include "src/misc/Solve.h" +#include "src/misc/SparseSolve.h" + +#include "src/IterativeLinearSolvers/IterativeSolverBase.h" +#include "src/IterativeLinearSolvers/BasicPreconditioners.h" +#include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/BiCGSTAB.h" +#include "src/IterativeLinearSolvers/IncompleteLUT.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H diff --git a/tools/Eigen/Jacobi b/tools/Eigen/Jacobi new file mode 100644 index 0000000..ba8a4dc --- /dev/null +++ b/tools/Eigen/Jacobi @@ -0,0 +1,26 @@ +#ifndef EIGEN_JACOBI_MODULE_H +#define EIGEN_JACOBI_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Jacobi_Module Jacobi module + * This module provides Jacobi and Givens rotations. + * + * \code + * #include + * \endcode + * + * In addition to listed classes, it defines the two following MatrixBase methods to apply a Jacobi or Givens rotation: + * - MatrixBase::applyOnTheLeft() + * - MatrixBase::applyOnTheRight(). + */ + +#include "src/Jacobi/Jacobi.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_JACOBI_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/tools/Eigen/LU b/tools/Eigen/LU new file mode 100644 index 0000000..db57955 --- /dev/null +++ b/tools/Eigen/LU @@ -0,0 +1,41 @@ +#ifndef EIGEN_LU_MODULE_H +#define EIGEN_LU_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup LU_Module LU module + * This module includes %LU decomposition and related notions such as matrix inversion and determinant. + * This module defines the following MatrixBase methods: + * - MatrixBase::inverse() + * - MatrixBase::determinant() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/Solve.h" +#include "src/misc/Kernel.h" +#include "src/misc/Image.h" +#include "src/LU/FullPivLU.h" +#include "src/LU/PartialPivLU.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/LU/PartialPivLU_MKL.h" +#endif +#include "src/LU/Determinant.h" +#include "src/LU/Inverse.h" + +#if defined EIGEN_VECTORIZE_SSE + #include "src/LU/arch/Inverse_SSE.h" +#endif + +#ifdef EIGEN2_SUPPORT + #include "src/Eigen2Support/LU.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_LU_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/tools/Eigen/LeastSquares b/tools/Eigen/LeastSquares new file mode 100644 index 0000000..35137c2 --- /dev/null +++ b/tools/Eigen/LeastSquares @@ -0,0 +1,32 @@ +#ifndef EIGEN_REGRESSION_MODULE_H +#define EIGEN_REGRESSION_MODULE_H + +#ifndef EIGEN2_SUPPORT +#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT) +#endif + +// exclude from normal eigen3-only documentation +#ifdef EIGEN2_SUPPORT + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "Eigenvalues" +#include "Geometry" + +/** \defgroup LeastSquares_Module LeastSquares module + * This module provides linear regression and related features. + * + * \code + * #include + * \endcode + */ + +#include "src/Eigen2Support/LeastSquares.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN2_SUPPORT + +#endif // EIGEN_REGRESSION_MODULE_H diff --git a/tools/Eigen/MetisSupport b/tools/Eigen/MetisSupport new file mode 100644 index 0000000..6a113f7 --- /dev/null +++ b/tools/Eigen/MetisSupport @@ -0,0 +1,28 @@ +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include + * \endcode + * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). + * It can be used just as any other built-in method as explained in \link OrderingMethods_Module here. \endlink + */ + + +#include "src/MetisSupport/MetisSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/tools/Eigen/OrderingMethods b/tools/Eigen/OrderingMethods new file mode 100644 index 0000000..7c0f1ff --- /dev/null +++ b/tools/Eigen/OrderingMethods @@ -0,0 +1,66 @@ +#ifndef EIGEN_ORDERINGMETHODS_MODULE_H +#define EIGEN_ORDERINGMETHODS_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup OrderingMethods_Module OrderingMethods module + * + * This module is currently for internal use only + * + * It defines various built-in and external ordering methods for sparse matrices. + * They are typically used to reduce the number of elements during + * the sparse matrix decomposition (LLT, LU, QR). + * Precisely, in a preprocessing step, a permutation matrix P is computed using + * those ordering methods and applied to the columns of the matrix. + * Using for instance the sparse Cholesky decomposition, it is expected that + * the nonzeros elements in LLT(A*P) will be much smaller than that in LLT(A). + * + * + * Usage : + * \code + * #include + * \endcode + * + * A simple usage is as a template parameter in the sparse decomposition classes : + * + * \code + * SparseLU > solver; + * \endcode + * + * \code + * SparseQR > solver; + * \endcode + * + * It is possible as well to call directly a particular ordering method for your own purpose, + * \code + * AMDOrdering ordering; + * PermutationMatrix perm; + * SparseMatrix A; + * //Fill the matrix ... + * + * ordering(A, perm); // Call AMD + * \endcode + * + * \note Some of these methods (like AMD or METIS), need the sparsity pattern + * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, + * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method. + * If your matrix is already symmetric (at leat in structure), you can avoid that + * by calling the method with a SelfAdjointView type. + * + * \code + * // Call the ordering on the pattern of the lower triangular matrix A + * ordering(A.selfadjointView(), perm); + * \endcode + */ + +#ifndef EIGEN_MPL2_ONLY +#include "src/OrderingMethods/Amd.h" +#endif + +#include "src/OrderingMethods/Ordering.h" +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/tools/Eigen/PaStiXSupport b/tools/Eigen/PaStiXSupport new file mode 100644 index 0000000..7c616ee --- /dev/null +++ b/tools/Eigen/PaStiXSupport @@ -0,0 +1,46 @@ +#ifndef EIGEN_PASTIXSUPPORT_MODULE_H +#define EIGEN_PASTIXSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include +extern "C" { +#include +#include +} + +#ifdef complex +#undef complex +#endif + +/** \ingroup Support_modules + * \defgroup PaStiXSupport_Module PaStiXSupport module + * + * This module provides an interface to the PaSTiX library. + * PaSTiX is a general \b supernodal, \b parallel and \b opensource sparse solver. + * It provides the two following main factorization classes: + * - class PastixLLT : a supernodal, parallel LLt Cholesky factorization. + * - class PastixLDLT: a supernodal, parallel LDLt Cholesky factorization. + * - class PastixLU : a supernodal, parallel LU factorization (optimized for a symmetric pattern). + * + * \code + * #include + * \endcode + * + * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be linked to the PaSTiX library and its dependencies. + * The dependencies depend on how PaSTiX has been compiled. + * For a cmake based project, you can use our FindPaSTiX.cmake module to help you in this task. + * + */ + +#include "src/misc/Solve.h" +#include "src/misc/SparseSolve.h" + +#include "src/PaStiXSupport/PaStiXSupport.h" + + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/tools/Eigen/PardisoSupport b/tools/Eigen/PardisoSupport new file mode 100644 index 0000000..99330ce --- /dev/null +++ b/tools/Eigen/PardisoSupport @@ -0,0 +1,30 @@ +#ifndef EIGEN_PARDISOSUPPORT_MODULE_H +#define EIGEN_PARDISOSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include + +#include + +/** \ingroup Support_modules + * \defgroup PardisoSupport_Module PardisoSupport module + * + * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers. + * + * \code + * #include + * \endcode + * + * In order to use this module, the MKL headers must be accessible from the include paths, and your binary must be linked to the MKL library and its dependencies. + * See this \ref TopicUsingIntelMKL "page" for more information on MKL-Eigen integration. + * + */ + +#include "src/PardisoSupport/PardisoSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PARDISOSUPPORT_MODULE_H diff --git a/tools/Eigen/QR b/tools/Eigen/QR new file mode 100644 index 0000000..ac5b026 --- /dev/null +++ b/tools/Eigen/QR @@ -0,0 +1,45 @@ +#ifndef EIGEN_QR_MODULE_H +#define EIGEN_QR_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" + +/** \defgroup QR_Module QR module + * + * + * + * This module provides various QR decompositions + * This module also provides some MatrixBase methods, including: + * - MatrixBase::qr(), + * + * \code + * #include + * \endcode + */ + +#include "src/misc/Solve.h" +#include "src/QR/HouseholderQR.h" +#include "src/QR/FullPivHouseholderQR.h" +#include "src/QR/ColPivHouseholderQR.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/QR/HouseholderQR_MKL.h" +#include "src/QR/ColPivHouseholderQR_MKL.h" +#endif + +#ifdef EIGEN2_SUPPORT +#include "src/Eigen2Support/QR.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#ifdef EIGEN2_SUPPORT +#include "Eigenvalues" +#endif + +#endif // EIGEN_QR_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/tools/Eigen/QtAlignedMalloc b/tools/Eigen/QtAlignedMalloc new file mode 100644 index 0000000..46f7d83 --- /dev/null +++ b/tools/Eigen/QtAlignedMalloc @@ -0,0 +1,34 @@ + +#ifndef EIGEN_QTMALLOC_MODULE_H +#define EIGEN_QTMALLOC_MODULE_H + +#include "Core" + +#if (!EIGEN_MALLOC_ALREADY_ALIGNED) + +#include "src/Core/util/DisableStupidWarnings.h" + +void *qMalloc(size_t size) +{ + return Eigen::internal::aligned_malloc(size); +} + +void qFree(void *ptr) +{ + Eigen::internal::aligned_free(ptr); +} + +void *qRealloc(void *ptr, size_t size) +{ + void* newPtr = Eigen::internal::aligned_malloc(size); + memcpy(newPtr, ptr, size); + Eigen::internal::aligned_free(ptr); + return newPtr; +} + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif + +#endif // EIGEN_QTMALLOC_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/tools/Eigen/SPQRSupport b/tools/Eigen/SPQRSupport new file mode 100644 index 0000000..7f1eb47 --- /dev/null +++ b/tools/Eigen/SPQRSupport @@ -0,0 +1,29 @@ +#ifndef EIGEN_SPQRSUPPORT_MODULE_H +#define EIGEN_SPQRSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "SuiteSparseQR.hpp" + +/** \ingroup Support_modules + * \defgroup SPQRSupport_Module SuiteSparseQR module + * + * This module provides an interface to the SPQR library, which is part of the suitesparse package. + * + * \code + * #include + * \endcode + * + * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...). + * For a cmake based project, you can use our FindSPQR.cmake and FindCholmod.Cmake modules + * + */ + +#include "src/misc/Solve.h" +#include "src/misc/SparseSolve.h" +#include "src/CholmodSupport/CholmodSupport.h" +#include "src/SPQRSupport/SuiteSparseQRSupport.h" + +#endif diff --git a/tools/Eigen/Sparse b/tools/Eigen/Sparse new file mode 100644 index 0000000..7cc9c09 --- /dev/null +++ b/tools/Eigen/Sparse @@ -0,0 +1,27 @@ +#ifndef EIGEN_SPARSE_MODULE_H +#define EIGEN_SPARSE_MODULE_H + +/** \defgroup Sparse_Module Sparse meta-module + * + * Meta-module including all related modules: + * - \ref SparseCore_Module + * - \ref OrderingMethods_Module + * - \ref SparseCholesky_Module + * - \ref SparseLU_Module + * - \ref SparseQR_Module + * - \ref IterativeLinearSolvers_Module + * + * \code + * #include + * \endcode + */ + +#include "SparseCore" +#include "OrderingMethods" +#include "SparseCholesky" +#include "SparseLU" +#include "SparseQR" +#include "IterativeLinearSolvers" + +#endif // EIGEN_SPARSE_MODULE_H + diff --git a/tools/Eigen/SparseCholesky b/tools/Eigen/SparseCholesky new file mode 100644 index 0000000..9f5056a --- /dev/null +++ b/tools/Eigen/SparseCholesky @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2013 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSECHOLESKY_MODULE_H +#define EIGEN_SPARSECHOLESKY_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup SparseCholesky_Module SparseCholesky module + * + * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices. + * Those decompositions are accessible via the following classes: + * - SimplicialLLt, + * - SimplicialLDLt + * + * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module. + * + * \code + * #include + * \endcode + */ + +#ifdef EIGEN_MPL2_ONLY +#error The SparseCholesky module has nothing to offer in MPL2 only mode +#endif + +#include "src/misc/Solve.h" +#include "src/misc/SparseSolve.h" +#include "src/SparseCholesky/SimplicialCholesky.h" + +#ifndef EIGEN_MPL2_ONLY +#include "src/SparseCholesky/SimplicialCholesky_impl.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECHOLESKY_MODULE_H diff --git a/tools/Eigen/SparseCore b/tools/Eigen/SparseCore new file mode 100644 index 0000000..24bcf01 --- /dev/null +++ b/tools/Eigen/SparseCore @@ -0,0 +1,64 @@ +#ifndef EIGEN_SPARSECORE_MODULE_H +#define EIGEN_SPARSECORE_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include +#include +#include +#include +#include + +/** + * \defgroup SparseCore_Module SparseCore module + * + * This module provides a sparse matrix representation, and basic associated matrix manipulations + * and operations. + * + * See the \ref TutorialSparse "Sparse tutorial" + * + * \code + * #include + * \endcode + * + * This module depends on: Core. + */ + +namespace Eigen { + +/** The type used to identify a general sparse storage. */ +struct Sparse {}; + +} + +#include "src/SparseCore/SparseUtil.h" +#include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/CompressedStorage.h" +#include "src/SparseCore/AmbiVector.h" +#include "src/SparseCore/SparseMatrix.h" +#include "src/SparseCore/MappedSparseMatrix.h" +#include "src/SparseCore/SparseVector.h" +#include "src/SparseCore/SparseBlock.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseCwiseUnaryOp.h" +#include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseDot.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseRedux.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/ConservativeSparseSparseProduct.h" +#include "src/SparseCore/SparseSparseProductWithPruning.h" +#include "src/SparseCore/SparseProduct.h" +#include "src/SparseCore/SparseDenseProduct.h" +#include "src/SparseCore/SparseDiagonalProduct.h" +#include "src/SparseCore/SparseTriangularView.h" +#include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/TriangularSolver.h" +#include "src/SparseCore/SparseView.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECORE_MODULE_H + diff --git a/tools/Eigen/SparseLU b/tools/Eigen/SparseLU new file mode 100644 index 0000000..8527a49 --- /dev/null +++ b/tools/Eigen/SparseLU @@ -0,0 +1,49 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Dรฉsirรฉ Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + +/** + * \defgroup SparseLU_Module SparseLU module + * This module defines a supernodal factorization of general sparse matrices. + * The code is fully optimized for supernode-panel updates with specialized kernels. + * Please, see the documentation of the SparseLU class for more details. + */ + +#include "src/misc/Solve.h" +#include "src/misc/SparseSolve.h" + +// Ordering interface +#include "OrderingMethods" + +#include "src/SparseLU/SparseLU_gemm_kernel.h" + +#include "src/SparseLU/SparseLU_Structs.h" +#include "src/SparseLU/SparseLU_SupernodalMatrix.h" +#include "src/SparseLU/SparseLUImpl.h" +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseLU/SparseLU_Memory.h" +#include "src/SparseLU/SparseLU_heap_relax_snode.h" +#include "src/SparseLU/SparseLU_relax_snode.h" +#include "src/SparseLU/SparseLU_pivotL.h" +#include "src/SparseLU/SparseLU_panel_dfs.h" +#include "src/SparseLU/SparseLU_kernel_bmod.h" +#include "src/SparseLU/SparseLU_panel_bmod.h" +#include "src/SparseLU/SparseLU_column_dfs.h" +#include "src/SparseLU/SparseLU_column_bmod.h" +#include "src/SparseLU/SparseLU_copy_to_ucol.h" +#include "src/SparseLU/SparseLU_pruneL.h" +#include "src/SparseLU/SparseLU_Utils.h" +#include "src/SparseLU/SparseLU.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/tools/Eigen/SparseQR b/tools/Eigen/SparseQR new file mode 100644 index 0000000..4ee4206 --- /dev/null +++ b/tools/Eigen/SparseQR @@ -0,0 +1,33 @@ +#ifndef EIGEN_SPARSEQR_MODULE_H +#define EIGEN_SPARSEQR_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup SparseQR_Module SparseQR module + * \brief Provides QR decomposition for sparse matrices + * + * This module provides a simplicial version of the left-looking Sparse QR decomposition. + * The columns of the input matrix should be reordered to limit the fill-in during the + * decomposition. Built-in methods (COLAMD, AMD) or external methods (METIS) can be used to this end. + * See the \link OrderingMethods_Module OrderingMethods\endlink module for the list + * of built-in and external ordering methods. + * + * \code + * #include + * \endcode + * + * + */ + +#include "src/misc/Solve.h" +#include "src/misc/SparseSolve.h" + +#include "OrderingMethods" +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseQR/SparseQR.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif diff --git a/tools/Eigen/src/Cholesky/CMakeLists.txt b/tools/Eigen/src/Cholesky/CMakeLists.txt new file mode 100644 index 0000000..d01488b --- /dev/null +++ b/tools/Eigen/src/Cholesky/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Cholesky_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Cholesky_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel + ) From bb9a463b73fb1b4da432e34157a12c92e6cb80fa Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:07:32 +0200 Subject: [PATCH 09/25] add more eigen files --- tools/Eigen/src/Cholesky/LDLT.h | 611 ++++++++++++++++++++++++++++++++ 1 file changed, 611 insertions(+) create mode 100644 tools/Eigen/src/Cholesky/LDLT.h diff --git a/tools/Eigen/src/Cholesky/LDLT.h b/tools/Eigen/src/Cholesky/LDLT.h new file mode 100644 index 0000000..abd30bd --- /dev/null +++ b/tools/Eigen/src/Cholesky/LDLT.h @@ -0,0 +1,611 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// Copyright (C) 2009 Keir Mierle +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2011 Timothy E. Holy +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LDLT_H +#define EIGEN_LDLT_H + +namespace Eigen { + +namespace internal { + template struct LDLT_Traits; + + // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef + enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite }; +} + +/** \ingroup Cholesky_Module + * + * \class LDLT + * + * \brief Robust Cholesky decomposition of a matrix with pivoting + * + * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition + * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * The other triangular part won't be read. + * + * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite + * matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L + * is lower triangular with a unit diagonal and D is a diagonal matrix. + * + * The decomposition uses pivoting to ensure stability, so that L will have + * zeros in the bottom right rank(A) - n submatrix. Avoiding the square root + * on D also stabilizes the computation. + * + * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky + * decomposition to determine whether a system of equations has a solution. + * + * \sa MatrixBase::ldlt(), class LLT + */ +template class LDLT +{ + public: + typedef _MatrixType MatrixType; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + Options = MatrixType::Options & ~RowMajorBit, // these are the options for the TmpMatrixType, we need a ColMajor matrix here! + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + UpLo = _UpLo + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename MatrixType::Index Index; + typedef Matrix TmpMatrixType; + + typedef Transpositions TranspositionType; + typedef PermutationMatrix PermutationType; + + typedef internal::LDLT_Traits Traits; + + /** \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LDLT::compute(const MatrixType&). + */ + LDLT() + : m_matrix(), + m_transpositions(), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LDLT() + */ + LDLT(Index size) + : m_matrix(size, size), + m_transpositions(size), + m_temporary(size), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} + + /** \brief Constructor with decomposition + * + * This calculates the decomposition for the input \a matrix. + * \sa LDLT(Index size) + */ + LDLT(const MatrixType& matrix) + : m_matrix(matrix.rows(), matrix.cols()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) + { + compute(matrix); + } + + /** Clear any existing decomposition + * \sa rankUpdate(w,sigma) + */ + void setZero() + { + m_isInitialized = false; + } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getL(m_matrix); + } + + /** \returns the permutation matrix P as a transposition sequence. + */ + inline const TranspositionType& transpositionsP() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_transpositions; + } + + /** \returns the coefficients of the diagonal matrix D */ + inline Diagonal vectorD() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix.diagonal(); + } + + /** \returns true if the matrix is positive (semidefinite) */ + inline bool isPositive() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign; + } + + #ifdef EIGEN2_SUPPORT + inline bool isPositiveDefinite() const + { + return isPositive(); + } + #endif + + /** \returns true if the matrix is negative (semidefinite) */ + inline bool isNegative(void) const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign; + } + + /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * This function also supports in-place solves using the syntax x = decompositionObject.solve(x) . + * + * \note_about_checking_solutions + * + * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$ + * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, + * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then + * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the + * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function + * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular. + * + * \sa MatrixBase::ldlt() + */ + template + inline const internal::solve_retval + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + eigen_assert(m_matrix.rows()==b.rows() + && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); + return internal::solve_retval(*this, b.derived()); + } + + #ifdef EIGEN2_SUPPORT + template + bool solve(const MatrixBase& b, ResultType *result) const + { + *result = this->solve(b); + return true; + } + #endif + + template + bool solveInPlace(MatrixBase &bAndX) const; + + LDLT& compute(const MatrixType& matrix); + + template + LDLT& rankUpdate(const MatrixBase& w, const RealScalar& alpha=1); + + /** \returns the internal LDLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLDLT() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Success; + } + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + /** \internal + * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. + * The strict upper part is used during the decomposition, the strict lower + * part correspond to the coefficients of L (its diagonal is equal to 1 and + * is not stored), and the diagonal entries correspond to D. + */ + MatrixType m_matrix; + TranspositionType m_transpositions; + TmpMatrixType m_temporary; + internal::SignMatrix m_sign; + bool m_isInitialized; +}; + +namespace internal { + +template struct ldlt_inplace; + +template<> struct ldlt_inplace +{ + template + static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) + { + using std::abs; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + eigen_assert(mat.rows()==mat.cols()); + const Index size = mat.rows(); + + if (size <= 1) + { + transpositions.setIdentity(); + if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef; + else sign = ZeroSign; + return true; + } + + for (Index k = 0; k < size; ++k) + { + // Find largest diagonal element + Index index_of_biggest_in_corner; + mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + index_of_biggest_in_corner += k; + + transpositions.coeffRef(k) = index_of_biggest_in_corner; + if(k != index_of_biggest_in_corner) + { + // apply the transposition while taking care to consider only + // the lower triangular part + Index s = size-index_of_biggest_in_corner-1; // trailing size after the biggest element + mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k)); + mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s)); + std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner)); + for(int i=k+1;i::IsComplex) + mat.coeffRef(index_of_biggest_in_corner,k) = numext::conj(mat.coeff(index_of_biggest_in_corner,k)); + } + + // partition the matrix: + // A00 | - | - + // lu = A10 | A11 | - + // A20 | A21 | A22 + Index rs = size - k - 1; + Block A21(mat,k+1,k,rs,1); + Block A10(mat,k,0,1,k); + Block A20(mat,k+1,0,rs,k); + + if(k>0) + { + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); + mat.coeffRef(k,k) -= (A10 * temp.head(k)).value(); + if(rs>0) + A21.noalias() -= A20 * temp.head(k); + } + + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, soince LDLT is not rank-revealing + // we should only make sure we do not introduce INF or NaN values. + // LAPACK also uses 0 as the cutoff value. + RealScalar realAkk = numext::real(mat.coeffRef(k,k)); + if((rs>0) && (abs(realAkk) > RealScalar(0))) + A21 /= realAkk; + + if (sign == PositiveSemiDef) { + if (realAkk < 0) sign = Indefinite; + } else if (sign == NegativeSemiDef) { + if (realAkk > 0) sign = Indefinite; + } else if (sign == ZeroSign) { + if (realAkk > 0) sign = PositiveSemiDef; + else if (realAkk < 0) sign = NegativeSemiDef; + } + } + + return true; + } + + // Reference for the algorithm: Davis and Hager, "Multiple Rank + // Modifications of a Sparse Cholesky Factorization" (Algorithm 1) + // Trivial rearrangements of their computations (Timothy E. Holy) + // allow their algorithm to work for rank-1 updates even if the + // original matrix is not of full rank. + // Here only rank-1 updates are implemented, to reduce the + // requirement for intermediate storage and improve accuracy + template + static bool updateInPlace(MatrixType& mat, MatrixBase& w, const typename MatrixType::RealScalar& sigma=1) + { + using numext::isfinite; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + + const Index size = mat.rows(); + eigen_assert(mat.cols() == size && w.size()==size); + + RealScalar alpha = 1; + + // Apply the update + for (Index j = 0; j < size; j++) + { + // Check for termination due to an original decomposition of low-rank + if (!(isfinite)(alpha)) + break; + + // Update the diagonal terms + RealScalar dj = numext::real(mat.coeff(j,j)); + Scalar wj = w.coeff(j); + RealScalar swj2 = sigma*numext::abs2(wj); + RealScalar gamma = dj*alpha + swj2; + + mat.coeffRef(j,j) += swj2/alpha; + alpha += swj2/dj; + + + // Update the terms of L + Index rs = size-j-1; + w.tail(rs) -= wj * mat.col(j).tail(rs); + if(gamma != 0) + mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs); + } + return true; + } + + template + static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1) + { + // Apply the permutation to the input w + tmp = transpositions * w; + + return ldlt_inplace::updateInPlace(mat,tmp,sigma); + } +}; + +template<> struct ldlt_inplace +{ + template + static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) + { + Transpose matt(mat); + return ldlt_inplace::unblocked(matt, transpositions, temp, sign); + } + + template + static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1) + { + Transpose matt(mat); + return ldlt_inplace::update(matt, transpositions, tmp, w.conjugate(), sigma); + } +}; + +template struct LDLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return m; } + static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } +}; + +template struct LDLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } + static inline MatrixU getU(const MatrixType& m) { return m; } +}; + +} // end namespace internal + +/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix + */ +template +LDLT& LDLT::compute(const MatrixType& a) +{ + check_template_parameters(); + + eigen_assert(a.rows()==a.cols()); + const Index size = a.rows(); + + m_matrix = a; + + m_transpositions.resize(size); + m_isInitialized = false; + m_temporary.resize(size); + m_sign = internal::ZeroSign; + + internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); + + m_isInitialized = true; + return *this; +} + +/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T. + * \param w a vector to be incorporated into the decomposition. + * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1. + * \sa setZero() + */ +template +template +LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) +{ + const Index size = w.rows(); + if (m_isInitialized) + { + eigen_assert(m_matrix.rows()==size); + } + else + { + m_matrix.resize(size,size); + m_matrix.setZero(); + m_transpositions.resize(size); + for (Index i = 0; i < size; i++) + m_transpositions.coeffRef(i) = i; + m_temporary.resize(size); + m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef; + m_isInitialized = true; + } + + internal::ldlt_inplace::update(m_matrix, m_transpositions, m_temporary, w, sigma); + + return *this; +} + +namespace internal { +template +struct solve_retval, Rhs> + : solve_retval_base, Rhs> +{ + typedef LDLT<_MatrixType,_UpLo> LDLTType; + EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs) + + template void evalTo(Dest& dst) const + { + eigen_assert(rhs().rows() == dec().matrixLDLT().rows()); + // dst = P b + dst = dec().transpositionsP() * rhs(); + + // dst = L^-1 (P b) + dec().matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + using std::max; + typedef typename LDLTType::MatrixType MatrixType; + typedef typename LDLTType::RealScalar RealScalar; + const typename Diagonal::RealReturnType vectorD(dec().vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. + RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + + for (Index i = 0; i < vectorD.size(); ++i) { + if(abs(vectorD(i)) > tolerance) + dst.row(i) /= vectorD(i); + else + dst.row(i).setZero(); + } + + // dst = L^-T (D^-1 L^-1 P b) + dec().matrixU().solveInPlace(dst); + + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = dec().transpositionsP().transpose() * dst; + } +}; +} + +/** \internal use x = ldlt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD. + * + * This version avoids a copy when the right hand side matrix b is not + * needed anymore. + * + * \sa LDLT::solve(), MatrixBase::ldlt() + */ +template +template +bool LDLT::solveInPlace(MatrixBase &bAndX) const +{ + eigen_assert(m_isInitialized && "LDLT is not initialized."); + eigen_assert(m_matrix.rows() == bAndX.rows()); + + bAndX = this->solve(bAndX); + + return true; +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^T L D L^* P. + * This function is provided for debug purpose. */ +template +MatrixType LDLT::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LDLT is not initialized."); + const Index size = m_matrix.rows(); + MatrixType res(size,size); + + // P + res.setIdentity(); + res = transpositionsP() * res; + // L^* P + res = matrixU() * res; + // D(L^*P) + res = vectorD().real().asDiagonal() * res; + // L(DL^*P) + res = matrixL() * res; + // P^T (LDL^*P) + res = transpositionsP().transpose() * res; + + return res; +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + */ +template +inline const LDLT::PlainObject, UpLo> +SelfAdjointView::ldlt() const +{ + return LDLT(m_matrix); +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + */ +template +inline const LDLT::PlainObject> +MatrixBase::ldlt() const +{ + return LDLT(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_LDLT_H From 235d8ae684997f13a27476e7ec758dd425ef798c Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:07:52 +0200 Subject: [PATCH 10/25] add more eigen files --- tools/Eigen/src/CMakeLists.txt | 7 + tools/Eigen/src/Cholesky/LLT.h | 498 ++++++++++++++ tools/Eigen/src/Cholesky/LLT_MKL.h | 102 +++ tools/Eigen/src/CholmodSupport/CMakeLists.txt | 6 + .../Eigen/src/CholmodSupport/CholmodSupport.h | 607 ++++++++++++++++++ .../src/Core/arch/AltiVec/CMakeLists.txt | 6 + 6 files changed, 1226 insertions(+) create mode 100644 tools/Eigen/src/CMakeLists.txt create mode 100644 tools/Eigen/src/Cholesky/LLT.h create mode 100644 tools/Eigen/src/Cholesky/LLT_MKL.h create mode 100644 tools/Eigen/src/CholmodSupport/CMakeLists.txt create mode 100644 tools/Eigen/src/CholmodSupport/CholmodSupport.h create mode 100644 tools/Eigen/src/Core/arch/AltiVec/CMakeLists.txt diff --git a/tools/Eigen/src/CMakeLists.txt b/tools/Eigen/src/CMakeLists.txt new file mode 100644 index 0000000..c326f37 --- /dev/null +++ b/tools/Eigen/src/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB Eigen_src_subdirectories "*") +escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +foreach(f ${Eigen_src_subdirectories}) + if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" ) + add_subdirectory(${f}) + endif() +endforeach() diff --git a/tools/Eigen/src/Cholesky/LLT.h b/tools/Eigen/src/Cholesky/LLT.h new file mode 100644 index 0000000..7c11a2d --- /dev/null +++ b/tools/Eigen/src/Cholesky/LLT.h @@ -0,0 +1,498 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LLT_H +#define EIGEN_LLT_H + +namespace Eigen { + +namespace internal{ +template struct LLT_Traits; +} + +/** \ingroup Cholesky_Module + * + * \class LLT + * + * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features + * + * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition + * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * The other triangular part won't be read. + * + * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite + * matrix A such that A = LL^* = U^*U, where L is lower triangular. + * + * While the Cholesky decomposition is particularly useful to solve selfadjoint problems like D^*D x = b, + * for that purpose, we recommend the Cholesky decomposition without square root which is more stable + * and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other + * situations like generalised eigen problems with hermitian matrices. + * + * Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices, + * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations + * has a solution. + * + * Example: \include LLT_example.cpp + * Output: \verbinclude LLT_example.out + * + * \sa MatrixBase::llt(), class LDLT + */ + /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) + * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, + * the strict lower part does not have to store correct values. + */ +template class LLT +{ + public: + typedef _MatrixType MatrixType; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + Options = MatrixType::Options, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename MatrixType::Index Index; + + enum { + PacketSize = internal::packet_traits::size, + AlignmentMask = int(PacketSize)-1, + UpLo = _UpLo + }; + + typedef internal::LLT_Traits Traits; + + /** + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LLT::compute(const MatrixType&). + */ + LLT() : m_matrix(), m_isInitialized(false) {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LLT() + */ + LLT(Index size) : m_matrix(size, size), + m_isInitialized(false) {} + + LLT(const MatrixType& matrix) + : m_matrix(matrix.rows(), matrix.cols()), + m_isInitialized(false) + { + compute(matrix); + } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getL(m_matrix); + } + + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * Since this LLT class assumes anyway that the matrix A is invertible, the solution + * theoretically exists and is unique regardless of b. + * + * Example: \include LLT_solve.cpp + * Output: \verbinclude LLT_solve.out + * + * \sa solveInPlace(), MatrixBase::llt() + */ + template + inline const internal::solve_retval + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_matrix.rows()==b.rows() + && "LLT::solve(): invalid number of rows of the right hand side matrix b"); + return internal::solve_retval(*this, b.derived()); + } + + #ifdef EIGEN2_SUPPORT + template + bool solve(const MatrixBase& b, ResultType *result) const + { + *result = this->solve(b); + return true; + } + + bool isPositiveDefinite() const { return true; } + #endif + + template + void solveInPlace(MatrixBase &bAndX) const; + + LLT& compute(const MatrixType& matrix); + + /** \returns the LLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLLT() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_info; + } + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + template + LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + /** \internal + * Used to compute and store L + * The strict upper part is not used and even not initialized. + */ + MatrixType m_matrix; + bool m_isInitialized; + ComputationInfo m_info; +}; + +namespace internal { + +template struct llt_inplace; + +template +static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) +{ + using std::sqrt; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::ColXpr ColXpr; + typedef typename internal::remove_all::type ColXprCleaned; + typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; + typedef Matrix TempVectorType; + typedef typename TempVectorType::SegmentReturnType TempVecSegment; + + Index n = mat.cols(); + eigen_assert(mat.rows()==n && vec.size()==n); + + TempVectorType temp; + + if(sigma>0) + { + // This version is based on Givens rotations. + // It is faster than the other one below, but only works for updates, + // i.e., for sigma > 0 + temp = sqrt(sigma) * vec; + + for(Index i=0; i g; + g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); + + Index rs = n-i-1; + if(rs>0) + { + ColXprSegment x(mat.col(i).tail(rs)); + TempVecSegment y(temp.tail(rs)); + apply_rotation_in_the_plane(x, y, g); + } + } + } + else + { + temp = vec; + RealScalar beta = 1; + for(Index j=0; j struct llt_inplace +{ + typedef typename NumTraits::Real RealScalar; + template + static typename MatrixType::Index unblocked(MatrixType& mat) + { + using std::sqrt; + typedef typename MatrixType::Index Index; + + eigen_assert(mat.rows()==mat.cols()); + const Index size = mat.rows(); + for(Index k = 0; k < size; ++k) + { + Index rs = size-k-1; // remaining size + + Block A21(mat,k+1,k,rs,1); + Block A10(mat,k,0,1,k); + Block A20(mat,k+1,0,rs,k); + + RealScalar x = numext::real(mat.coeff(k,k)); + if (k>0) x -= A10.squaredNorm(); + if (x<=RealScalar(0)) + return k; + mat.coeffRef(k,k) = x = sqrt(x); + if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint(); + if (rs>0) A21 /= x; + } + return -1; + } + + template + static typename MatrixType::Index blocked(MatrixType& m) + { + typedef typename MatrixType::Index Index; + eigen_assert(m.rows()==m.cols()); + Index size = m.rows(); + if(size<32) + return unblocked(m); + + Index blockSize = size/8; + blockSize = (blockSize/16)*16; + blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128)); + + for (Index k=0; k A11(m,k, k, bs,bs); + Block A21(m,k+bs,k, rs,bs); + Block A22(m,k+bs,k+bs,rs,rs); + + Index ret; + if((ret=unblocked(A11))>=0) return k+ret; + if(rs>0) A11.adjoint().template triangularView().template solveInPlace(A21); + if(rs>0) A22.template selfadjointView().rankUpdate(A21,-1); // bottleneck + } + return -1; + } + + template + static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + { + return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); + } +}; + +template struct llt_inplace +{ + typedef typename NumTraits::Real RealScalar; + + template + static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat) + { + Transpose matt(mat); + return llt_inplace::unblocked(matt); + } + template + static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat) + { + Transpose matt(mat); + return llt_inplace::blocked(matt); + } + template + static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + { + Transpose matt(mat); + return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); + } +}; + +template struct LLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return m; } + static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static bool inplace_decomposition(MatrixType& m) + { return llt_inplace::blocked(m)==-1; } +}; + +template struct LLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } + static inline MatrixU getU(const MatrixType& m) { return m; } + static bool inplace_decomposition(MatrixType& m) + { return llt_inplace::blocked(m)==-1; } +}; + +} // end namespace internal + +/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix + * + * \returns a reference to *this + * + * Example: \include TutorialLinAlgComputeTwice.cpp + * Output: \verbinclude TutorialLinAlgComputeTwice.out + */ +template +LLT& LLT::compute(const MatrixType& a) +{ + check_template_parameters(); + + eigen_assert(a.rows()==a.cols()); + const Index size = a.rows(); + m_matrix.resize(size, size); + m_matrix = a; + + m_isInitialized = true; + bool ok = Traits::inplace_decomposition(m_matrix); + m_info = ok ? Success : NumericalIssue; + + return *this; +} + +/** Performs a rank one update (or dowdate) of the current decomposition. + * If A = LL^* before the rank one update, + * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector + * of same dimension. + */ +template +template +LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType); + eigen_assert(v.size()==m_matrix.cols()); + eigen_assert(m_isInitialized); + if(internal::llt_inplace::rankUpdate(m_matrix,v,sigma)>=0) + m_info = NumericalIssue; + else + m_info = Success; + + return *this; +} + +namespace internal { +template +struct solve_retval, Rhs> + : solve_retval_base, Rhs> +{ + typedef LLT<_MatrixType,UpLo> LLTType; + EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs) + + template void evalTo(Dest& dst) const + { + dst = rhs(); + dec().solveInPlace(dst); + } +}; +} + +/** \internal use x = llt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD. + * + * This version avoids a copy when the right hand side matrix b is not + * needed anymore. + * + * \sa LLT::solve(), MatrixBase::llt() + */ +template +template +void LLT::solveInPlace(MatrixBase &bAndX) const +{ + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_matrix.rows()==bAndX.rows()); + matrixL().solveInPlace(bAndX); + matrixU().solveInPlace(bAndX); +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: L L^*. + * This function is provided for debug purpose. */ +template +MatrixType LLT::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LLT is not initialized."); + return matrixL() * matrixL().adjoint().toDenseMatrix(); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + */ +template +inline const LLT::PlainObject> +MatrixBase::llt() const +{ + return LLT(derived()); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + */ +template +inline const LLT::PlainObject, UpLo> +SelfAdjointView::llt() const +{ + return LLT(m_matrix); +} + +} // end namespace Eigen + +#endif // EIGEN_LLT_H diff --git a/tools/Eigen/src/Cholesky/LLT_MKL.h b/tools/Eigen/src/Cholesky/LLT_MKL.h new file mode 100644 index 0000000..66675d7 --- /dev/null +++ b/tools/Eigen/src/Cholesky/LLT_MKL.h @@ -0,0 +1,102 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * LLt decomposition based on LAPACKE_?potrf function. + ******************************************************************************** +*/ + +#ifndef EIGEN_LLT_MKL_H +#define EIGEN_LLT_MKL_H + +#include "Eigen/src/Core/util/MKL_support.h" +#include + +namespace Eigen { + +namespace internal { + +template struct mkl_llt; + +#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template<> struct mkl_llt \ +{ \ + template \ + static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \ + { \ + lapack_int matrix_order; \ + lapack_int size, lda, info, StorageOrder; \ + EIGTYPE* a; \ + eigen_assert(m.rows()==m.cols()); \ + /* Set up parameters for ?potrf */ \ + size = m.rows(); \ + StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ + matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + a = &(m.coeffRef(0,0)); \ + lda = m.outerStride(); \ +\ + info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \ + info = (info==0) ? -1 : info>0 ? info-1 : size; \ + return info; \ + } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static typename MatrixType::Index blocked(MatrixType& m) \ + { \ + return mkl_llt::potrf(m, 'L'); \ + } \ + template \ + static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static typename MatrixType::Index blocked(MatrixType& m) \ + { \ + return mkl_llt::potrf(m, 'U'); \ + } \ + template \ + static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + { \ + Transpose matt(mat); \ + return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ + } \ +}; + +EIGEN_MKL_LLT(double, double, d) +EIGEN_MKL_LLT(float, float, s) +EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z) +EIGEN_MKL_LLT(scomplex, MKL_Complex8, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_LLT_MKL_H diff --git a/tools/Eigen/src/CholmodSupport/CMakeLists.txt b/tools/Eigen/src/CholmodSupport/CMakeLists.txt new file mode 100644 index 0000000..814dfa6 --- /dev/null +++ b/tools/Eigen/src/CholmodSupport/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_CholmodSupport_SRCS "*.h") + +INSTALL(FILES + ${Eigen_CholmodSupport_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel + ) diff --git a/tools/Eigen/src/CholmodSupport/CholmodSupport.h b/tools/Eigen/src/CholmodSupport/CholmodSupport.h new file mode 100644 index 0000000..99dbe17 --- /dev/null +++ b/tools/Eigen/src/CholmodSupport/CholmodSupport.h @@ -0,0 +1,607 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CHOLMODSUPPORT_H +#define EIGEN_CHOLMODSUPPORT_H + +namespace Eigen { + +namespace internal { + +template +void cholmod_configure_matrix(CholmodType& mat) +{ + if (internal::is_same::value) + { + mat.xtype = CHOLMOD_REAL; + mat.dtype = CHOLMOD_SINGLE; + } + else if (internal::is_same::value) + { + mat.xtype = CHOLMOD_REAL; + mat.dtype = CHOLMOD_DOUBLE; + } + else if (internal::is_same >::value) + { + mat.xtype = CHOLMOD_COMPLEX; + mat.dtype = CHOLMOD_SINGLE; + } + else if (internal::is_same >::value) + { + mat.xtype = CHOLMOD_COMPLEX; + mat.dtype = CHOLMOD_DOUBLE; + } + else + { + eigen_assert(false && "Scalar type not supported by CHOLMOD"); + } +} + +} // namespace internal + +/** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. + * Note that the data are shared. + */ +template +cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) +{ + cholmod_sparse res; + res.nzmax = mat.nonZeros(); + res.nrow = mat.rows();; + res.ncol = mat.cols(); + res.p = mat.outerIndexPtr(); + res.i = mat.innerIndexPtr(); + res.x = mat.valuePtr(); + res.z = 0; + res.sorted = 1; + if(mat.isCompressed()) + { + res.packed = 1; + res.nz = 0; + } + else + { + res.packed = 0; + res.nz = mat.innerNonZeroPtr(); + } + + res.dtype = 0; + res.stype = -1; + + if (internal::is_same<_Index,int>::value) + { + res.itype = CHOLMOD_INT; + } + else if (internal::is_same<_Index,SuiteSparse_long>::value) + { + res.itype = CHOLMOD_LONG; + } + else + { + eigen_assert(false && "Index type not supported yet"); + } + + // setup res.xtype + internal::cholmod_configure_matrix<_Scalar>(res); + + res.stype = 0; + + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat) +{ + cholmod_sparse res = viewAsCholmod(mat.const_cast_derived()); + return res; +} + +/** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. + * The data are not copied but shared. */ +template +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +{ + cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived()); + + if(UpLo==Upper) res.stype = 1; + if(UpLo==Lower) res.stype = -1; + + return res; +} + +/** Returns a view of the Eigen \b dense matrix \a mat as Cholmod dense matrix. + * The data are not copied but shared. */ +template +cholmod_dense viewAsCholmod(MatrixBase& mat) +{ + EIGEN_STATIC_ASSERT((internal::traits::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + typedef typename Derived::Scalar Scalar; + + cholmod_dense res; + res.nrow = mat.rows(); + res.ncol = mat.cols(); + res.nzmax = res.nrow * res.ncol; + res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride(); + res.x = (void*)(mat.derived().data()); + res.z = 0; + + internal::cholmod_configure_matrix(res); + + return res; +} + +/** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. + * The data are not copied but shared. */ +template +MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) +{ + return MappedSparseMatrix + (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], + static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); +} + +enum CholmodMode { + CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt +}; + + +/** \ingroup CholmodSupport_Module + * \class CholmodBase + * \brief The base class for the direct Cholesky factorization of Cholmod + * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT + */ +template +class CholmodBase : internal::noncopyable +{ + public: + typedef _MatrixType MatrixType; + enum { UpLo = _UpLo }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef MatrixType CholMatrixType; + typedef typename MatrixType::Index Index; + + public: + + CholmodBase() + : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + { + m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); + cholmod_start(&m_cholmod); + } + + CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + { + m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); + cholmod_start(&m_cholmod); + compute(matrix); + } + + ~CholmodBase() + { + if(m_cholmodFactor) + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + cholmod_finish(&m_cholmod); + } + + inline Index cols() const { return m_cholmodFactor->n; } + inline Index rows() const { return m_cholmodFactor->n; } + + Derived& derived() { return *static_cast(this); } + const Derived& derived() const { return *static_cast(this); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + /** Computes the sparse Cholesky decomposition of \a matrix */ + Derived& compute(const MatrixType& matrix) + { + analyzePattern(matrix); + factorize(matrix); + return derived(); + } + + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * \sa compute() + */ + template + inline const internal::solve_retval + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(rows()==b.rows() + && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); + return internal::solve_retval(*this, b.derived()); + } + + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * \sa compute() + */ + template + inline const internal::sparse_solve_retval + solve(const SparseMatrixBase& b) const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(rows()==b.rows() + && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); + return internal::sparse_solve_retval(*this, b.derived()); + } + + /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. + * + * This function is particularly useful when solving for several problems having the same structure. + * + * \sa factorize() + */ + void analyzePattern(const MatrixType& matrix) + { + if(m_cholmodFactor) + { + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + m_cholmodFactor = 0; + } + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); + + this->m_isInitialized = true; + this->m_info = Success; + m_analysisIsOk = true; + m_factorizationIsOk = false; + } + + /** Performs a numeric decomposition of \a matrix + * + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. + * + * \sa analyzePattern() + */ + void factorize(const MatrixType& matrix) + { + eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); + + // If the factorization failed, minor is the column at which it did. On success minor == n. + this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); + m_factorizationIsOk = true; + } + + /** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations. + * See the Cholmod user guide for details. */ + cholmod_common& cholmod() { return m_cholmod; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal */ + template + void _solve(const MatrixBase &b, MatrixBase &dest) const + { + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size==b.rows()); + + // note: cd stands for Cholmod Dense + Rhs& b_ref(b.const_cast_derived()); + cholmod_dense b_cd = viewAsCholmod(b_ref); + cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); + if(!x_cd) + { + this->m_info = NumericalIssue; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); + cholmod_free_dense(&x_cd, &m_cholmod); + } + + /** \internal */ + template + void _solve(const SparseMatrix &b, SparseMatrix &dest) const + { + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size==b.rows()); + + // note: cs stands for Cholmod Sparse + cholmod_sparse b_cs = viewAsCholmod(b); + cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); + if(!x_cs) + { + this->m_info = NumericalIssue; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + dest = viewAsEigen(*x_cs); + cholmod_free_sparse(&x_cs, &m_cholmod); + } + #endif // EIGEN_PARSED_BY_DOXYGEN + + + /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization. + * + * During the numerical factorization, an offset term is added to the diagonal coefficients:\n + * \c d_ii = \a offset + \c d_ii + * + * The default is \a offset=0. + * + * \returns a reference to \c *this. + */ + Derived& setShift(const RealScalar& offset) + { + m_shiftOffset[0] = offset; + return derived(); + } + + template + void dumpMemory(Stream& /*s*/) + {} + + protected: + mutable cholmod_common m_cholmod; + cholmod_factor* m_cholmodFactor; + RealScalar m_shiftOffset[2]; + mutable ComputationInfo m_info; + bool m_isInitialized; + int m_factorizationIsOk; + int m_analysisIsOk; +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLLT + * \brief A simplicial direct Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT + */ +template +class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSimplicialLLT() : Base() { init(); } + + CholmodSimplicialLLT(const MatrixType& matrix) : Base() + { + init(); + Base::compute(matrix); + } + + ~CholmodSimplicialLLT() {} + protected: + void init() + { + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + } +}; + + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLDLT + * \brief A simplicial direct Cholesky (LDLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT + */ +template +class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSimplicialLDLT() : Base() { init(); } + + CholmodSimplicialLDLT(const MatrixType& matrix) : Base() + { + init(); + Base::compute(matrix); + } + + ~CholmodSimplicialLDLT() {} + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSupernodalLLT + * \brief A supernodal Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization + * using the Cholmod library. + * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \sa \ref TutorialSparseDirectSolvers + */ +template +class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSupernodalLLT() : Base() { init(); } + + CholmodSupernodalLLT(const MatrixType& matrix) : Base() + { + init(); + Base::compute(matrix); + } + + ~CholmodSupernodalLLT() {} + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodDecomposition + * \brief A general Cholesky factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization + * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * This variant permits to change the underlying Cholesky method at runtime. + * On the other hand, it does not provide access to the result of the factorization. + * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \sa \ref TutorialSparseDirectSolvers + */ +template +class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodDecomposition() : Base() { init(); } + + CholmodDecomposition(const MatrixType& matrix) : Base() + { + init(); + Base::compute(matrix); + } + + ~CholmodDecomposition() {} + + void setMode(CholmodMode mode) + { + switch(mode) + { + case CholmodAuto: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + break; + case CholmodSimplicialLLt: + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + break; + case CholmodSupernodalLLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + break; + case CholmodLDLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + break; + default: + break; + } + } + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + } +}; + +namespace internal { + +template +struct solve_retval, Rhs> + : solve_retval_base, Rhs> +{ + typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; + EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) + + template void evalTo(Dest& dst) const + { + dec()._solve(rhs(),dst); + } +}; + +template +struct sparse_solve_retval, Rhs> + : sparse_solve_retval_base, Rhs> +{ + typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; + EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) + + template void evalTo(Dest& dst) const + { + dec()._solve(rhs(),dst); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CHOLMODSUPPORT_H diff --git a/tools/Eigen/src/Core/arch/AltiVec/CMakeLists.txt b/tools/Eigen/src/Core/arch/AltiVec/CMakeLists.txt new file mode 100644 index 0000000..9f8d2e9 --- /dev/null +++ b/tools/Eigen/src/Core/arch/AltiVec/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_arch_AltiVec_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel +) From c9014f7ca02e7cfdc66ba84a9bad6e36daa4d7eb Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:08:09 +0200 Subject: [PATCH 11/25] add more eigen files --- tools/Eigen/src/Core/arch/AltiVec/Complex.h | 217 ++++++++ .../Eigen/src/Core/arch/AltiVec/PacketMath.h | 501 ++++++++++++++++++ tools/Eigen/src/Core/arch/CMakeLists.txt | 4 + .../src/Core/arch/Default/CMakeLists.txt | 6 + tools/Eigen/src/Core/arch/Default/Settings.h | 49 ++ tools/Eigen/src/Core/arch/NEON/CMakeLists.txt | 6 + tools/Eigen/src/Core/arch/NEON/Complex.h | 253 +++++++++ tools/Eigen/src/Core/arch/NEON/PacketMath.h | 420 +++++++++++++++ tools/Eigen/src/Core/arch/SSE/CMakeLists.txt | 6 + tools/Eigen/src/Core/arch/SSE/Complex.h | 442 +++++++++++++++ 10 files changed, 1904 insertions(+) create mode 100644 tools/Eigen/src/Core/arch/AltiVec/Complex.h create mode 100644 tools/Eigen/src/Core/arch/AltiVec/PacketMath.h create mode 100644 tools/Eigen/src/Core/arch/CMakeLists.txt create mode 100644 tools/Eigen/src/Core/arch/Default/CMakeLists.txt create mode 100644 tools/Eigen/src/Core/arch/Default/Settings.h create mode 100644 tools/Eigen/src/Core/arch/NEON/CMakeLists.txt create mode 100644 tools/Eigen/src/Core/arch/NEON/Complex.h create mode 100644 tools/Eigen/src/Core/arch/NEON/PacketMath.h create mode 100644 tools/Eigen/src/Core/arch/SSE/CMakeLists.txt create mode 100644 tools/Eigen/src/Core/arch/SSE/Complex.h diff --git a/tools/Eigen/src/Core/arch/AltiVec/Complex.h b/tools/Eigen/src/Core/arch/AltiVec/Complex.h new file mode 100644 index 0000000..68d9a2b --- /dev/null +++ b/tools/Eigen/src/Core/arch/AltiVec/Complex.h @@ -0,0 +1,217 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLEX_ALTIVEC_H +#define EIGEN_COMPLEX_ALTIVEC_H + +namespace Eigen { + +namespace internal { + +static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; +static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; +static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; +static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; +static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; + +//---------- float ---------- +struct Packet2cf +{ + EIGEN_STRONG_INLINE Packet2cf() {} + EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} + Packet4f v; +}; + +template<> struct packet_traits > : default_packet_traits +{ + typedef Packet2cf type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 2, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; }; + +template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) +{ + Packet2cf res; + /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */ + if((ptrdiff_t(&from) % 16) == 0) + res.v = pload((const float *)&from); + else + res.v = ploadu((const float *)&from); + res.v = vec_perm(res.v, res.v, p16uc_PSET_HI); + return res; +} + +template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); } + +template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) +{ + Packet4f v1, v2; + + // Permute and multiply the real parts of a and b + v1 = vec_perm(a.v, a.v, p16uc_COMPLEX_RE); + // Get the imaginary parts of a + v2 = vec_perm(a.v, a.v, p16uc_COMPLEX_IM); + // multiply a_re * b + v1 = vec_madd(v1, b.v, p4f_ZERO); + // multiply a_im * b and get the conjugate result + v2 = vec_madd(v2, b.v, p4f_ZERO); + v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); + // permute back to a proper order + v2 = vec_perm(v2, v2, p16uc_COMPLEX_REV); + + return Packet2cf(vec_add(v1, v2)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); } + +template<> EIGEN_STRONG_INLINE Packet2cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } + +template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) +{ + return pset1(*from); +} + +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } + +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); } + +template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) +{ + std::complex EIGEN_ALIGN16 res[2]; + pstore((float *)&res, a.v); + + return res[0]; +} + +template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) +{ + Packet4f rev_a; + rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX_REV2); + return Packet2cf(rev_a); +} + +template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) +{ + Packet4f b; + b = (Packet4f) vec_sld(a.v, a.v, 8); + b = padd(a.v, b); + return pfirst(Packet2cf(b)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf preduxp(const Packet2cf* vecs) +{ + Packet4f b1, b2; + + b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); + b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); + b2 = (Packet4f) vec_sld(b2, b2, 8); + b2 = padd(b1, b2); + + return Packet2cf(b2); +} + +template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) +{ + Packet4f b; + Packet2cf prod; + b = (Packet4f) vec_sld(a.v, a.v, 8); + prod = pmul(a, Packet2cf(b)); + + return pfirst(prod); +} + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second) + { + if (Offset==1) + { + first.v = vec_sld(first.v, second.v, 8); + } + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return internal::pmul(a, pconj(b)); + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return internal::pmul(pconj(a), b); + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return pconj(internal::pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) +{ + // TODO optimize it for AltiVec + Packet2cf res = conj_helper().pmul(a,b); + Packet4f s = vec_madd(b.v, b.v, p4f_ZERO); + return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV)))); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) +{ + return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV)); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPLEX_ALTIVEC_H diff --git a/tools/Eigen/src/Core/arch/AltiVec/PacketMath.h b/tools/Eigen/src/Core/arch/AltiVec/PacketMath.h new file mode 100644 index 0000000..e408996 --- /dev/null +++ b/tools/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -0,0 +1,501 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Konstantinos Margaritis +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_ALTIVEC_H +#define EIGEN_PACKET_MATH_ALTIVEC_H + +namespace Eigen { + +namespace internal { + +#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 +#endif + +#ifndef EIGEN_HAS_FUSE_CJMADD +#define EIGEN_HAS_FUSE_CJMADD 1 +#endif + +// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 +#endif + +typedef __vector float Packet4f; +typedef __vector int Packet4i; +typedef __vector unsigned int Packet4ui; +typedef __vector __bool int Packet4bi; +typedef __vector short int Packet8i; +typedef __vector unsigned char Packet16uc; + +// We don't want to write the same code all the time, but we need to reuse the constants +// and it doesn't really work to declare them global, so we define macros instead + +#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ + Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X) + +#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ + Packet4i p4i_##NAME = vec_splat_s32(X) + +#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ + Packet4f p4f_##NAME = pset1(X) + +#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ + Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X)) + +#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ + Packet4i p4i_##NAME = pset1(X) + +#define DST_CHAN 1 +#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) + +// Define global static constants: +static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 }; +static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 }; +static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; +static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); +static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7}; + +static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); +static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); +static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); +static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); +static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); +static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); +static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); + +template<> struct packet_traits : default_packet_traits +{ + typedef Packet4f type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=4, + + // FIXME check the Has* + HasSin = 0, + HasCos = 0, + HasLog = 0, + HasExp = 0, + HasSqrt = 0 + }; +}; +template<> struct packet_traits : default_packet_traits +{ + typedef Packet4i type; + enum { + // FIXME check the Has* + Vectorizable = 1, + AlignedOnScalar = 1, + size=4 + }; +}; + +template<> struct unpacket_traits { typedef float type; enum {size=4}; }; +template<> struct unpacket_traits { typedef int type; enum {size=4}; }; +/* +inline std::ostream & operator <<(std::ostream & s, const Packet4f & v) +{ + union { + Packet4f v; + float n[4]; + } vt; + vt.v = v; + s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; + return s; +} + +inline std::ostream & operator <<(std::ostream & s, const Packet4i & v) +{ + union { + Packet4i v; + int n[4]; + } vt; + vt.v = v; + s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; + return s; +} + +inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) +{ + union { + Packet4ui v; + unsigned int n[4]; + } vt; + vt.v = v; + s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; + return s; +} + +inline std::ostream & operator <<(std::ostream & s, const Packetbi & v) +{ + union { + Packet4bi v; + unsigned int n[4]; + } vt; + vt.v = v; + s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; + return s; +} +*/ +template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { + // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html + float EIGEN_ALIGN16 af[4]; + af[0] = from; + Packet4f vc = vec_ld(0, af); + vc = vec_splat(vc, 0); + return vc; +} + +template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { + int EIGEN_ALIGN16 ai[4]; + ai[0] = from; + Packet4i vc = vec_ld(0, ai); + vc = vec_splat(vc, 0); + return vc; +} + +template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return vec_add(pset1(a), p4f_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return vec_add(pset1(a), p4i_COUNTDOWN); } + +template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub(p4f_ZERO, a); } +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub(p4i_ZERO, a); } + +template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } +template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet4f pmul(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); } +/* Commented out: it's actually slower than processing it scalar + * +template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) +{ + // Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec + //Set up constants, variables + Packet4i a1, b1, bswap, low_prod, high_prod, prod, prod_, v1sel; + + // Get the absolute values + a1 = vec_abs(a); + b1 = vec_abs(b); + + // Get the signs using xor + Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO); + + // Do the multiplication for the asbolute values. + bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 ); + low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1); + high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO); + high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16); + prod = vec_add( low_prod, high_prod ); + + // NOR the product and select only the negative elements according to the sign mask + prod_ = vec_nor(prod, prod); + prod_ = vec_sel(p4i_ZERO, prod_, sgn); + + // Add 1 to the result to get the negative numbers + v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn); + prod_ = vec_add(prod_, v1sel); + + // Merge the results back to the final vector. + prod = vec_sel(prod, prod_, sgn); + + return prod; +} +*/ +template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) +{ + Packet4f t, y_0, y_1, res; + + // Altivec does not offer a divide instruction, we have to do a reciprocal approximation + y_0 = vec_re(b); + + // Do one Newton-Raphson iteration to get the needed accuracy + t = vec_nmsub(y_0, b, p4f_ONE); + y_1 = vec_madd(y_0, t, y_0); + + res = vec_madd(a, y_1, p4f_ZERO); + return res; +} + +template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) +{ eigen_assert(false && "packet integer division are not supported by AltiVec"); + return pset1(0); +} + +// for some weird raisons, it has to be overloaded for packet of integers +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); } +template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } + +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } + +// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics +template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4f por(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); } +template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } + +template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } +template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } + +template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } +template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } + +template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD + // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html + Packet16uc MSQ, LSQ; + Packet16uc mask; + MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword + LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword + mask = vec_lvsl(0, from); // create the permute mask + return (Packet4f) vec_perm(MSQ, LSQ, mask); // align the data + +} +template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD + // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html + Packet16uc MSQ, LSQ; + Packet16uc mask; + MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword + LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword + mask = vec_lvsl(0, from); // create the permute mask + return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data +} + +template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) +{ + Packet4f p; + if((ptrdiff_t(&from) % 16) == 0) p = pload(from); + else p = ploadu(from); + return vec_perm(p, p, p16uc_DUPLICATE); +} +template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) +{ + Packet4i p; + if((ptrdiff_t(&from) % 16) == 0) p = pload(from); + else p = ploadu(from); + return vec_perm(p, p, p16uc_DUPLICATE); +} + +template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } +template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } + +template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) +{ + EIGEN_DEBUG_UNALIGNED_STORE + // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html + // Warning: not thread safe! + Packet16uc MSQ, LSQ, edges; + Packet16uc edgeAlign, align; + + MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword + LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword + edgeAlign = vec_lvsl(0, to); // permute map to extract edges + edges=vec_perm(LSQ,MSQ,edgeAlign); // extract the edges + align = vec_lvsr( 0, to ); // permute map to misalign data + MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ) + LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ) + vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first + vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part +} +template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) +{ + EIGEN_DEBUG_UNALIGNED_STORE + // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html + // Warning: not thread safe! + Packet16uc MSQ, LSQ, edges; + Packet16uc edgeAlign, align; + + MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword + LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword + edgeAlign = vec_lvsl(0, to); // permute map to extract edges + edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges + align = vec_lvsr( 0, to ); // permute map to misalign data + MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ) + LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ) + vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first + vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part +} + +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } + +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } +template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } + +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } + +template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } +template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } + +template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) +{ + Packet4f b, sum; + b = (Packet4f) vec_sld(a, a, 8); + sum = vec_add(a, b); + b = (Packet4f) vec_sld(sum, sum, 4); + sum = vec_add(sum, b); + return pfirst(sum); +} + +template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) +{ + Packet4f v[4], sum[4]; + + // It's easier and faster to transpose then add as columns + // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation + // Do the transpose, first set of moves + v[0] = vec_mergeh(vecs[0], vecs[2]); + v[1] = vec_mergel(vecs[0], vecs[2]); + v[2] = vec_mergeh(vecs[1], vecs[3]); + v[3] = vec_mergel(vecs[1], vecs[3]); + // Get the resulting vectors + sum[0] = vec_mergeh(v[0], v[2]); + sum[1] = vec_mergel(v[0], v[2]); + sum[2] = vec_mergeh(v[1], v[3]); + sum[3] = vec_mergel(v[1], v[3]); + + // Now do the summation: + // Lines 0+1 + sum[0] = vec_add(sum[0], sum[1]); + // Lines 2+3 + sum[1] = vec_add(sum[2], sum[3]); + // Add the results + sum[0] = vec_add(sum[0], sum[1]); + + return sum[0]; +} + +template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +{ + Packet4i sum; + sum = vec_sums(a, p4i_ZERO); + sum = vec_sld(sum, p4i_ZERO, 12); + return pfirst(sum); +} + +template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +{ + Packet4i v[4], sum[4]; + + // It's easier and faster to transpose then add as columns + // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation + // Do the transpose, first set of moves + v[0] = vec_mergeh(vecs[0], vecs[2]); + v[1] = vec_mergel(vecs[0], vecs[2]); + v[2] = vec_mergeh(vecs[1], vecs[3]); + v[3] = vec_mergel(vecs[1], vecs[3]); + // Get the resulting vectors + sum[0] = vec_mergeh(v[0], v[2]); + sum[1] = vec_mergel(v[0], v[2]); + sum[2] = vec_mergeh(v[1], v[3]); + sum[3] = vec_mergel(v[1], v[3]); + + // Now do the summation: + // Lines 0+1 + sum[0] = vec_add(sum[0], sum[1]); + // Lines 2+3 + sum[1] = vec_add(sum[2], sum[3]); + // Add the results + sum[0] = vec_add(sum[0], sum[1]); + + return sum[0]; +} + +// Other reduction functions: +// mul +template<> EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) +{ + Packet4f prod; + prod = pmul(a, (Packet4f)vec_sld(a, a, 8)); + return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4))); +} + +template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) +{ + EIGEN_ALIGN16 int aux[4]; + pstore(aux, a); + return aux[0] * aux[1] * aux[2] * aux[3]; +} + +// min +template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) +{ + Packet4f b, res; + b = vec_min(a, vec_sld(a, a, 8)); + res = vec_min(b, vec_sld(b, b, 4)); + return pfirst(res); +} + +template<> EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) +{ + Packet4i b, res; + b = vec_min(a, vec_sld(a, a, 8)); + res = vec_min(b, vec_sld(b, b, 4)); + return pfirst(res); +} + +// max +template<> EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) +{ + Packet4f b, res; + b = vec_max(a, vec_sld(a, a, 8)); + res = vec_max(b, vec_sld(b, b, 4)); + return pfirst(res); +} + +template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) +{ + Packet4i b, res; + b = vec_max(a, vec_sld(a, a, 8)); + res = vec_max(b, vec_sld(b, b, 4)); + return pfirst(res); +} + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second) + { + if (Offset!=0) + first = vec_sld(first, second, Offset*4); + } +}; + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) + { + if (Offset!=0) + first = vec_sld(first, second, Offset*4); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_ALTIVEC_H diff --git a/tools/Eigen/src/Core/arch/CMakeLists.txt b/tools/Eigen/src/Core/arch/CMakeLists.txt new file mode 100644 index 0000000..8456dec --- /dev/null +++ b/tools/Eigen/src/Core/arch/CMakeLists.txt @@ -0,0 +1,4 @@ +ADD_SUBDIRECTORY(SSE) +ADD_SUBDIRECTORY(AltiVec) +ADD_SUBDIRECTORY(NEON) +ADD_SUBDIRECTORY(Default) diff --git a/tools/Eigen/src/Core/arch/Default/CMakeLists.txt b/tools/Eigen/src/Core/arch/Default/CMakeLists.txt new file mode 100644 index 0000000..339c091 --- /dev/null +++ b/tools/Eigen/src/Core/arch/Default/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_arch_Default_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_arch_Default_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/Default COMPONENT Devel +) diff --git a/tools/Eigen/src/Core/arch/Default/Settings.h b/tools/Eigen/src/Core/arch/Default/Settings.h new file mode 100644 index 0000000..097373c --- /dev/null +++ b/tools/Eigen/src/Core/arch/Default/Settings.h @@ -0,0 +1,49 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +/* All the parameters defined in this file can be specialized in the + * architecture specific files, and/or by the user. + * More to come... */ + +#ifndef EIGEN_DEFAULT_SETTINGS_H +#define EIGEN_DEFAULT_SETTINGS_H + +/** Defines the maximal loop size to enable meta unrolling of loops. + * Note that the value here is expressed in Eigen's own notion of "number of FLOPS", + * it does not correspond to the number of iterations or the number of instructions + */ +#ifndef EIGEN_UNROLLING_LIMIT +#define EIGEN_UNROLLING_LIMIT 100 +#endif + +/** Defines the threshold between a "small" and a "large" matrix. + * This threshold is mainly used to select the proper product implementation. + */ +#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 +#endif + +/** Defines the maximal width of the blocks used in the triangular product and solver + * for vectors (level 2 blas xTRMV and xTRSV). The default is 8. + */ +#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH +#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8 +#endif + + +/** Defines the default number of registers available for that architecture. + * Currently it must be 8 or 16. Other values will fail. + */ +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 +#endif + +#endif // EIGEN_DEFAULT_SETTINGS_H diff --git a/tools/Eigen/src/Core/arch/NEON/CMakeLists.txt b/tools/Eigen/src/Core/arch/NEON/CMakeLists.txt new file mode 100644 index 0000000..fd4d4af --- /dev/null +++ b/tools/Eigen/src/Core/arch/NEON/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_arch_NEON_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel +) diff --git a/tools/Eigen/src/Core/arch/NEON/Complex.h b/tools/Eigen/src/Core/arch/NEON/Complex.h new file mode 100644 index 0000000..8d9255e --- /dev/null +++ b/tools/Eigen/src/Core/arch/NEON/Complex.h @@ -0,0 +1,253 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLEX_NEON_H +#define EIGEN_COMPLEX_NEON_H + +namespace Eigen { + +namespace internal { + +static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); +static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); + +//---------- float ---------- +struct Packet2cf +{ + EIGEN_STRONG_INLINE Packet2cf() {} + EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} + Packet4f v; +}; + +template<> struct packet_traits > : default_packet_traits +{ + typedef Packet2cf type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 2, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; }; + +template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) +{ + float32x2_t r64; + r64 = vld1_f32((float *)&from); + + return Packet2cf(vcombine_f32(r64, r64)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) +{ + Packet4ui b = vreinterpretq_u32_f32(a.v); + return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR))); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) +{ + Packet4f v1, v2; + + // Get the real values of a | a1_re | a1_re | a2_re | a2_re | + v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0)); + // Get the real values of a | a1_im | a1_im | a2_im | a2_im | + v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1)); + // Multiply the real a with b + v1 = vmulq_f32(v1, b.v); + // Multiply the imag a with b + v2 = vmulq_f32(v2, b.v); + // Conjugate v2 + v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR)); + // Swap real/imag elements in v2. + v2 = vrev64q_f32(v2); + // Add and return the result + return Packet2cf(vaddq_f32(v1, v2)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} +template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) +{ + return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } + +template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } + +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } + +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ARM_PREFETCH((float *)addr); } + +template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) +{ + std::complex EIGEN_ALIGN16 x[2]; + vst1q_f32((float *)x, a.v); + return x[0]; +} + +template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) +{ + float32x2_t a_lo, a_hi; + Packet4f a_r128; + + a_lo = vget_low_f32(a.v); + a_hi = vget_high_f32(a.v); + a_r128 = vcombine_f32(a_hi, a_lo); + + return Packet2cf(a_r128); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& a) +{ + return Packet2cf(vrev64q_f32(a.v)); +} + +template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) +{ + float32x2_t a1, a2; + std::complex s; + + a1 = vget_low_f32(a.v); + a2 = vget_high_f32(a.v); + a2 = vadd_f32(a1, a2); + vst1_f32((float *)&s, a2); + + return s; +} + +template<> EIGEN_STRONG_INLINE Packet2cf preduxp(const Packet2cf* vecs) +{ + Packet4f sum1, sum2, sum; + + // Add the first two 64-bit float32x2_t of vecs[0] + sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v)); + sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v)); + sum = vaddq_f32(sum1, sum2); + + return Packet2cf(sum); +} + +template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) +{ + float32x2_t a1, a2, v1, v2, prod; + std::complex s; + + a1 = vget_low_f32(a.v); + a2 = vget_high_f32(a.v); + // Get the real values of a | a1_re | a1_re | a2_re | a2_re | + v1 = vdup_lane_f32(a1, 0); + // Get the real values of a | a1_im | a1_im | a2_im | a2_im | + v2 = vdup_lane_f32(a1, 1); + // Multiply the real a with b + v1 = vmul_f32(v1, a2); + // Multiply the imag a with b + v2 = vmul_f32(v2, a2); + // Conjugate v2 + v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR)); + // Swap real/imag elements in v2. + v2 = vrev64_f32(v2); + // Add v1, v2 + prod = vadd_f32(v1, v2); + + vst1_f32((float *)&s, prod); + + return s; +} + +template +struct palign_impl +{ + EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) + { + if (Offset==1) + { + first.v = vextq_f32(first.v, second.v, 2); + } + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return internal::pmul(a, pconj(b)); + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return internal::pmul(pconj(a), b); + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + return pconj(internal::pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) +{ + // TODO optimize it for AltiVec + Packet2cf res = conj_helper().pmul(a,b); + Packet4f s, rev_s; + + // this computes the norm + s = vmulq_f32(b.v, b.v); + rev_s = vrev64q_f32(s); + + return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPLEX_NEON_H diff --git a/tools/Eigen/src/Core/arch/NEON/PacketMath.h b/tools/Eigen/src/Core/arch/NEON/PacketMath.h new file mode 100644 index 0000000..d49670e --- /dev/null +++ b/tools/Eigen/src/Core/arch/NEON/PacketMath.h @@ -0,0 +1,420 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2010 Konstantinos Margaritis +// Heavily based on Gael's SSE version. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_NEON_H +#define EIGEN_PACKET_MATH_NEON_H + +namespace Eigen { + +namespace internal { + +#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 +#endif + +// FIXME NEON has 16 quad registers, but since the current register allocator +// is so bad, it is much better to reduce it to 8 +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 +#endif + +typedef float32x4_t Packet4f; +typedef int32x4_t Packet4i; +typedef uint32x4_t Packet4ui; + +#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ + const Packet4f p4f_##NAME = pset1(X) + +#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ + const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X)) + +#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ + const Packet4i p4i_##NAME = pset1(X) + +#if defined(__llvm__) && !defined(__clang__) + //Special treatment for Apple's llvm-gcc, its NEON packet types are unions + #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} + #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} +#else + //Default initializer for packets + #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} + #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} +#endif + +// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function +// which available on LLVM and GCC (at least) +#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__) + #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); +#elif defined __pld + #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) +#elif !defined(__aarch64__) + #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#else + // by default no explicit prefetching + #define EIGEN_ARM_PREFETCH(ADDR) +#endif + +template<> struct packet_traits : default_packet_traits +{ + typedef Packet4f type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 4, + + HasDiv = 1, + // FIXME check the Has* + HasSin = 0, + HasCos = 0, + HasLog = 0, + HasExp = 0, + HasSqrt = 0 + }; +}; +template<> struct packet_traits : default_packet_traits +{ + typedef Packet4i type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=4 + // FIXME check the Has* + }; +}; + +#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__) +// workaround gcc 4.2, 4.3 and 4.4 compilatin issue +EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } +EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); } +EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); } +EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } +#endif + +template<> struct unpacket_traits { typedef float type; enum {size=4}; }; +template<> struct unpacket_traits { typedef int type; enum {size=4}; }; + +template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return vdupq_n_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return vdupq_n_s32(from); } + +template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) +{ + Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + return vaddq_f32(pset1(a), countdown); +} +template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) +{ + Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + return vaddq_s32(pset1(a), countdown); +} + +template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); } +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); } + +template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } +template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet4f pmul(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) +{ + Packet4f inv, restep, div; + + // NEON does not offer a divide instruction, we have to do a reciprocal approximation + // However NEON in contrast to other SIMD engines (AltiVec/SSE), offers + // a reciprocal estimate AND a reciprocal step -which saves a few instructions + // vrecpeq_f32() returns an estimate to 1/b, which we will finetune with + // Newton-Raphson and vrecpsq_f32() + inv = vrecpeq_f32(b); + + // This returns a differential, by which we will have to multiply inv to get a better + // approximation of 1/b. + restep = vrecpsq_f32(b, inv); + inv = vmulq_f32(restep, inv); + + // Finally, multiply a by 1/b and get the wanted result of the division. + div = vmulq_f32(a, inv); + + return div; +} +template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) +{ eigen_assert(false && "packet integer division are not supported by NEON"); + return pset1(0); +} + +// for some weird raisons, it has to be overloaded for packet of integers +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); } + +// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics +template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f por(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } + +template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } + +template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) +{ + float32x2_t lo, hi; + lo = vld1_dup_f32(from); + hi = vld1_dup_f32(from+1); + return vcombine_f32(lo, hi); +} +template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) +{ + int32x2_t lo, hi; + lo = vld1_dup_s32(from); + hi = vld1_dup_s32(from+1); + return vcombine_s32(lo, hi); +} + +template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } + +template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } + +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { EIGEN_ARM_PREFETCH(addr); } + +// FIXME only store the 2 first elements ? +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } + +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { + float32x2_t a_lo, a_hi; + Packet4f a_r64; + + a_r64 = vrev64q_f32(a); + a_lo = vget_low_f32(a_r64); + a_hi = vget_high_f32(a_r64); + return vcombine_f32(a_hi, a_lo); +} +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { + int32x2_t a_lo, a_hi; + Packet4i a_r64; + + a_r64 = vrev64q_s32(a); + a_lo = vget_low_s32(a_r64); + a_hi = vget_high_s32(a_r64); + return vcombine_s32(a_hi, a_lo); +} +template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } +template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } + +template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, sum; + + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + sum = vpadd_f32(a_lo, a_hi); + sum = vpadd_f32(sum, sum); + return vget_lane_f32(sum, 0); +} + +template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) +{ + float32x4x2_t vtrn1, vtrn2, res1, res2; + Packet4f sum1, sum2, sum; + + // NEON zip performs interleaving of the supplied vectors. + // We perform two interleaves in a row to acquire the transposed vector + vtrn1 = vzipq_f32(vecs[0], vecs[2]); + vtrn2 = vzipq_f32(vecs[1], vecs[3]); + res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]); + res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]); + + // Do the addition of the resulting vectors + sum1 = vaddq_f32(res1.val[0], res1.val[1]); + sum2 = vaddq_f32(res2.val[0], res2.val[1]); + sum = vaddq_f32(sum1, sum2); + + return sum; +} + +template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, sum; + + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + sum = vpadd_s32(a_lo, a_hi); + sum = vpadd_s32(sum, sum); + return vget_lane_s32(sum, 0); +} + +template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +{ + int32x4x2_t vtrn1, vtrn2, res1, res2; + Packet4i sum1, sum2, sum; + + // NEON zip performs interleaving of the supplied vectors. + // We perform two interleaves in a row to acquire the transposed vector + vtrn1 = vzipq_s32(vecs[0], vecs[2]); + vtrn2 = vzipq_s32(vecs[1], vecs[3]); + res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]); + res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]); + + // Do the addition of the resulting vectors + sum1 = vaddq_s32(res1.val[0], res1.val[1]); + sum2 = vaddq_s32(res2.val[0], res2.val[1]); + sum = vaddq_s32(sum1, sum2); + + return sum; +} + +// Other reduction functions: +// mul +template<> EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, prod; + + // Get a_lo = |a1|a2| and a_hi = |a3|a4| + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + // Get the product of a_lo * a_hi -> |a1*a3|a2*a4| + prod = vmul_f32(a_lo, a_hi); + // Multiply prod with its swapped value |a2*a4|a1*a3| + prod = vmul_f32(prod, vrev64_f32(prod)); + + return vget_lane_f32(prod, 0); +} +template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, prod; + + // Get a_lo = |a1|a2| and a_hi = |a3|a4| + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + // Get the product of a_lo * a_hi -> |a1*a3|a2*a4| + prod = vmul_s32(a_lo, a_hi); + // Multiply prod with its swapped value |a2*a4|a1*a3| + prod = vmul_s32(prod, vrev64_s32(prod)); + + return vget_lane_s32(prod, 0); +} + +// min +template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, min; + + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + min = vpmin_f32(a_lo, a_hi); + min = vpmin_f32(min, min); + + return vget_lane_f32(min, 0); +} + +template<> EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, min; + + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + min = vpmin_s32(a_lo, a_hi); + min = vpmin_s32(min, min); + + return vget_lane_s32(min, 0); +} + +// max +template<> EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, max; + + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + max = vpmax_f32(a_lo, a_hi); + max = vpmax_f32(max, max); + + return vget_lane_f32(max, 0); +} + +template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, max; + + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + max = vpmax_s32(a_lo, a_hi); + max = vpmax_s32(max, max); + + return vget_lane_s32(max, 0); +} + +// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors, +// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 +#define PALIGN_NEON(Offset,Type,Command) \ +template<>\ +struct palign_impl\ +{\ + EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\ + {\ + if (Offset!=0)\ + first = Command(first, second, Offset);\ + }\ +};\ + +PALIGN_NEON(0,Packet4f,vextq_f32) +PALIGN_NEON(1,Packet4f,vextq_f32) +PALIGN_NEON(2,Packet4f,vextq_f32) +PALIGN_NEON(3,Packet4f,vextq_f32) +PALIGN_NEON(0,Packet4i,vextq_s32) +PALIGN_NEON(1,Packet4i,vextq_s32) +PALIGN_NEON(2,Packet4i,vextq_s32) +PALIGN_NEON(3,Packet4i,vextq_s32) + +#undef PALIGN_NEON + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_NEON_H diff --git a/tools/Eigen/src/Core/arch/SSE/CMakeLists.txt b/tools/Eigen/src/Core/arch/SSE/CMakeLists.txt new file mode 100644 index 0000000..46ea7cc --- /dev/null +++ b/tools/Eigen/src/Core/arch/SSE/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_arch_SSE_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel +) diff --git a/tools/Eigen/src/Core/arch/SSE/Complex.h b/tools/Eigen/src/Core/arch/SSE/Complex.h new file mode 100644 index 0000000..91bba5e --- /dev/null +++ b/tools/Eigen/src/Core/arch/SSE/Complex.h @@ -0,0 +1,442 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMPLEX_SSE_H +#define EIGEN_COMPLEX_SSE_H + +namespace Eigen { + +namespace internal { + +//---------- float ---------- +struct Packet2cf +{ + EIGEN_STRONG_INLINE Packet2cf() {} + EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {} + __m128 v; +}; + +template<> struct packet_traits > : default_packet_traits +{ + typedef Packet2cf type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 2, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; }; + +template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) +{ + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000)); + return Packet2cf(_mm_xor_ps(a.v,mask)); +} +template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) +{ + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); + return Packet2cf(_mm_xor_ps(a.v,mask)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) +{ + // TODO optimize it for SSE3 and 4 + #ifdef EIGEN_VECTORIZE_SSE3 + return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v), + _mm_mul_ps(_mm_movehdup_ps(a.v), + vec4f_swizzle1(b.v, 1, 0, 3, 2)))); +// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), +// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), +// vec4f_swizzle1(b.v, 1, 0, 3, 2)))); + #else + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000)); + return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), + _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), + vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask))); + #endif +} + +template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); } + +template<> EIGEN_STRONG_INLINE Packet2cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload(&numext::real_ref(*from))); } +template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu(&numext::real_ref(*from))); } + +template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) +{ + Packet2cf res; +#if EIGEN_GNUC_AT_MOST(4,2) + // Workaround annoying "may be used uninitialized in this function" warning with gcc 4.2 + res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast(&from)); +#elif EIGEN_GNUC_AT_LEAST(4,6) + // Suppress annoying "may be used uninitialized in this function" warning with gcc >= 4.6 + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wuninitialized" + res.v = _mm_loadl_pi(res.v, (const __m64*)&from); + #pragma GCC diagnostic pop +#else + res.v = _mm_loadl_pi(res.v, (const __m64*)&from); +#endif + return Packet2cf(_mm_movelh_ps(res.v,res.v)); +} + +template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } + +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); } + +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } + +template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) +{ + #if EIGEN_GNUC_AT_MOST(4,3) + // Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares... + // This workaround also fix invalid code generation with gcc 4.3 + EIGEN_ALIGN16 std::complex res[2]; + _mm_store_ps((float*)res, a.v); + return res[0]; + #else + std::complex res; + _mm_storel_pi((__m64*)&res, a.v); + return res; + #endif +} + +template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); } + +template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) +{ + return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v)))); +} + +template<> EIGEN_STRONG_INLINE Packet2cf preduxp(const Packet2cf* vecs) +{ + return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v))); +} + +template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) +{ + return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v)))); +} + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second) + { + if (Offset==1) + { + first.v = _mm_movehl_ps(first.v, first.v); + first.v = _mm_movelh_ps(first.v, second.v); + } + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + #ifdef EIGEN_VECTORIZE_SSE3 + return internal::pmul(a, pconj(b)); + #else + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); + return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask), + _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), + vec4f_swizzle1(b.v, 1, 0, 3, 2)))); + #endif + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + #ifdef EIGEN_VECTORIZE_SSE3 + return internal::pmul(pconj(a), b); + #else + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); + return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), + _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), + vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask))); + #endif + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const + { + #ifdef EIGEN_VECTORIZE_SSE3 + return pconj(internal::pmul(a, b)); + #else + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); + return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask), + _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), + vec4f_swizzle1(b.v, 1, 0, 3, 2)))); + #endif + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const + { return Packet2cf(Eigen::internal::pmul(x, y.v)); } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const + { return Packet2cf(Eigen::internal::pmul(x.v, y)); } +}; + +template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) +{ + // TODO optimize it for SSE3 and 4 + Packet2cf res = conj_helper().pmul(a,b); + __m128 s = _mm_mul_ps(b.v,b.v); + return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1))))); +} + +EIGEN_STRONG_INLINE Packet2cf pcplxflip/**/(const Packet2cf& x) +{ + return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2)); +} + + +//---------- double ---------- +struct Packet1cd +{ + EIGEN_STRONG_INLINE Packet1cd() {} + EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {} + __m128d v; +}; + +template<> struct packet_traits > : default_packet_traits +{ + typedef Packet1cd type; + enum { + Vectorizable = 1, + AlignedOnScalar = 0, + size = 1, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits { typedef std::complex type; enum {size=1}; }; + +template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(a.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) +{ + const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); + return Packet1cd(_mm_xor_pd(a.v,mask)); +} + +template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) +{ + // TODO optimize it for SSE3 and 4 + #ifdef EIGEN_VECTORIZE_SSE3 + return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), + _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), + vec2d_swizzle1(b.v, 1, 0)))); + #else + const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); + return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), + _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), + vec2d_swizzle1(b.v, 1, 0)), mask))); + #endif +} + +template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); } + +// FIXME force unaligned load, this is a temporary fix +template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) +{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload((const double*)from)); } +template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) +{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu((const double*)from)); } +template<> EIGEN_STRONG_INLINE Packet1cd pset1(const std::complex& from) +{ /* here we really have to use unaligned loads :( */ return ploadu(&from); } + +template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) { return pset1(*from); } + +// FIXME force unaligned store, this is a temporary fix +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } + +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } + +template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) +{ + EIGEN_ALIGN16 double res[2]; + _mm_store_pd(res, a.v); + return std::complex(res[0],res[1]); +} + +template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; } + +template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) +{ + return pfirst(a); +} + +template<> EIGEN_STRONG_INLINE Packet1cd preduxp(const Packet1cd* vecs) +{ + return vecs[0]; +} + +template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) +{ + return pfirst(a); +} + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) + { + // FIXME is it sure we never have to align a Packet1cd? + // Even though a std::complex has 16 bytes, it is not necessarily aligned on a 16 bytes boundary... + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + #ifdef EIGEN_VECTORIZE_SSE3 + return internal::pmul(a, pconj(b)); + #else + const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); + return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask), + _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), + vec2d_swizzle1(b.v, 1, 0)))); + #endif + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + #ifdef EIGEN_VECTORIZE_SSE3 + return internal::pmul(pconj(a), b); + #else + const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); + return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), + _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), + vec2d_swizzle1(b.v, 1, 0)), mask))); + #endif + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + #ifdef EIGEN_VECTORIZE_SSE3 + return pconj(internal::pmul(a, b)); + #else + const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); + return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask), + _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), + vec2d_swizzle1(b.v, 1, 0)))); + #endif + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const + { return Packet1cd(Eigen::internal::pmul(x, y.v)); } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const + { return Packet1cd(Eigen::internal::pmul(x.v, y)); } +}; + +template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) +{ + // TODO optimize it for SSE3 and 4 + Packet1cd res = conj_helper().pmul(a,b); + __m128d s = _mm_mul_pd(b.v,b.v); + return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1)))); +} + +EIGEN_STRONG_INLINE Packet1cd pcplxflip/**/(const Packet1cd& x) +{ + return Packet1cd(preverse(x.v)); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COMPLEX_SSE_H From c526a1a840b72c4c9f4f749269d876823d5de8ad Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:09:16 +0200 Subject: [PATCH 12/25] add more eigen files --- tools/Eigen/src/Core/Array.h | 323 +++++++++ tools/Eigen/src/Core/ArrayBase.h | 226 +++++++ tools/Eigen/src/Core/ArrayWrapper.h | 264 ++++++++ tools/Eigen/src/Core/Assign.h | 590 +++++++++++++++++ tools/Eigen/src/Core/Assign_MKL.h | 224 +++++++ tools/Eigen/src/Core/BandMatrix.h | 334 ++++++++++ tools/Eigen/src/Core/Block.h | 406 ++++++++++++ tools/Eigen/src/Core/BooleanRedux.h | 154 +++++ tools/Eigen/src/Core/arch/SSE/MathFunctions.h | 475 ++++++++++++++ tools/Eigen/src/Core/arch/SSE/PacketMath.h | 613 ++++++++++++++++++ 10 files changed, 3609 insertions(+) create mode 100644 tools/Eigen/src/Core/Array.h create mode 100644 tools/Eigen/src/Core/ArrayBase.h create mode 100644 tools/Eigen/src/Core/ArrayWrapper.h create mode 100644 tools/Eigen/src/Core/Assign.h create mode 100644 tools/Eigen/src/Core/Assign_MKL.h create mode 100644 tools/Eigen/src/Core/BandMatrix.h create mode 100644 tools/Eigen/src/Core/Block.h create mode 100644 tools/Eigen/src/Core/BooleanRedux.h create mode 100644 tools/Eigen/src/Core/arch/SSE/MathFunctions.h create mode 100644 tools/Eigen/src/Core/arch/SSE/PacketMath.h diff --git a/tools/Eigen/src/Core/Array.h b/tools/Eigen/src/Core/Array.h new file mode 100644 index 0000000..0b9c38c --- /dev/null +++ b/tools/Eigen/src/Core/Array.h @@ -0,0 +1,323 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAY_H +#define EIGEN_ARRAY_H + +namespace Eigen { + +/** \class Array + * \ingroup Core_Module + * + * \brief General-purpose arrays with easy API for coefficient-wise operations + * + * The %Array class is very similar to the Matrix class. It provides + * general-purpose one- and two-dimensional arrays. The difference between the + * %Array and the %Matrix class is primarily in the API: the API for the + * %Array class provides easy access to coefficient-wise operations, while the + * API for the %Matrix class provides easy access to linear-algebra + * operations. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. + * + * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy + */ +namespace internal { +template +struct traits > : traits > +{ + typedef ArrayXpr XprKind; + typedef ArrayBase > XprBase; +}; +} + +template +class Array + : public PlainObjectBase > +{ + public: + + typedef PlainObjectBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Array) + + enum { Options = _Options }; + typedef typename Base::PlainObject PlainObject; + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; + + public: + + using Base::base; + using Base::coeff; + using Base::coeffRef; + + /** + * The usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + template + EIGEN_STRONG_INLINE Array& operator=(const EigenBase &other) + { + return Base::operator=(other); + } + + /** Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_STRONG_INLINE Array& operator=(const ArrayBase& other) + { + return Base::_set(other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_STRONG_INLINE Array& operator=(const Array& other) + { + return Base::_set(other); + } + + /** Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ + EIGEN_STRONG_INLINE Array() : Base() + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + // FIXME is it still needed ?? + /** \internal */ + Array(internal::constructor_without_unaligned_array_assert) + : Base(internal::constructor_without_unaligned_array_assert()) + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } +#endif + +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + Array(Array&& other) + : Base(std::move(other)) + { + Base::_check_template_params(); + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); + } + Array& operator=(Array&& other) + { + other.swap(*this); + return *this; + } +#endif + + /** Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Matrix() instead. + */ + EIGEN_STRONG_INLINE explicit Array(Index dim) + : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array) + eigen_assert(dim >= 0); + eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) + { + Base::_check_template_params(); + this->template _init2(val0, val1); + } + #else + /** constructs an uninitialized matrix with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size matrices. For fixed-size matrices, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. */ + Array(Index rows, Index cols); + /** constructs an initialized 2D vector with given coefficients */ + Array(const Scalar& val0, const Scalar& val1); + #endif + + /** constructs an initialized 3D vector with given coefficients */ + EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + } + /** constructs an initialized 4D vector with given coefficients */ + EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + m_storage.data()[3] = val3; + } + + explicit Array(const Scalar *data); + + /** Constructor copying the value of the expression \a other */ + template + EIGEN_STRONG_INLINE Array(const ArrayBase& other) + : Base(other.rows() * other.cols(), other.rows(), other.cols()) + { + Base::_check_template_params(); + Base::_set_noalias(other); + } + /** Copy constructor */ + EIGEN_STRONG_INLINE Array(const Array& other) + : Base(other.rows() * other.cols(), other.rows(), other.cols()) + { + Base::_check_template_params(); + Base::_set_noalias(other); + } + /** Copy constructor with in-place evaluation */ + template + EIGEN_STRONG_INLINE Array(const ReturnByValue& other) + { + Base::_check_template_params(); + Base::resize(other.rows(), other.cols()); + other.evalTo(*this); + } + + /** \sa MatrixBase::operator=(const EigenBase&) */ + template + EIGEN_STRONG_INLINE Array(const EigenBase &other) + : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) + { + Base::_check_template_params(); + Base::_resize_to_match(other); + *this = other; + } + + /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the + * data pointers. + */ + template + void swap(ArrayBase const & other) + { this->_swap(other.derived()); } + + inline Index innerStride() const { return 1; } + inline Index outerStride() const { return this->innerSize(); } + + #ifdef EIGEN_ARRAY_PLUGIN + #include EIGEN_ARRAY_PLUGIN + #endif + + private: + + template + friend struct internal::matrix_swap_impl; +}; + +/** \defgroup arraytypedefs Global array typedefs + * \ingroup Core_Module + * + * Eigen defines several typedef shortcuts for most common 1D and 2D array types. + * + * The general patterns are the following: + * + * \c ArrayRowsColsType where \c Rows and \c Cols can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size, + * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd + * for complex double. + * + * For example, \c Array33d is a fixed-size 3x3 array type of doubles, and \c ArrayXXf is a dynamic-size matrix of floats. + * + * There are also \c ArraySizeType which are self-explanatory. For example, \c Array4cf is + * a fixed-size 1D array of 4 complex floats. + * + * \sa class Array + */ + +#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##SizeSuffix##SizeSuffix##TypeSuffix; \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##Size##X##TypeSuffix; \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##X##Size##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_ARRAY_TYPEDEFS + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ +using Eigen::Matrix##SizeSuffix##TypeSuffix; \ +using Eigen::Vector##SizeSuffix##TypeSuffix; \ +using Eigen::RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ + +#define EIGEN_USING_ARRAY_TYPEDEFS \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd) + +} // end namespace Eigen + +#endif // EIGEN_ARRAY_H diff --git a/tools/Eigen/src/Core/ArrayBase.h b/tools/Eigen/src/Core/ArrayBase.h new file mode 100644 index 0000000..33ff553 --- /dev/null +++ b/tools/Eigen/src/Core/ArrayBase.h @@ -0,0 +1,226 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYBASE_H +#define EIGEN_ARRAYBASE_H + +namespace Eigen { + +template class MatrixWrapper; + +/** \class ArrayBase + * \ingroup Core_Module + * + * \brief Base class for all 1D and 2D array, and related expressions + * + * An array is similar to a dense vector or matrix. While matrices are mathematical + * objects with well defined linear algebra operators, an array is just a collection + * of scalar values arranged in a one or two dimensionnal fashion. As the main consequence, + * all operations applied to an array are performed coefficient wise. Furthermore, + * arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient + * constructors allowing to easily write generic code working for both scalar values + * and arrays. + * + * This class is the base that is inherited by all array expression types. + * + * \tparam Derived is the derived type, e.g., an array or an expression type. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. + * + * \sa class MatrixBase, \ref TopicClassHierarchy + */ +template class ArrayBase + : public DenseBase +{ + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** The base class for a given storage type. */ + typedef ArrayBase StorageBaseType; + + typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::operator*; + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + using Base::CoeffReadCost; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::operator=; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily + * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const + * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either + * PlainObject or const PlainObject&. + */ + typedef Array::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainObject; + + + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,Derived> ConstantReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase +# include "../plugins/CommonCwiseUnaryOps.h" +# include "../plugins/MatrixCwiseUnaryOps.h" +# include "../plugins/ArrayCwiseUnaryOps.h" +# include "../plugins/CommonCwiseBinaryOps.h" +# include "../plugins/MatrixCwiseBinaryOps.h" +# include "../plugins/ArrayCwiseBinaryOps.h" +# ifdef EIGEN_ARRAYBASE_PLUGIN +# include EIGEN_ARRAYBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + Derived& operator=(const ArrayBase& other) + { + return internal::assign_selector::run(derived(), other.derived()); + } + + Derived& operator+=(const Scalar& scalar) + { return *this = derived() + scalar; } + Derived& operator-=(const Scalar& scalar) + { return *this = derived() - scalar; } + + template + Derived& operator+=(const ArrayBase& other); + template + Derived& operator-=(const ArrayBase& other); + + template + Derived& operator*=(const ArrayBase& other); + + template + Derived& operator/=(const ArrayBase& other); + + public: + ArrayBase& array() { return *this; } + const ArrayBase& array() const { return *this; } + + /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array + * \sa MatrixBase::array() */ + MatrixWrapper matrix() { return derived(); } + const MatrixWrapper matrix() const { return derived(); } + +// template +// inline void evalTo(Dest& dst) const { dst = matrix(); } + + protected: + ArrayBase() : Base() {} + + private: + explicit ArrayBase(Index); + ArrayBase(Index,Index); + template explicit ArrayBase(const ArrayBase&); + protected: + // mixing arrays and matrices is not legal + template Derived& operator+=(const MatrixBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} + // mixing arrays and matrices is not legal + template Derived& operator-=(const MatrixBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} +}; + +/** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator-=(const ArrayBase &other) +{ + SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); + tmp = other.derived(); + return derived(); +} + +/** replaces \c *this by \c *this + \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator+=(const ArrayBase& other) +{ + SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); + tmp = other.derived(); + return derived(); +} + +/** replaces \c *this by \c *this * \a other coefficient wise. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator*=(const ArrayBase& other) +{ + SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); + tmp = other.derived(); + return derived(); +} + +/** replaces \c *this by \c *this / \a other coefficient wise. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator/=(const ArrayBase& other) +{ + SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); + tmp = other.derived(); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_ARRAYBASE_H diff --git a/tools/Eigen/src/Core/ArrayWrapper.h b/tools/Eigen/src/Core/ArrayWrapper.h new file mode 100644 index 0000000..b4641e2 --- /dev/null +++ b/tools/Eigen/src/Core/ArrayWrapper.h @@ -0,0 +1,264 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYWRAPPER_H +#define EIGEN_ARRAYWRAPPER_H + +namespace Eigen { + +/** \class ArrayWrapper + * \ingroup Core_Module + * + * \brief Expression of a mathematical vector or matrix as an array object + * + * This class is the return type of MatrixBase::array(), and most of the time + * this is the only way it is use. + * + * \sa MatrixBase::array(), class MatrixWrapper + */ + +namespace internal { +template +struct traits > + : public traits::type > +{ + typedef ArrayXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; +}; +} + +template +class ArrayWrapper : public ArrayBase > +{ + public: + typedef ArrayBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + typedef typename internal::nested::type NestedExpressionType; + + inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + inline Index rows() const { return m_expression.rows(); } + inline Index cols() const { return m_expression.cols(); } + inline Index outerStride() const { return m_expression.outerStride(); } + inline Index innerStride() const { return m_expression.innerStride(); } + + inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + inline const Scalar* data() const { return m_expression.data(); } + + inline CoeffReturnType coeff(Index rowId, Index colId) const + { + return m_expression.coeff(rowId, colId); + } + + inline Scalar& coeffRef(Index rowId, Index colId) + { + return m_expression.const_cast_derived().coeffRef(rowId, colId); + } + + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_expression.const_cast_derived().coeffRef(rowId, colId); + } + + inline CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + inline const Scalar& coeffRef(Index index) const + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index rowId, Index colId) const + { + return m_expression.template packet(rowId, colId); + } + + template + inline void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + m_expression.const_cast_derived().template writePacket(rowId, colId, val); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& val) + { + m_expression.const_cast_derived().template writePacket(index, val); + } + + template + inline void evalTo(Dest& dst) const { dst = m_expression; } + + const typename internal::remove_all::type& + nestedExpression() const + { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index) */ + void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + + protected: + NestedExpressionType m_expression; +}; + +/** \class MatrixWrapper + * \ingroup Core_Module + * + * \brief Expression of an array as a mathematical vector or matrix + * + * This class is the return type of ArrayBase::matrix(), and most of the time + * this is the only way it is use. + * + * \sa MatrixBase::matrix(), class ArrayWrapper + */ + +namespace internal { +template +struct traits > + : public traits::type > +{ + typedef MatrixXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; +}; +} + +template +class MatrixWrapper : public MatrixBase > +{ + public: + typedef MatrixBase > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + typedef typename internal::nested::type NestedExpressionType; + + inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} + + inline Index rows() const { return m_expression.rows(); } + inline Index cols() const { return m_expression.cols(); } + inline Index outerStride() const { return m_expression.outerStride(); } + inline Index innerStride() const { return m_expression.innerStride(); } + + inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + inline const Scalar* data() const { return m_expression.data(); } + + inline CoeffReturnType coeff(Index rowId, Index colId) const + { + return m_expression.coeff(rowId, colId); + } + + inline Scalar& coeffRef(Index rowId, Index colId) + { + return m_expression.const_cast_derived().coeffRef(rowId, colId); + } + + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_expression.derived().coeffRef(rowId, colId); + } + + inline CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + inline const Scalar& coeffRef(Index index) const + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index rowId, Index colId) const + { + return m_expression.template packet(rowId, colId); + } + + template + inline void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + m_expression.const_cast_derived().template writePacket(rowId, colId, val); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& val) + { + m_expression.const_cast_derived().template writePacket(index, val); + } + + const typename internal::remove_all::type& + nestedExpression() const + { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index) */ + void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + + protected: + NestedExpressionType m_expression; +}; + +} // end namespace Eigen + +#endif // EIGEN_ARRAYWRAPPER_H diff --git a/tools/Eigen/src/Core/Assign.h b/tools/Eigen/src/Core/Assign.h new file mode 100644 index 0000000..f481731 --- /dev/null +++ b/tools/Eigen/src/Core/Assign.h @@ -0,0 +1,590 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007 Michael Olbrich +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ASSIGN_H +#define EIGEN_ASSIGN_H + +namespace Eigen { + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +template +struct assign_traits +{ +public: + enum { + DstIsAligned = Derived::Flags & AlignedBit, + DstHasDirectAccess = Derived::Flags & DirectAccessBit, + SrcIsAligned = OtherDerived::Flags & AlignedBit, + JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned + }; + +private: + enum { + InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) + : int(Derived::RowsAtCompileTime), + InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) + : int(Derived::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Derived::SizeAtCompileTime, + PacketSize = packet_traits::size + }; + + enum { + StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), + MightVectorize = StorageOrdersAgree + && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), + MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(DstIsAligned) && int(SrcIsAligned), + MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), + MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess + && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + MaySliceVectorize = MightVectorize && DstHasDirectAccess + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix */ + }; + +public: + enum { + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + +private: + enum { + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), + MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic + && int(OtherDerived::CoeffReadCost) != Dynamic + && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(OtherDerived::CoeffReadCost) != Dynamic + && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + EIGEN_DEBUG_VAR(DstIsAligned) + EIGEN_DEBUG_VAR(SrcIsAligned) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + EIGEN_DEBUG_VAR(Unrolling) + } +#endif +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct assign_DefaultTraversal_CompleteUnrolling +{ + enum { + outer = Index / Derived1::InnerSizeAtCompileTime, + inner = Index % Derived1::InnerSizeAtCompileTime + }; + + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + dst.copyCoeffByOuterInner(outer, inner, src); + assign_DefaultTraversal_CompleteUnrolling::run(dst, src); + } +}; + +template +struct assign_DefaultTraversal_CompleteUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} +}; + +template +struct assign_DefaultTraversal_InnerUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) + { + dst.copyCoeffByOuterInner(outer, Index, src); + assign_DefaultTraversal_InnerUnrolling::run(dst, src, outer); + } +}; + +template +struct assign_DefaultTraversal_InnerUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct assign_LinearTraversal_CompleteUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + dst.copyCoeff(Index, src); + assign_LinearTraversal_CompleteUnrolling::run(dst, src); + } +}; + +template +struct assign_LinearTraversal_CompleteUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct assign_innervec_CompleteUnrolling +{ + enum { + outer = Index / Derived1::InnerSizeAtCompileTime, + inner = Index % Derived1::InnerSizeAtCompileTime, + JointAlignment = assign_traits::JointAlignment + }; + + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + dst.template copyPacketByOuterInner(outer, inner, src); + assign_innervec_CompleteUnrolling::size, Stop>::run(dst, src); + } +}; + +template +struct assign_innervec_CompleteUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} +}; + +template +struct assign_innervec_InnerUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) + { + dst.template copyPacketByOuterInner(outer, Index, src); + assign_innervec_InnerUnrolling::size, Stop>::run(dst, src, outer); + } +}; + +template +struct assign_innervec_InnerUnrolling +{ + static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +template::Traversal, + int Unrolling = assign_traits::Unrolling, + int Version = Specialized> +struct assign_impl; + +/************************ +*** Default traversal *** +************************/ + +template +struct assign_impl +{ + static inline void run(Derived1 &, const Derived2 &) { } +}; + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static inline void run(Derived1 &dst, const Derived2 &src) + { + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; ++inner) + dst.copyCoeffByOuterInner(outer, inner, src); + } +}; + +template +struct assign_impl +{ + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + assign_DefaultTraversal_CompleteUnrolling + ::run(dst, src); + } +}; + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + assign_DefaultTraversal_InnerUnrolling + ::run(dst, src, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static inline void run(Derived1 &dst, const Derived2 &src) + { + const Index size = dst.size(); + for(Index i = 0; i < size; ++i) + dst.copyCoeff(i, src); + } +}; + +template +struct assign_impl +{ + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + assign_LinearTraversal_CompleteUnrolling + ::run(dst, src); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static inline void run(Derived1 &dst, const Derived2 &src) + { + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index packetSize = packet_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + dst.template copyPacketByOuterInner(outer, inner, src); + } +}; + +template +struct assign_impl +{ + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + assign_innervec_CompleteUnrolling + ::run(dst, src); + } +}; + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + assign_innervec_InnerUnrolling + ::run(dst, src, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + +template +struct unaligned_assign_impl +{ + template + static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {} +}; + +template <> +struct unaligned_assign_impl +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. +#ifdef _MSC_VER + template + static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) +#else + template + static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) +#endif + { + for (typename Derived::Index index = start; index < end; ++index) + dst.copyCoeff(index, src); + } +}; + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + const Index size = dst.size(); + typedef packet_traits PacketTraits; + enum { + packetSize = PacketTraits::size, + dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits::DstIsAligned) , + srcAlignment = assign_traits::JointAlignment + }; + const Index alignedStart = assign_traits::DstIsAligned ? 0 + : internal::first_aligned(&dst.coeffRef(0), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_assign_impl::DstIsAligned!=0>::run(src,dst,0,alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + { + dst.template copyPacket(index, src); + } + + unaligned_assign_impl<>::run(src,dst,alignedEnd,size); + } +}; + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) + { + enum { size = Derived1::SizeAtCompileTime, + packetSize = packet_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + assign_innervec_CompleteUnrolling::run(dst, src); + assign_DefaultTraversal_CompleteUnrolling::run(dst, src); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct assign_impl +{ + typedef typename Derived1::Index Index; + static inline void run(Derived1 &dst, const Derived2 &src) + { + typedef typename Derived1::Scalar Scalar; + typedef packet_traits PacketTraits; + enum { + packetSize = PacketTraits::size, + alignable = PacketTraits::AlignedOnScalar, + dstIsAligned = assign_traits::DstIsAligned, + dstAlignment = alignable ? Aligned : int(dstIsAligned), + srcAlignment = assign_traits::JointAlignment + }; + const Scalar *dst_ptr = &dst.coeffRef(0,0); + if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) + { + // the pointer is not aligend-on scalar, so alignment is not possible + return assign_impl::run(dst, src); + } + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner(outer, inner, src); + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); + } + } +}; + +} // end namespace internal + +/*************************************************************************** +* Part 4 : implementation of DenseBase methods +***************************************************************************/ + +template +template +EIGEN_STRONG_INLINE Derived& DenseBase + ::lazyAssign(const DenseBase& other) +{ + enum{ + SameType = internal::is_same::value + }; + + EIGEN_STATIC_ASSERT_LVALUE(Derived) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) + EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + +#ifdef EIGEN_DEBUG_ASSIGN + internal::assign_traits::debug(); +#endif + eigen_assert(rows() == other.rows() && cols() == other.cols()); + internal::assign_impl::Traversal) + : int(InvalidTraversal)>::run(derived(),other.derived()); +#ifndef EIGEN_NO_DEBUG + checkTransposeAliasing(other.derived()); +#endif + return derived(); +} + +namespace internal { + +template::Flags) & EvalBeforeAssigningBit) != 0, + bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1) + | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". + // revert to || as soon as not needed anymore. + (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1)) + && int(Derived::SizeAtCompileTime) != 1> +struct assign_selector; + +template +struct assign_selector { + static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } + template + static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; } +}; +template +struct assign_selector { + static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } +}; +template +struct assign_selector { + static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } + template + static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose dstTrans(dst); other.evalTo(dstTrans); return dst; } +}; +template +struct assign_selector { + static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } +}; + +} // end namespace internal + +template +template +EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) +{ + return internal::assign_selector::run(derived(), other.derived()); +} + +template +EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) +{ + return internal::assign_selector::run(derived(), other.derived()); +} + +template +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) +{ + return internal::assign_selector::run(derived(), other.derived()); +} + +template +template +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) +{ + return internal::assign_selector::run(derived(), other.derived()); +} + +template +template +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) +{ + return internal::assign_selector::evalTo(derived(), other.derived()); +} + +template +template +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) +{ + return internal::assign_selector::evalTo(derived(), other.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_H diff --git a/tools/Eigen/src/Core/Assign_MKL.h b/tools/Eigen/src/Core/Assign_MKL.h new file mode 100644 index 0000000..7772951 --- /dev/null +++ b/tools/Eigen/src/Core/Assign_MKL.h @@ -0,0 +1,224 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() + ******************************************************************************** +*/ + +#ifndef EIGEN_ASSIGN_VML_H +#define EIGEN_ASSIGN_VML_H + +namespace Eigen { + +namespace internal { + +template struct vml_call +{ enum { IsSupported = 0 }; }; + +template +class vml_assign_traits +{ + private: + enum { + DstHasDirectAccess = Dst::Flags & DirectAccessBit, + SrcHasDirectAccess = Src::Flags & DirectAccessBit, + + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + + MightEnableVml = vml_call::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess + && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), + VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, + MayEnableVml = MightEnableVml && LargeEnough, + MayLinearize = MayEnableVml && MightLinearize + }; + public: + enum { + Traversal = MayLinearize ? LinearVectorizedTraversal + : MayEnableVml ? InnerVectorizedTraversal + : DefaultTraversal + }; +}; + +template::Traversal > +struct vml_assign_impl + : assign_impl,Traversal,Unrolling,BuiltIn> +{ +}; + +template +struct vml_assign_impl +{ + typedef typename Derived1::Scalar Scalar; + typedef typename Derived1::Index Index; + static inline void run(Derived1& dst, const CwiseUnaryOp& src) + { + // in case we want to (or have to) skip VML at runtime we can call: + // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) { + const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : + &(src.nestedExpression().coeffRef(0, outer)); + Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); + vml_call::run(src.functor(), innerSize, src_ptr, dst_ptr ); + } + } +}; + +template +struct vml_assign_impl +{ + static inline void run(Derived1& dst, const CwiseUnaryOp& src) + { + // in case we want to (or have to) skip VML at runtime we can call: + // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); + vml_call::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); + } +}; + +// Macroses + +#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ + template \ + struct assign_impl, TRAVERSAL, UNROLLING, Specialized> { \ + static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp &src) { \ + vml_assign_impl::run(dst, src); \ + } \ + }; + +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) +EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) + + +#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) +#define EIGEN_MKL_VML_MODE VML_HA +#else +#define EIGEN_MKL_VML_MODE VML_LA +#endif + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ + VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ + template<> struct vml_call< scalar_##EIGENOP##_op > { \ + enum { IsSupported = 1 }; \ + static inline void run( const scalar_##EIGENOP##_op& func, \ + int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ + EIGENTYPE exponent = func.m_exponent; \ + MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ + VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ + (VMLTYPE*)dst, &vmlMode); \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) +//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) + +// The vm*powx functions are not avaibale in the windows version of MKL. +#ifndef _WIN32 +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_VML_H diff --git a/tools/Eigen/src/Core/BandMatrix.h b/tools/Eigen/src/Core/BandMatrix.h new file mode 100644 index 0000000..ffd7fe8 --- /dev/null +++ b/tools/Eigen/src/Core/BandMatrix.h @@ -0,0 +1,334 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BANDMATRIX_H +#define EIGEN_BANDMATRIX_H + +namespace Eigen { + +namespace internal { + +template +class BandMatrixBase : public EigenBase +{ + public: + + enum { + Flags = internal::traits::Flags, + CoeffReadCost = internal::traits::CoeffReadCost, + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + Supers = internal::traits::Supers, + Subs = internal::traits::Subs, + Options = internal::traits::Options + }; + typedef typename internal::traits::Scalar Scalar; + typedef Matrix DenseMatrixType; + typedef typename DenseMatrixType::Index Index; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef EigenBase Base; + + protected: + enum { + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) + ? 1 + Supers + Subs + : Dynamic, + SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime) + }; + + public: + + using Base::derived; + using Base::rows; + using Base::cols; + + /** \returns the number of super diagonals */ + inline Index supers() const { return derived().supers(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return derived().subs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline const CoefficientsType& coeffs() const { return derived().coeffs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline CoefficientsType& coeffs() { return derived().coeffs(); } + + /** \returns a vector expression of the \a i -th column, + * only the meaningful part is returned. + * \warning the internal storage must be column major. */ + inline Block col(Index i) + { + EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + Index start = 0; + Index len = coeffs().rows(); + if (i<=supers()) + { + start = supers()-i; + len = (std::min)(rows(),std::max(0,coeffs().rows() - (supers()-i))); + } + else if (i>=rows()-subs()) + len = std::max(0,coeffs().rows() - (i + 1 - rows() + subs())); + return Block(coeffs(), start, i, len, 1); + } + + /** \returns a vector expression of the main diagonal */ + inline Block diagonal() + { return Block(coeffs(),supers(),0,1,(std::min)(rows(),cols())); } + + /** \returns a vector expression of the main diagonal (const version) */ + inline const Block diagonal() const + { return Block(coeffs(),supers(),0,1,(std::min)(rows(),cols())); } + + template struct DiagonalIntReturnType { + enum { + ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)), + Conjugate = ReturnOpposite && NumTraits::IsComplex, + ActualIndex = ReturnOpposite ? -Index : Index, + DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic) + ? Dynamic + : (ActualIndex<0 + ? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex) + : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex)) + }; + typedef Block BuildType; + typedef typename internal::conditional,BuildType >, + BuildType>::type Type; + }; + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template inline typename DiagonalIntReturnType::Type diagonal() + { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template inline const typename DiagonalIntReturnType::Type diagonal() const + { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline Block diagonal(Index i) + { + eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); + return Block(coeffs(), supers()-i, std::max(0,i), 1, diagonalLength(i)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline const Block diagonal(Index i) const + { + eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); + return Block(coeffs(), supers()-i, std::max(0,i), 1, diagonalLength(i)); + } + + template inline void evalTo(Dest& dst) const + { + dst.resize(rows(),cols()); + dst.setZero(); + dst.diagonal() = diagonal(); + for (Index i=1; i<=supers();++i) + dst.diagonal(i) = diagonal(i); + for (Index i=1; i<=subs();++i) + dst.diagonal(-i) = diagonal(-i); + } + + DenseMatrixType toDenseMatrix() const + { + DenseMatrixType res(rows(),cols()); + evalTo(res); + return res; + } + + protected: + + inline Index diagonalLength(Index i) const + { return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); } +}; + +/** + * \class BandMatrix + * \ingroup Core_Module + * + * \brief Represents a rectangular matrix with a banded storage + * + * \param _Scalar Numeric type, i.e. float, double, int + * \param Rows Number of rows, or \b Dynamic + * \param Cols Number of columns, or \b Dynamic + * \param Supers Number of super diagonal + * \param Subs Number of sub diagonal + * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint + * The former controls \ref TopicStorageOrders "storage order", and defaults to + * column-major. The latter controls whether the matrix represents a selfadjoint + * matrix in which case either Supers of Subs have to be null. + * + * \sa class TridiagonalMatrix + */ + +template +struct traits > +{ + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef DenseIndex Index; + enum { + CoeffReadCost = NumTraits::ReadCost, + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _Rows, + MaxColsAtCompileTime = _Cols, + Flags = LvalueBit, + Supers = _Supers, + Subs = _Subs, + Options = _Options, + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic + }; + typedef Matrix CoefficientsType; +}; + +template +class BandMatrix : public BandMatrixBase > +{ + public: + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::CoefficientsType CoefficientsType; + + inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + : m_coeffs(1+supers+subs,cols), + m_rows(rows), m_supers(supers), m_subs(subs) + { + } + + /** \returns the number of columns */ + inline Index rows() const { return m_rows.value(); } + + /** \returns the number of rows */ + inline Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + inline Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + inline CoefficientsType& coeffs() { return m_coeffs; } + + protected: + + CoefficientsType m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +template +class BandMatrixWrapper; + +template +struct traits > +{ + typedef typename _CoefficientsType::Scalar Scalar; + typedef typename _CoefficientsType::StorageKind StorageKind; + typedef typename _CoefficientsType::Index Index; + enum { + CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost, + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _Rows, + MaxColsAtCompileTime = _Cols, + Flags = LvalueBit, + Supers = _Supers, + Subs = _Subs, + Options = _Options, + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic + }; + typedef _CoefficientsType CoefficientsType; +}; + +template +class BandMatrixWrapper : public BandMatrixBase > +{ + public: + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef typename internal::traits::Index Index; + + inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + : m_coeffs(coeffs), + m_rows(rows), m_supers(supers), m_subs(subs) + { + EIGEN_UNUSED_VARIABLE(cols); + //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows()); + } + + /** \returns the number of columns */ + inline Index rows() const { return m_rows.value(); } + + /** \returns the number of rows */ + inline Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + inline Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + + protected: + + const CoefficientsType& m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +/** + * \class TridiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a tridiagonal matrix with a compact banded storage + * + * \param _Scalar Numeric type, i.e. float, double, int + * \param Size Number of rows and cols, or \b Dynamic + * \param _Options Can be 0 or \b SelfAdjoint + * + * \sa class BandMatrix + */ +template +class TridiagonalMatrix : public BandMatrix +{ + typedef BandMatrix Base; + typedef typename Base::Index Index; + public: + TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + + inline typename Base::template DiagonalIntReturnType<1>::Type super() + { return Base::template diagonal<1>(); } + inline const typename Base::template DiagonalIntReturnType<1>::Type super() const + { return Base::template diagonal<1>(); } + inline typename Base::template DiagonalIntReturnType<-1>::Type sub() + { return Base::template diagonal<-1>(); } + inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const + { return Base::template diagonal<-1>(); } + protected: +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BANDMATRIX_H diff --git a/tools/Eigen/src/Core/Block.h b/tools/Eigen/src/Core/Block.h new file mode 100644 index 0000000..8278944 --- /dev/null +++ b/tools/Eigen/src/Core/Block.h @@ -0,0 +1,406 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BLOCK_H +#define EIGEN_BLOCK_H + +namespace Eigen { + +/** \class Block + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size block + * + * \param XprType the type of the expression in which we are taking a block + * \param BlockRows the number of rows of the block we are taking at compile time (optional) + * \param BlockCols the number of columns of the block we are taking at compile time (optional) + * + * This class represents an expression of either a fixed-size or dynamic-size block. It is the return + * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and + * most of the time this is the only way it is used. + * + * However, if you want to directly maniputate block expressions, + * for instance if you want to write a function returning such an expression, you + * will need to use this class. + * + * Here is an example illustrating the dynamic case: + * \include class_Block.cpp + * Output: \verbinclude class_Block.out + * + * \note Even though this expression has dynamic size, in the case where \a XprType + * has fixed size, this expression inherits a fixed maximal size which means that evaluating + * it does not cause a dynamic memory allocation. + * + * Here is an example illustrating the fixed-size case: + * \include class_FixedBlock.cpp + * Output: \verbinclude class_FixedBlock.out + * + * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock + */ + +namespace internal { +template +struct traits > : traits +{ + typedef typename traits::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename nested::type XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + enum{ + MatrixRows = traits::RowsAtCompileTime, + MatrixCols = traits::ColsAtCompileTime, + RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows, + ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols, + MaxRowsAtCompileTime = BlockRows==0 ? 0 + : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) + : int(traits::MaxRowsAtCompileTime), + MaxColsAtCompileTime = BlockCols==0 ? 0 + : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) + : int(traits::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, + IsDense = is_same::value, + IsRowMajor = (IsDense&&MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (IsDense&&MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : XprTypeIsRowMajor, + HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, + Flags0 = traits::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit | + MaskAlignedBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit + }; +}; + +template::ret> class BlockImpl_dense; + +} // end namespace internal + +template class BlockImpl; + +template class Block + : public BlockImpl::StorageKind> +{ + typedef BlockImpl::StorageKind> Impl; + public: + //typedef typename Impl::Base Base; + typedef Impl Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Block) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + /** Column or Row constructor + */ + inline Block(XprType& xpr, Index i) : Impl(xpr,i) + { + eigen_assert( (i>=0) && ( + ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows() + && a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols()); + } + + /** Dynamic-size constructor + */ + inline Block(XprType& xpr, + Index a_startRow, Index a_startCol, + Index blockRows, Index blockCols) + : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) + { + eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) + && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); + eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows + && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols); + } +}; + +// The generic default implementation for dense block simplu forward to the internal::BlockImpl_dense +// that must be specialized for direct and non-direct access... +template +class BlockImpl + : public internal::BlockImpl_dense +{ + typedef internal::BlockImpl_dense Impl; + typedef typename XprType::Index Index; + public: + typedef Impl Base; + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) + inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} + inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {} + inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols) + : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {} +}; + +namespace internal { + +/** \internal Internal implementation of dense Blocks in the general case. */ +template class BlockImpl_dense + : public internal::dense_xpr_base >::type +{ + typedef Block BlockType; + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + class InnerIterator; + + /** Column or Row constructor + */ + inline BlockImpl_dense(XprType& xpr, Index i) + : m_xpr(xpr), + // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime, + // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1, + // all other cases are invalid. + // The case a 1x1 matrix seems ambiguous, but the result is the same anyway. + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0), + m_blockRows(BlockRows==1 ? 1 : xpr.rows()), + m_blockCols(BlockCols==1 ? 1 : xpr.cols()) + {} + + /** Fixed-size constructor + */ + inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol) + : m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol), + m_blockRows(BlockRows), m_blockCols(BlockCols) + {} + + /** Dynamic-size constructor + */ + inline BlockImpl_dense(XprType& xpr, + Index a_startRow, Index a_startCol, + Index blockRows, Index blockCols) + : m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol), + m_blockRows(blockRows), m_blockCols(blockCols) + {} + + inline Index rows() const { return m_blockRows.value(); } + inline Index cols() const { return m_blockCols.value(); } + + inline Scalar& coeffRef(Index rowId, Index colId) + { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.const_cast_derived() + .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_xpr.derived() + .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const + { + return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + inline Scalar& coeffRef(Index index) + { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.const_cast_derived() + .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + inline const Scalar& coeffRef(Index index) const + { + return m_xpr.const_cast_derived() + .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + inline const CoeffReturnType coeff(Index index) const + { + return m_xpr + .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + template + inline PacketScalar packet(Index rowId, Index colId) const + { + return m_xpr.template packet + (rowId + m_startRow.value(), colId + m_startCol.value()); + } + + template + inline void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + m_xpr.const_cast_derived().template writePacket + (rowId + m_startRow.value(), colId + m_startCol.value(), val); + } + + template + inline PacketScalar packet(Index index) const + { + return m_xpr.template packet + (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + template + inline void writePacket(Index index, const PacketScalar& val) + { + m_xpr.const_cast_derived().template writePacket + (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val); + } + + #ifdef EIGEN_PARSED_BY_DOXYGEN + /** \sa MapBase::data() */ + inline const Scalar* data() const; + inline Index innerStride() const; + inline Index outerStride() const; + #endif + + const typename internal::remove_all::type& nestedExpression() const + { + return m_xpr; + } + + Index startRow() const + { + return m_startRow.value(); + } + + Index startCol() const + { + return m_startCol.value(); + } + + protected: + + const typename XprType::Nested m_xpr; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + const internal::variable_if_dynamic m_blockRows; + const internal::variable_if_dynamic m_blockCols; +}; + +/** \internal Internal implementation of dense Blocks in the direct access case.*/ +template +class BlockImpl_dense + : public MapBase > +{ + typedef Block BlockType; + public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + /** Column or Row constructor + */ + inline BlockImpl_dense(XprType& xpr, Index i) + : Base(internal::const_cast_ptr(&xpr.coeffRef( + (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0, + (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)), + BlockRows==1 ? 1 : xpr.rows(), + BlockCols==1 ? 1 : xpr.cols()), + m_xpr(xpr) + { + init(); + } + + /** Fixed-size constructor + */ + inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) + : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr) + { + init(); + } + + /** Dynamic-size constructor + */ + inline BlockImpl_dense(XprType& xpr, + Index startRow, Index startCol, + Index blockRows, Index blockCols) + : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols), + m_xpr(xpr) + { + init(); + } + + const typename internal::remove_all::type& nestedExpression() const + { + return m_xpr; + } + + /** \sa MapBase::innerStride() */ + inline Index innerStride() const + { + return internal::traits::HasSameStorageOrderAsXprType + ? m_xpr.innerStride() + : m_xpr.outerStride(); + } + + /** \sa MapBase::outerStride() */ + inline Index outerStride() const + { + return m_outerStride; + } + + #ifndef __SUNPRO_CC + // FIXME sunstudio is not friendly with the above friend... + // META-FIXME there is no 'friend' keyword around here. Is this obsolete? + protected: + #endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal used by allowAligned() */ + inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols) + : Base(data, blockRows, blockCols), m_xpr(xpr) + { + init(); + } + #endif + + protected: + void init() + { + m_outerStride = internal::traits::HasSameStorageOrderAsXprType + ? m_xpr.outerStride() + : m_xpr.innerStride(); + } + + typename XprType::Nested m_xpr; + Index m_outerStride; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BLOCK_H diff --git a/tools/Eigen/src/Core/BooleanRedux.h b/tools/Eigen/src/Core/BooleanRedux.h new file mode 100644 index 0000000..be9f48a --- /dev/null +++ b/tools/Eigen/src/Core/BooleanRedux.h @@ -0,0 +1,154 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ALLANDANY_H +#define EIGEN_ALLANDANY_H + +namespace Eigen { + +namespace internal { + +template +struct all_unroller +{ + enum { + col = (UnrollCount-1) / Derived::RowsAtCompileTime, + row = (UnrollCount-1) % Derived::RowsAtCompileTime + }; + + static inline bool run(const Derived &mat) + { + return all_unroller::run(mat) && mat.coeff(row, col); + } +}; + +template +struct all_unroller +{ + static inline bool run(const Derived &/*mat*/) { return true; } +}; + +template +struct all_unroller +{ + static inline bool run(const Derived &) { return false; } +}; + +template +struct any_unroller +{ + enum { + col = (UnrollCount-1) / Derived::RowsAtCompileTime, + row = (UnrollCount-1) % Derived::RowsAtCompileTime + }; + + static inline bool run(const Derived &mat) + { + return any_unroller::run(mat) || mat.coeff(row, col); + } +}; + +template +struct any_unroller +{ + static inline bool run(const Derived & /*mat*/) { return false; } +}; + +template +struct any_unroller +{ + static inline bool run(const Derived &) { return false; } +}; + +} // end namespace internal + +/** \returns true if all coefficients are true + * + * Example: \include MatrixBase_all.cpp + * Output: \verbinclude MatrixBase_all.out + * + * \sa any(), Cwise::operator<() + */ +template +inline bool DenseBase::all() const +{ + enum { + unroll = SizeAtCompileTime != Dynamic + && CoeffReadCost != Dynamic + && NumTraits::AddCost != Dynamic + && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + }; + if(unroll) + return internal::all_unroller::run(derived()); + else + { + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if (!coeff(i, j)) return false; + return true; + } +} + +/** \returns true if at least one coefficient is true + * + * \sa all() + */ +template +inline bool DenseBase::any() const +{ + enum { + unroll = SizeAtCompileTime != Dynamic + && CoeffReadCost != Dynamic + && NumTraits::AddCost != Dynamic + && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + }; + if(unroll) + return internal::any_unroller::run(derived()); + else + { + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if (coeff(i, j)) return true; + return false; + } +} + +/** \returns the number of coefficients which evaluate to true + * + * \sa all(), any() + */ +template +inline typename DenseBase::Index DenseBase::count() const +{ + return derived().template cast().template cast().sum(); +} + +/** \returns true is \c *this contains at least one Not A Number (NaN). + * + * \sa allFinite() + */ +template +inline bool DenseBase::hasNaN() const +{ + return !((derived().array()==derived().array()).all()); +} + +/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. + * + * \sa hasNaN() + */ +template +inline bool DenseBase::allFinite() const +{ + return !((derived()-derived()).hasNaN()); +} + +} // end namespace Eigen + +#endif // EIGEN_ALLANDANY_H diff --git a/tools/Eigen/src/Core/arch/SSE/MathFunctions.h b/tools/Eigen/src/Core/arch/SSE/MathFunctions.h new file mode 100644 index 0000000..2b07168 --- /dev/null +++ b/tools/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -0,0 +1,475 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007 Julien Pommier +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* The sin, cos, exp, and log functions of this file come from + * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ + */ + +#ifndef EIGEN_MATH_FUNCTIONS_SSE_H +#define EIGEN_MATH_FUNCTIONS_SSE_H + +namespace Eigen { + +namespace internal { + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f plog(const Packet4f& _x) +{ + Packet4f x = _x; + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); + _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); + + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000); + + /* the smallest non denormalized float number */ + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000); + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);//-1.f/0.f); + + /* natural logarithm computed for 4 simultaneous float + return NaN for x <= 0 + */ + _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f); + + + Packet4i emm0; + + Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN + Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps()); + + x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */ + emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23); + + /* keep only the fractional part */ + x = _mm_and_ps(x, p4f_inv_mant_mask); + x = _mm_or_ps(x, p4f_half); + + emm0 = _mm_sub_epi32(emm0, p4i_0x7f); + Packet4f e = padd(_mm_cvtepi32_ps(emm0), p4f_1); + + /* part2: + if( x < SQRTHF ) { + e -= 1; + x = x + x - 1.0; + } else { x = x - 1.0; } + */ + Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF); + Packet4f tmp = _mm_and_ps(x, mask); + x = psub(x, p4f_1); + e = psub(e, _mm_and_ps(p4f_1, mask)); + x = padd(x, tmp); + + Packet4f x2 = pmul(x,x); + Packet4f x3 = pmul(x2,x); + + Packet4f y, y1, y2; + y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1); + y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4); + y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7); + y = pmadd(y , x, p4f_cephes_log_p2); + y1 = pmadd(y1, x, p4f_cephes_log_p5); + y2 = pmadd(y2, x, p4f_cephes_log_p8); + y = pmadd(y, x3, y1); + y = pmadd(y, x3, y2); + y = pmul(y, x3); + + y1 = pmul(e, p4f_cephes_log_q1); + tmp = pmul(x2, p4f_half); + y = padd(y, y1); + x = psub(x, tmp); + y2 = pmul(e, p4f_cephes_log_q2); + x = padd(x, y); + x = padd(x, y2); + // negative arg will be NAN, 0 will be -INF + return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)), + _mm_and_ps(iszero_mask, p4f_minus_inf)); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f pexp(const Packet4f& _x) +{ + Packet4f x = _x; + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); + _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); + + + _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f); + _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); + + _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); + + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); + + Packet4f tmp, fx; + Packet4i emm0; + + // clamp x + x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo); + + /* express exp(x) as exp(g + n*log(2)) */ + fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half); + +#ifdef EIGEN_VECTORIZE_SSE4_1 + fx = _mm_floor_ps(fx); +#else + emm0 = _mm_cvttps_epi32(fx); + tmp = _mm_cvtepi32_ps(emm0); + /* if greater, substract 1 */ + Packet4f mask = _mm_cmpgt_ps(tmp, fx); + mask = _mm_and_ps(mask, p4f_1); + fx = psub(tmp, mask); +#endif + + tmp = pmul(fx, p4f_cephes_exp_C1); + Packet4f z = pmul(fx, p4f_cephes_exp_C2); + x = psub(x, tmp); + x = psub(x, z); + + z = pmul(x,x); + + Packet4f y = p4f_cephes_exp_p0; + y = pmadd(y, x, p4f_cephes_exp_p1); + y = pmadd(y, x, p4f_cephes_exp_p2); + y = pmadd(y, x, p4f_cephes_exp_p3); + y = pmadd(y, x, p4f_cephes_exp_p4); + y = pmadd(y, x, p4f_cephes_exp_p5); + y = pmadd(y, z, x); + y = padd(y, p4f_1); + + // build 2^n + emm0 = _mm_cvttps_epi32(fx); + emm0 = _mm_add_epi32(emm0, p4i_0x7f); + emm0 = _mm_slli_epi32(emm0, 23); + return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x); +} +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet2d pexp(const Packet2d& _x) +{ + Packet2d x = _x; + + _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0); + _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0); + _EIGEN_DECLARE_CONST_Packet2d(half, 0.5); + + _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437); + _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); + + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); + _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); + static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0); + + Packet2d tmp, fx; + Packet4i emm0; + + // clamp x + x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo); + /* express exp(x) as exp(g + n*log(2)) */ + fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half); + +#ifdef EIGEN_VECTORIZE_SSE4_1 + fx = _mm_floor_pd(fx); +#else + emm0 = _mm_cvttpd_epi32(fx); + tmp = _mm_cvtepi32_pd(emm0); + /* if greater, substract 1 */ + Packet2d mask = _mm_cmpgt_pd(tmp, fx); + mask = _mm_and_pd(mask, p2d_1); + fx = psub(tmp, mask); +#endif + + tmp = pmul(fx, p2d_cephes_exp_C1); + Packet2d z = pmul(fx, p2d_cephes_exp_C2); + x = psub(x, tmp); + x = psub(x, z); + + Packet2d x2 = pmul(x,x); + + Packet2d px = p2d_cephes_exp_p0; + px = pmadd(px, x2, p2d_cephes_exp_p1); + px = pmadd(px, x2, p2d_cephes_exp_p2); + px = pmul (px, x); + + Packet2d qx = p2d_cephes_exp_q0; + qx = pmadd(qx, x2, p2d_cephes_exp_q1); + qx = pmadd(qx, x2, p2d_cephes_exp_q2); + qx = pmadd(qx, x2, p2d_cephes_exp_q3); + + x = pdiv(px,psub(qx,px)); + x = pmadd(p2d_2,x,p2d_1); + + // build 2^n + emm0 = _mm_cvttpd_epi32(fx); + emm0 = _mm_add_epi32(emm0, p4i_1023_0); + emm0 = _mm_slli_epi32(emm0, 20); + emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3)); + return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x); +} + +/* evaluation of 4 sines at onces, using SSE2 intrinsics. + + The code is the exact rewriting of the cephes sinf function. + Precision is excellent as long as x < 8192 (I did not bother to + take into account the special handling they have for greater values + -- it does not return garbage for arguments over 8192, though, but + the extra precision is missing). + + Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the + surprising but correct result. +*/ + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f psin(const Packet4f& _x) +{ + Packet4f x = _x; + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); + + _EIGEN_DECLARE_CONST_Packet4i(1, 1); + _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); + _EIGEN_DECLARE_CONST_Packet4i(2, 2); + _EIGEN_DECLARE_CONST_Packet4i(4, 4); + + _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); + + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI + + Packet4f xmm1, xmm2, xmm3, sign_bit, y; + + Packet4i emm0, emm2; + sign_bit = x; + /* take the absolute value */ + x = pabs(x); + + /* take the modulo */ + + /* extract the sign bit (upper one) */ + sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask); + + /* scale by 4/Pi */ + y = pmul(x, p4f_cephes_FOPI); + + /* store the integer part of y in mm0 */ + emm2 = _mm_cvttps_epi32(y); + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = _mm_add_epi32(emm2, p4i_1); + emm2 = _mm_and_si128(emm2, p4i_not1); + y = _mm_cvtepi32_ps(emm2); + /* get the swap sign flag */ + emm0 = _mm_and_si128(emm2, p4i_4); + emm0 = _mm_slli_epi32(emm0, 29); + /* get the polynom selection mask + there is one polynom for 0 <= x <= Pi/4 + and another one for Pi/4 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f pcos(const Packet4f& _x) +{ + Packet4f x = _x; + _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); + _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); + + _EIGEN_DECLARE_CONST_Packet4i(1, 1); + _EIGEN_DECLARE_CONST_Packet4i(not1, ~1); + _EIGEN_DECLARE_CONST_Packet4i(2, 2); + _EIGEN_DECLARE_CONST_Packet4i(4, 4); + + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f); + _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f); + _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f); + _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f); + _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI + + Packet4f xmm1, xmm2, xmm3, y; + Packet4i emm0, emm2; + + x = pabs(x); + + /* scale by 4/Pi */ + y = pmul(x, p4f_cephes_FOPI); + + /* get the integer part of y */ + emm2 = _mm_cvttps_epi32(y); + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = _mm_add_epi32(emm2, p4i_1); + emm2 = _mm_and_si128(emm2, p4i_not1); + y = _mm_cvtepi32_ps(emm2); + + emm2 = _mm_sub_epi32(emm2, p4i_2); + + /* get the swap sign flag */ + emm0 = _mm_andnot_si128(emm2, p4i_4); + emm0 = _mm_slli_epi32(emm0, 29); + /* get the polynom selection mask */ + emm2 = _mm_and_si128(emm2, p4i_2); + emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); + + Packet4f sign_bit = _mm_castsi128_ps(emm0); + Packet4f poly_mask = _mm_castsi128_ps(emm2); + + /* The magic pass: "Extended precision modular arithmetic" + x = ((x - y * DP1) - y * DP2) - y * DP3; */ + xmm1 = pmul(y, p4f_minus_cephes_DP1); + xmm2 = pmul(y, p4f_minus_cephes_DP2); + xmm3 = pmul(y, p4f_minus_cephes_DP3); + x = padd(x, xmm1); + x = padd(x, xmm2); + x = padd(x, xmm3); + + /* Evaluate the first polynom (0 <= x <= Pi/4) */ + y = p4f_coscof_p0; + Packet4f z = pmul(x,x); + + y = pmadd(y,z,p4f_coscof_p1); + y = pmadd(y,z,p4f_coscof_p2); + y = pmul(y, z); + y = pmul(y, z); + Packet4f tmp = _mm_mul_ps(z, p4f_half); + y = psub(y, tmp); + y = padd(y, p4f_1); + + /* Evaluate the second polynom (Pi/4 <= x <= 0) */ + Packet4f y2 = p4f_sincof_p0; + y2 = pmadd(y2, z, p4f_sincof_p1); + y2 = pmadd(y2, z, p4f_sincof_p2); + y2 = pmul(y2, z); + y2 = pmadd(y2, x, x); + + /* select the correct result from the two polynoms */ + y2 = _mm_and_ps(poly_mask, y2); + y = _mm_andnot_ps(poly_mask, y); + y = _mm_or_ps(y,y2); + + /* update the sign */ + return _mm_xor_ps(y, sign_bit); +} + +#if EIGEN_FAST_MATH + +// This is based on Quake3's fast inverse square root. +// For detail see here: http://www.beyond3d.com/content/articles/8/ +// It lacks 1 (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly. +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +Packet4f psqrt(const Packet4f& _x) +{ + Packet4f half = pmul(_x, pset1(.5f)); + + /* select only the inverse sqrt of non-zero inputs */ + Packet4f non_zero_mask = _mm_cmpge_ps(_x, pset1((std::numeric_limits::min)())); + Packet4f x = _mm_and_ps(non_zero_mask, _mm_rsqrt_ps(_x)); + + x = pmul(x, psub(pset1(1.5f), pmul(half, pmul(x,x)))); + return pmul(_x,x); +} + +#else + +template<> EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& x) { return _mm_sqrt_ps(x); } + +#endif + +template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& x) { return _mm_sqrt_pd(x); } + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATH_FUNCTIONS_SSE_H diff --git a/tools/Eigen/src/Core/arch/SSE/PacketMath.h b/tools/Eigen/src/Core/arch/SSE/PacketMath.h new file mode 100644 index 0000000..bef898b --- /dev/null +++ b/tools/Eigen/src/Core/arch/SSE/PacketMath.h @@ -0,0 +1,613 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_SSE_H +#define EIGEN_PACKET_MATH_SSE_H + +namespace Eigen { + +namespace internal { + +#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 +#endif + +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) +#endif + +typedef __m128 Packet4f; +typedef __m128i Packet4i; +typedef __m128d Packet2d; + +template<> struct is_arithmetic<__m128> { enum { value = true }; }; +template<> struct is_arithmetic<__m128i> { enum { value = true }; }; +template<> struct is_arithmetic<__m128d> { enum { value = true }; }; + +#define vec4f_swizzle1(v,p,q,r,s) \ + (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p))))) + +#define vec4i_swizzle1(v,p,q,r,s) \ + (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p)))) + +#define vec2d_swizzle1(v,p,q) \ + (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2))))) + +#define vec4f_swizzle2(a,b,p,q,r,s) \ + (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p)))) + +#define vec4i_swizzle2(a,b,p,q,r,s) \ + (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p)))))) + +#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ + const Packet4f p4f_##NAME = pset1(X) + +#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ + const Packet2d p2d_##NAME = pset1(X) + +#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ + const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1(X)) + +#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ + const Packet4i p4i_##NAME = pset1(X) + + +template<> struct packet_traits : default_packet_traits +{ + typedef Packet4f type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=4, + + HasDiv = 1, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasLog = 1, + HasExp = 1, + HasSqrt = 1 + }; +}; +template<> struct packet_traits : default_packet_traits +{ + typedef Packet2d type; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=2, + + HasDiv = 1, + HasExp = 1, + HasSqrt = 1 + }; +}; +template<> struct packet_traits : default_packet_traits +{ + typedef Packet4i type; + enum { + // FIXME check the Has* + Vectorizable = 1, + AlignedOnScalar = 1, + size=4 + }; +}; + +template<> struct unpacket_traits { typedef float type; enum {size=4}; }; +template<> struct unpacket_traits { typedef double type; enum {size=2}; }; +template<> struct unpacket_traits { typedef int type; enum {size=4}; }; + +#if defined(_MSC_VER) && (_MSC_VER==1500) +// Workaround MSVC 9 internal compiler error. +// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode +// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)). +template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return _mm_set_ps(from,from,from,from); } +template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return _mm_set_pd(from,from); } +template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return _mm_set_epi32(from,from,from,from); } +#else +template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return _mm_set1_ps(from); } +template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return _mm_set1_pd(from); } +template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return _mm_set1_epi32(from); } +#endif + +template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return _mm_add_ps(pset1(a), _mm_set_ps(3,2,1,0)); } +template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return _mm_add_pd(pset1(a),_mm_set_pd(1,0)); } +template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return _mm_add_epi32(pset1(a),_mm_set_epi32(3,2,1,0)); } + +template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) +{ + const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000)); + return _mm_xor_ps(a,mask); +} +template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) +{ + const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000)); + return _mm_xor_pd(a,mask); +} +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) +{ + return psub(_mm_setr_epi32(0,0,0,0), a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } +template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } +template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } + +template<> EIGEN_STRONG_INLINE Packet4f pmul(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmul(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) +{ +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_mullo_epi32(a,b); +#else + // this version is slightly faster than 4 scalar products + return vec4i_swizzle1( + vec4i_swizzle2( + _mm_mul_epu32(a,b), + _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2), + vec4i_swizzle1(b,1,0,3,2)), + 0,2,0,2), + 0,2,1,3); +#endif +} + +template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) +{ eigen_assert(false && "packet integer division are not supported by SSE"); + return pset1(0); +} + +// for some weird raisons, it has to be overloaded for packet of integers +template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } + +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) +{ +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_min_epi32(a,b); +#else + // after some bench, this version *is* faster than a scalar implementation + Packet4i mask = _mm_cmplt_epi32(a,b); + return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); +#endif +} + +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) +{ +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_max_epi32(a,b); +#else + // after some bench, this version *is* faster than a scalar implementation + Packet4i mask = _mm_cmpgt_epi32(a,b); + return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); +#endif +} + +template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f por(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d por(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pxor(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); } +template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } +template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast(from)); } + +#if defined(_MSC_VER) + template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { + EIGEN_DEBUG_UNALIGNED_LOAD + #if (_MSC_VER==1600) + // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps + // (i.e., it does not generate an unaligned load!! + // TODO On most architectures this version should also be faster than a single _mm_loadu_ps + // so we could also enable it for MSVC08 but first we have to make this later does not generate crap when doing so... + __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from)); + res = _mm_loadh_pi(res, (const __m64*)(from+2)); + return res; + #else + return _mm_loadu_ps(from); + #endif + } +#else +// NOTE: with the code below, MSVC's compiler crashes! + +template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) +{ + EIGEN_DEBUG_UNALIGNED_LOAD + return _mm_loadu_ps(from); +} +#endif + +template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) +{ + EIGEN_DEBUG_UNALIGNED_LOAD + return _mm_loadu_pd(from); +} +template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) +{ + EIGEN_DEBUG_UNALIGNED_LOAD + return _mm_loadu_si128(reinterpret_cast(from)); +} + + +template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) +{ + return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast(from))), 0, 0, 1, 1); +} +template<> EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) +{ return pset1(from[0]); } +template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) +{ + Packet4i tmp; + tmp = _mm_loadl_epi64(reinterpret_cast(from)); + return vec4i_swizzle1(tmp, 0, 0, 1, 1); +} + +template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); } +template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } +template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast(to), from); } + +template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { + EIGEN_DEBUG_UNALIGNED_STORE + _mm_storel_pd((to), from); + _mm_storeh_pd((to+1), from); +} +template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), _mm_castps_pd(from)); } +template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), _mm_castsi128_pd(from)); } + +// some compilers might be tempted to perform multiple moves instead of using a vector path. +template<> EIGEN_STRONG_INLINE void pstore1(float* to, const float& a) +{ + Packet4f pa = _mm_set_ss(a); + pstore(to, vec4f_swizzle1(pa,0,0,0,0)); +} +// some compilers might be tempted to perform multiple moves instead of using a vector path. +template<> EIGEN_STRONG_INLINE void pstore1(double* to, const double& a) +{ + Packet2d pa = _mm_set_sd(a); + pstore(to, vec2d_swizzle1(pa,0,0)); +} + +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } + +#if defined(_MSC_VER) && defined(_WIN64) && !defined(__INTEL_COMPILER) +// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 +// Direct of the struct members fixed bug #62. +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { return a.m128_f32[0]; } +template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { return a.m128d_f64[0]; } +template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } +#elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) +// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010 +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; } +template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; } +template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; } +#else +template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { return _mm_cvtss_f32(a); } +template<> EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { return _mm_cvtsd_f64(a); } +template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { return _mm_cvtsi128_si32(a); } +#endif + +template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) +{ return _mm_shuffle_ps(a,a,0x1B); } +template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) +{ return _mm_shuffle_pd(a,a,0x1); } +template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) +{ return _mm_shuffle_epi32(a,0x1B); } + + +template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) +{ + const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF)); + return _mm_and_ps(a,mask); +} +template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) +{ + const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF)); + return _mm_and_pd(a,mask); +} +template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) +{ + #ifdef EIGEN_VECTORIZE_SSSE3 + return _mm_abs_epi32(a); + #else + Packet4i aux = _mm_srai_epi32(a,31); + return _mm_sub_epi32(_mm_xor_si128(a,aux),aux); + #endif +} + +EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs) +{ + vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55)); + vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA)); + vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF)); + vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00)); +} + +#ifdef EIGEN_VECTORIZE_SSE3 +// TODO implement SSE2 versions as well as integer versions +template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) +{ + return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3])); +} +template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) +{ + return _mm_hadd_pd(vecs[0], vecs[1]); +} +// SSSE3 version: +// EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +// { +// return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); +// } + +template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) +{ + Packet4f tmp0 = _mm_hadd_ps(a,a); + return pfirst(_mm_hadd_ps(tmp0, tmp0)); +} + +template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); } + +// SSSE3 version: +// EIGEN_STRONG_INLINE float predux(const Packet4i& a) +// { +// Packet4i tmp0 = _mm_hadd_epi32(a,a); +// return pfirst(_mm_hadd_epi32(tmp0, tmp0)); +// } +#else +// SSE2 versions +template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) +{ + Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); + return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +} +template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) +{ + return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); +} + +template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) +{ + Packet4f tmp0, tmp1, tmp2; + tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]); + tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]); + tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]); + tmp0 = _mm_add_ps(tmp0, tmp1); + tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]); + tmp1 = _mm_add_ps(tmp1, tmp2); + tmp2 = _mm_movehl_ps(tmp1, tmp0); + tmp0 = _mm_movelh_ps(tmp0, tmp1); + return _mm_add_ps(tmp0, tmp2); +} + +template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) +{ + return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1])); +} +#endif // SSE3 + +template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +{ + Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a)); + return pfirst(tmp) + pfirst(_mm_shuffle_epi32(tmp, 1)); +} + +template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +{ + Packet4i tmp0, tmp1, tmp2; + tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]); + tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]); + tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]); + tmp0 = _mm_add_epi32(tmp0, tmp1); + tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]); + tmp1 = _mm_add_epi32(tmp1, tmp2); + tmp2 = _mm_unpacklo_epi64(tmp0, tmp1); + tmp0 = _mm_unpackhi_epi64(tmp0, tmp1); + return _mm_add_epi32(tmp0, tmp2); +} + +// Other reduction functions: + +// mul +template<> EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) +{ + Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a)); + return pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +} +template<> EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) +{ + return pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a))); +} +template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) +{ + // after some experiments, it is seems this is the fastest way to implement it + // for GCC (eg., reusing pmul is very slow !) + // TODO try to call _mm_mul_epu32 directly + EIGEN_ALIGN16 int aux[4]; + pstore(aux, a); + return (aux[0] * aux[1]) * (aux[2] * aux[3]);; +} + +// min +template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) +{ + Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a)); + return pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +} +template<> EIGEN_STRONG_INLINE double predux_min(const Packet2d& a) +{ + return pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a))); +} +template<> EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) +{ + // after some experiments, it is seems this is the fastest way to implement it + // for GCC (eg., it does not like using std::min after the pstore !!) + EIGEN_ALIGN16 int aux[4]; + pstore(aux, a); + int aux0 = aux[0] EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) +{ + Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a)); + return pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +} +template<> EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) +{ + return pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a))); +} +template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) +{ + // after some experiments, it is seems this is the fastest way to implement it + // for GCC (eg., it does not like using std::min after the pstore !!) + EIGEN_ALIGN16 int aux[4]; + pstore(aux, a); + int aux0 = aux[0]>aux[1] ? aux[0] : aux[1]; + int aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; + return aux0>aux2 ? aux0 : aux2; +} + +#if (defined __GNUC__) +// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) +// { +// Packet4f res = b; +// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c)); +// return res; +// } +// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i& a, const Packet4i& b, const int i) +// { +// Packet4i res = a; +// asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i)); +// return res; +// } +#endif + +#ifdef EIGEN_VECTORIZE_SSSE3 +// SSSE3 versions +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second) + { + if (Offset!=0) + first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4)); + } +}; + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) + { + if (Offset!=0) + first = _mm_alignr_epi8(second,first, Offset*4); + } +}; + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second) + { + if (Offset==1) + first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8)); + } +}; +#else +// SSE2 versions +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second) + { + if (Offset==1) + { + first = _mm_move_ss(first,second); + first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39)); + } + else if (Offset==2) + { + first = _mm_movehl_ps(first,first); + first = _mm_movelh_ps(first,second); + } + else if (Offset==3) + { + first = _mm_move_ss(first,second); + first = _mm_shuffle_ps(first,second,0x93); + } + } +}; + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) + { + if (Offset==1) + { + first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second))); + first = _mm_shuffle_epi32(first,0x39); + } + else if (Offset==2) + { + first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first))); + first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second))); + } + else if (Offset==3) + { + first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second))); + first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93)); + } + } +}; + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second) + { + if (Offset==1) + { + first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first))); + first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second))); + } + } +}; +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_SSE_H From 4545ac4476996500f8a17c27240f94e30951ac1e Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:09:31 +0200 Subject: [PATCH 13/25] add more eigen files --- tools/Eigen/src/Core/CMakeLists.txt | 10 + tools/Eigen/src/Core/CommaInitializer.h | 154 +++++ tools/Eigen/src/Core/CoreIterators.h | 61 ++ tools/Eigen/src/Core/CwiseBinaryOp.h | 230 +++++++ tools/Eigen/src/Core/CwiseNullaryOp.h | 864 ++++++++++++++++++++++++ tools/Eigen/src/Core/CwiseUnaryView.h | 139 ++++ tools/Eigen/src/Core/DenseBase.h | 521 ++++++++++++++ tools/Eigen/src/Core/DenseCoeffsBase.h | 754 +++++++++++++++++++++ tools/Eigen/src/Core/DenseStorage.h | 434 ++++++++++++ 9 files changed, 3167 insertions(+) create mode 100644 tools/Eigen/src/Core/CMakeLists.txt create mode 100644 tools/Eigen/src/Core/CommaInitializer.h create mode 100644 tools/Eigen/src/Core/CoreIterators.h create mode 100644 tools/Eigen/src/Core/CwiseBinaryOp.h create mode 100644 tools/Eigen/src/Core/CwiseNullaryOp.h create mode 100644 tools/Eigen/src/Core/CwiseUnaryView.h create mode 100644 tools/Eigen/src/Core/DenseBase.h create mode 100644 tools/Eigen/src/Core/DenseCoeffsBase.h create mode 100644 tools/Eigen/src/Core/DenseStorage.h diff --git a/tools/Eigen/src/Core/CMakeLists.txt b/tools/Eigen/src/Core/CMakeLists.txt new file mode 100644 index 0000000..2346fc2 --- /dev/null +++ b/tools/Eigen/src/Core/CMakeLists.txt @@ -0,0 +1,10 @@ +FILE(GLOB Eigen_Core_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel + ) + +ADD_SUBDIRECTORY(products) +ADD_SUBDIRECTORY(util) +ADD_SUBDIRECTORY(arch) diff --git a/tools/Eigen/src/Core/CommaInitializer.h b/tools/Eigen/src/Core/CommaInitializer.h new file mode 100644 index 0000000..a036d8c --- /dev/null +++ b/tools/Eigen/src/Core/CommaInitializer.h @@ -0,0 +1,154 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMMAINITIALIZER_H +#define EIGEN_COMMAINITIALIZER_H + +namespace Eigen { + +/** \class CommaInitializer + * \ingroup Core_Module + * + * \brief Helper class used by the comma initializer operator + * + * This class is internally used to implement the comma initializer feature. It is + * the return type of MatrixBase::operator<<, and most of the time this is the only + * way it is used. + * + * \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() + */ +template +struct CommaInitializer +{ + typedef typename XprType::Scalar Scalar; + typedef typename XprType::Index Index; + + inline CommaInitializer(XprType& xpr, const Scalar& s) + : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) + { + m_xpr.coeffRef(0,0) = s; + } + + template + inline CommaInitializer(XprType& xpr, const DenseBase& other) + : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) + { + m_xpr.block(0, 0, other.rows(), other.cols()) = other; + } + + /* Copy/Move constructor which transfers ownership. This is crucial in + * absence of return value optimization to avoid assertions during destruction. */ + // FIXME in C++11 mode this could be replaced by a proper RValue constructor + inline CommaInitializer(const CommaInitializer& o) + : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { + // Mark original object as finished. In absence of R-value references we need to const_cast: + const_cast(o).m_row = m_xpr.rows(); + const_cast(o).m_col = m_xpr.cols(); + const_cast(o).m_currentBlockRows = 0; + } + + /* inserts a scalar value in the target matrix */ + CommaInitializer& operator,(const Scalar& s) + { + if (m_col==m_xpr.cols()) + { + m_row+=m_currentBlockRows; + m_col = 0; + m_currentBlockRows = 1; + eigen_assert(m_row + CommaInitializer& operator,(const DenseBase& other) + { + if(other.cols()==0 || other.rows()==0) + return *this; + if (m_col==m_xpr.cols()) + { + m_row+=m_currentBlockRows; + m_col = 0; + m_currentBlockRows = other.rows(); + eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() + && "Too many rows passed to comma initializer (operator<<)"); + } + eigen_assert(m_col + (m_row, m_col) = other; + else + m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other; + m_col += other.cols(); + return *this; + } + + inline ~CommaInitializer() + { + eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() + && m_col == m_xpr.cols() + && "Too few coefficients passed to comma initializer (operator<<)"); + } + + /** \returns the built matrix once all its coefficients have been set. + * Calling finished is 100% optional. Its purpose is to write expressions + * like this: + * \code + * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); + * \endcode + */ + inline XprType& finished() { return m_xpr; } + + XprType& m_xpr; // target expression + Index m_row; // current row id + Index m_col; // current col id + Index m_currentBlockRows; // current block height +}; + +/** \anchor MatrixBaseCommaInitRef + * Convenient operator to set the coefficients of a matrix. + * + * The coefficients must be provided in a row major order and exactly match + * the size of the matrix. Otherwise an assertion is raised. + * + * Example: \include MatrixBase_set.cpp + * Output: \verbinclude MatrixBase_set.out + * + * \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order. + * + * \sa CommaInitializer::finished(), class CommaInitializer + */ +template +inline CommaInitializer DenseBase::operator<< (const Scalar& s) +{ + return CommaInitializer(*static_cast(this), s); +} + +/** \sa operator<<(const Scalar&) */ +template +template +inline CommaInitializer +DenseBase::operator<<(const DenseBase& other) +{ + return CommaInitializer(*static_cast(this), other); +} + +} // end namespace Eigen + +#endif // EIGEN_COMMAINITIALIZER_H diff --git a/tools/Eigen/src/Core/CoreIterators.h b/tools/Eigen/src/Core/CoreIterators.h new file mode 100644 index 0000000..6da4683 --- /dev/null +++ b/tools/Eigen/src/Core/CoreIterators.h @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COREITERATORS_H +#define EIGEN_COREITERATORS_H + +namespace Eigen { + +/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core + */ + +/** \ingroup SparseCore_Module + * \class InnerIterator + * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression + * + * todo + */ + +// generic version for dense matrix and expressions +template class DenseBase::InnerIterator +{ + protected: + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Index Index; + + enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit }; + public: + EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer) + : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize()) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? m_expression.coeff(m_outer, m_inner) + : m_expression.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + + protected: + const Derived& m_expression; + Index m_inner; + const Index m_outer; + const Index m_end; +}; + +} // end namespace Eigen + +#endif // EIGEN_COREITERATORS_H diff --git a/tools/Eigen/src/Core/CwiseBinaryOp.h b/tools/Eigen/src/Core/CwiseBinaryOp.h new file mode 100644 index 0000000..519a866 --- /dev/null +++ b/tools/Eigen/src/Core/CwiseBinaryOp.h @@ -0,0 +1,230 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_BINARY_OP_H +#define EIGEN_CWISE_BINARY_OP_H + +namespace Eigen { + +/** \class CwiseBinaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions + * + * \param BinaryOp template functor implementing the operator + * \param Lhs the type of the left-hand side + * \param Rhs the type of the right-hand side + * + * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. + * It is the return type of binary operators, by which we mean only those binary operators where + * both the left-hand side and the right-hand side are Eigen expressions. + * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseBinaryOp types explicitly. + * + * \sa MatrixBase::binaryExpr(const MatrixBase &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp + */ + +namespace internal { +template +struct traits > +{ + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of< + BinaryOp( + typename Lhs::Scalar, + typename Rhs::Scalar + ) + >::type Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename Lhs::Nested LhsNested; + typedef typename Rhs::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + enum { + LhsCoeffReadCost = _LhsNested::CoeffReadCost, + RhsCoeffReadCost = _RhsNested::CoeffReadCost, + LhsFlags = _LhsNested::Flags, + RhsFlags = _RhsNested::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( AlignedBit + | (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Cost0 = EIGEN_ADD_COST(LhsCoeffReadCost,RhsCoeffReadCost), + CoeffReadCost = EIGEN_ADD_COST(Cost0,functor_traits::Cost) + }; +}; +} // end namespace internal + +// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor +// that would take two operands of different types. If there were such an example, then this check should be +// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as +// currently they take only one typename Scalar template parameter. +// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. +// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to +// add together a float matrix and a double matrix. +#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ + EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ + ? int(internal::scalar_product_traits::Defined) \ + : int(internal::is_same::value)), \ + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + +template +class CwiseBinaryOpImpl; + +template +class CwiseBinaryOp : internal::no_assignment_operator, + public CwiseBinaryOpImpl< + BinaryOp, Lhs, Rhs, + typename internal::promote_storage_type::StorageKind, + typename internal::traits::StorageKind>::ret> +{ + public: + + typedef typename CwiseBinaryOpImpl< + BinaryOp, Lhs, Rhs, + typename internal::promote_storage_type::StorageKind, + typename internal::traits::StorageKind>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) + + typedef typename internal::nested::type LhsNested; + typedef typename internal::nested::type RhsNested; + typedef typename internal::remove_reference::type _LhsNested; + typedef typename internal::remove_reference::type _RhsNested; + + EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp()) + : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) + { + EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar); + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs) + eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); + } + + EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time optimizations + if (internal::traits::type>::RowsAtCompileTime==Dynamic) + return m_rhs.rows(); + else + return m_lhs.rows(); + } + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time optimizations + if (internal::traits::type>::ColsAtCompileTime==Dynamic) + return m_rhs.cols(); + else + return m_lhs.cols(); + } + + /** \returns the left hand side nested expression */ + const _LhsNested& lhs() const { return m_lhs; } + /** \returns the right hand side nested expression */ + const _RhsNested& rhs() const { return m_rhs; } + /** \returns the functor representing the binary operation */ + const BinaryOp& functor() const { return m_functor; } + + protected: + LhsNested m_lhs; + RhsNested m_rhs; + const BinaryOp m_functor; +}; + +template +class CwiseBinaryOpImpl + : public internal::dense_xpr_base >::type +{ + typedef CwiseBinaryOp Derived; + public: + + typedef typename internal::dense_xpr_base >::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE( Derived ) + + EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const + { + return derived().functor()(derived().lhs().coeff(rowId, colId), + derived().rhs().coeff(rowId, colId)); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const + { + return derived().functor().packetOp(derived().lhs().template packet(rowId, colId), + derived().rhs().template packet(rowId, colId)); + } + + EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + return derived().functor()(derived().lhs().coeff(index), + derived().rhs().coeff(index)); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index index) const + { + return derived().functor().packetOp(derived().lhs().template packet(index), + derived().rhs().template packet(index)); + } +}; + +/** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +MatrixBase::operator-=(const MatrixBase &other) +{ + SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); + tmp = other.derived(); + return derived(); +} + +/** replaces \c *this by \c *this + \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +MatrixBase::operator+=(const MatrixBase& other) +{ + SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); + tmp = other.derived(); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_CWISE_BINARY_OP_H diff --git a/tools/Eigen/src/Core/CwiseNullaryOp.h b/tools/Eigen/src/Core/CwiseNullaryOp.h new file mode 100644 index 0000000..a93bab2 --- /dev/null +++ b/tools/Eigen/src/Core/CwiseNullaryOp.h @@ -0,0 +1,864 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_NULLARY_OP_H +#define EIGEN_CWISE_NULLARY_OP_H + +namespace Eigen { + +/** \class CwiseNullaryOp + * \ingroup Core_Module + * + * \brief Generic expression of a matrix where all coefficients are defined by a functor + * + * \param NullaryOp template functor implementing the operator + * \param PlainObjectType the underlying plain matrix/array type + * + * This class represents an expression of a generic nullary operator. + * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods, + * and most of the time this is the only way it is used. + * + * However, if you want to write a function returning such an expression, you + * will need to use this class. + * + * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr() + */ + +namespace internal { +template +struct traits > : traits +{ + enum { + Flags = (traits::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), + CoeffReadCost = functor_traits::Cost + }; +}; +} + +template +class CwiseNullaryOp : internal::no_assignment_operator, + public internal::dense_xpr_base< CwiseNullaryOp >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) + + CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp()) + : m_rows(nbRows), m_cols(nbCols), m_functor(func) + { + eigen_assert(nbRows >= 0 + && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) + && nbCols >= 0 + && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)); + } + + EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); } + EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); } + + EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const + { + return m_functor(rowId, colId); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const + { + return m_functor.packetOp(rowId, colId); + } + + EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + return m_functor(index); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index index) const + { + return m_functor.packetOp(index); + } + + /** \returns the functor representing the nullary operation */ + const NullaryOp& functor() const { return m_functor; } + + protected: + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; + const NullaryOp m_functor; +}; + + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_STRONG_INLINE const CwiseNullaryOp +DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) +{ + return CwiseNullaryOp(rows, cols, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_STRONG_INLINE const CwiseNullaryOp +DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); + else return CwiseNullaryOp(size, 1, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_STRONG_INLINE const CwiseNullaryOp +DenseBase::NullaryExpr(const CustomNullaryOp& func) +{ + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * the returned matrix. Must be compatible with this DenseBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index nbRows, Index nbCols, const Scalar& value) +{ + return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this DenseBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index size, const Scalar& value) +{ + return DenseBase::NullaryExpr(size, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(const Scalar& value) +{ + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op(value)); +} + +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * This particular version of LinSpaced() uses sequential access, i.e. vector access is + * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization + * and yields faster code than the random access version. + * + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced_seq.cpp + * Output: \verbinclude DenseBase_LinSpaced_seq.out + * + * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); +} + +/** + * \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&) + * Special version for fixed size types which does not require the size parameter. + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); +} + +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced.cpp + * Output: \verbinclude DenseBase_LinSpaced.out + * + * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); +} + +/** + * \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&) + * Special version for fixed size types which does not require the size parameter. + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); +} + +/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ +template +bool DenseBase::isApproxToConstant +(const Scalar& val, const RealScalar& prec) const +{ + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if(!internal::isApprox(this->coeff(i, j), val, prec)) + return false; + return true; +} + +/** This is just an alias for isApproxToConstant(). + * + * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ +template +bool DenseBase::isConstant +(const Scalar& val, const RealScalar& prec) const +{ + return isApproxToConstant(val, prec); +} + +/** Alias for setConstant(): sets all coefficients in this expression to \a val. + * + * \sa setConstant(), Constant(), class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) +{ + setConstant(val); +} + +/** Sets all coefficients in this expression to \a value. + * + * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) +{ + return derived() = Constant(rows(), cols(), val); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value. + * + * \only_for_vectors + * + * Example: \include Matrix_setConstant_int.cpp + * Output: \verbinclude Matrix_setConstant_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setConstant(Index size, const Scalar& val) +{ + resize(size); + return setConstant(val); +} + +/** Resizes to the given size, and sets all coefficients in this expression to the given \a value. + * + * \param nbRows the new number of rows + * \param nbCols the new number of columns + * \param val the value to which all coefficients are set + * + * Example: \include Matrix_setConstant_int_int.cpp + * Output: \verbinclude Matrix_setConstant_int_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setConstant(Index nbRows, Index nbCols, const Scalar& val) +{ + resize(nbRows, nbCols); + return setConstant(val); +} + +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_setLinSpaced.cpp + * Output: \verbinclude DenseBase_setLinSpaced.out + * + * \sa CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); +} + +/** + * \brief Sets a linearly space vector. + * + * The function fill *this with equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return setLinSpaced(size(), low, high); +} + +// zero: + +/** \returns an expression of a zero matrix. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * Example: \include MatrixBase_zero_int_int.cpp + * Output: \verbinclude MatrixBase_zero_int_int.out + * + * \sa Zero(), Zero(Index) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero(Index nbRows, Index nbCols) +{ + return Constant(nbRows, nbCols, Scalar(0)); +} + +/** \returns an expression of a zero vector. + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * Example: \include MatrixBase_zero_int.cpp + * Output: \verbinclude MatrixBase_zero_int.out + * + * \sa Zero(), Zero(Index,Index) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero(Index size) +{ + return Constant(size, Scalar(0)); +} + +/** \returns an expression of a fixed-size zero matrix or vector. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_zero.cpp + * Output: \verbinclude MatrixBase_zero.out + * + * \sa Zero(Index), Zero(Index,Index) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero() +{ + return Constant(Scalar(0)); +} + +/** \returns true if *this is approximately equal to the zero matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isZero.cpp + * Output: \verbinclude MatrixBase_isZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +bool DenseBase::isZero(const RealScalar& prec) const +{ + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + return false; + return true; +} + +/** Sets all coefficients in this expression to zero. + * + * Example: \include MatrixBase_setZero.cpp + * Output: \verbinclude MatrixBase_setZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setZero() +{ + return setConstant(Scalar(0)); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to zero. + * + * \only_for_vectors + * + * Example: \include Matrix_setZero_int.cpp + * Output: \verbinclude Matrix_setZero_int.out + * + * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setZero(Index newSize) +{ + resize(newSize); + return setConstant(Scalar(0)); +} + +/** Resizes to the given size, and sets all coefficients in this expression to zero. + * + * \param nbRows the new number of rows + * \param nbCols the new number of columns + * + * Example: \include Matrix_setZero_int_int.cpp + * Output: \verbinclude Matrix_setZero_int_int.out + * + * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setZero(Index nbRows, Index nbCols) +{ + resize(nbRows, nbCols); + return setConstant(Scalar(0)); +} + +// ones: + +/** \returns an expression of a matrix where all coefficients equal one. + * + * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int_int.cpp + * Output: \verbinclude MatrixBase_ones_int_int.out + * + * \sa Ones(), Ones(Index), isOnes(), class Ones + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones(Index nbRows, Index nbCols) +{ + return Constant(nbRows, nbCols, Scalar(1)); +} + +/** \returns an expression of a vector where all coefficients equal one. + * + * The parameter \a newSize is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int.cpp + * Output: \verbinclude MatrixBase_ones_int.out + * + * \sa Ones(), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones(Index newSize) +{ + return Constant(newSize, Scalar(1)); +} + +/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_ones.cpp + * Output: \verbinclude MatrixBase_ones.out + * + * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones() +{ + return Constant(Scalar(1)); +} + +/** \returns true if *this is approximately equal to the matrix where all coefficients + * are equal to 1, within the precision given by \a prec. + * + * Example: \include MatrixBase_isOnes.cpp + * Output: \verbinclude MatrixBase_isOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +bool DenseBase::isOnes +(const RealScalar& prec) const +{ + return isApproxToConstant(Scalar(1), prec); +} + +/** Sets all coefficients in this expression to one. + * + * Example: \include MatrixBase_setOnes.cpp + * Output: \verbinclude MatrixBase_setOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() +{ + return setConstant(Scalar(1)); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to one. + * + * \only_for_vectors + * + * Example: \include Matrix_setOnes_int.cpp + * Output: \verbinclude Matrix_setOnes_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setOnes(Index newSize) +{ + resize(newSize); + return setConstant(Scalar(1)); +} + +/** Resizes to the given size, and sets all coefficients in this expression to one. + * + * \param nbRows the new number of rows + * \param nbCols the new number of columns + * + * Example: \include Matrix_setOnes_int_int.cpp + * Output: \verbinclude Matrix_setOnes_int_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setOnes(Index nbRows, Index nbCols) +{ + resize(nbRows, nbCols); + return setConstant(Scalar(1)); +} + +// Identity: + +/** \returns an expression of the identity matrix (not necessarily square). + * + * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Identity() should be used + * instead. + * + * Example: \include MatrixBase_identity_int_int.cpp + * Output: \verbinclude MatrixBase_identity_int_int.out + * + * \sa Identity(), setIdentity(), isIdentity() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity(Index nbRows, Index nbCols) +{ + return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op()); +} + +/** \returns an expression of the identity matrix (not necessarily square). + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variant taking size arguments. + * + * Example: \include MatrixBase_identity.cpp + * Output: \verbinclude MatrixBase_identity.out + * + * \sa Identity(Index,Index), setIdentity(), isIdentity() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity() +{ + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return MatrixBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op()); +} + +/** \returns true if *this is approximately equal to the identity matrix + * (not necessarily square), + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isIdentity.cpp + * Output: \verbinclude MatrixBase_isIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity() + */ +template +bool MatrixBase::isIdentity +(const RealScalar& prec) const +{ + for(Index j = 0; j < cols(); ++j) + { + for(Index i = 0; i < rows(); ++i) + { + if(i == j) + { + if(!internal::isApprox(this->coeff(i, j), static_cast(1), prec)) + return false; + } + else + { + if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + return false; + } + } + } + return true; +} + +namespace internal { + +template=16)> +struct setIdentity_impl +{ + static EIGEN_STRONG_INLINE Derived& run(Derived& m) + { + return m = Derived::Identity(m.rows(), m.cols()); + } +}; + +template +struct setIdentity_impl +{ + typedef typename Derived::Index Index; + static EIGEN_STRONG_INLINE Derived& run(Derived& m) + { + m.setZero(); + const Index size = (std::min)(m.rows(), m.cols()); + for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1); + return m; + } +}; + +} // end namespace internal + +/** Writes the identity expression (not necessarily square) into *this. + * + * Example: \include MatrixBase_setIdentity.cpp + * Output: \verbinclude MatrixBase_setIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() + */ +template +EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() +{ + return internal::setIdentity_impl::run(derived()); +} + +/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. + * + * \param nbRows the new number of rows + * \param nbCols the new number of columns + * + * Example: \include Matrix_setIdentity_int_int.cpp + * Output: \verbinclude Matrix_setIdentity_int_int.out + * + * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() + */ +template +EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index nbRows, Index nbCols) +{ + derived().resize(nbRows, nbCols); + return setIdentity(); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index newSize, Index i) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * This variant is for fixed-size vector only. + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index i) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(),i); +} + +/** \returns an expression of the X axis unit vector (1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() +{ return Derived::Unit(0); } + +/** \returns an expression of the Y axis unit vector (0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitY() +{ return Derived::Unit(1); } + +/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() +{ return Derived::Unit(2); } + +/** \returns an expression of the W axis unit vector (0,0,0,1) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() +{ return Derived::Unit(3); } + +} // end namespace Eigen + +#endif // EIGEN_CWISE_NULLARY_OP_H diff --git a/tools/Eigen/src/Core/CwiseUnaryView.h b/tools/Eigen/src/Core/CwiseUnaryView.h new file mode 100644 index 0000000..f3b2ffe --- /dev/null +++ b/tools/Eigen/src/Core/CwiseUnaryView.h @@ -0,0 +1,139 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_VIEW_H +#define EIGEN_CWISE_UNARY_VIEW_H + +namespace Eigen { + +/** \class CwiseUnaryView + * \ingroup Core_Module + * + * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector + * + * \param ViewOp template functor implementing the view + * \param MatrixType the type of the matrix we are applying the unary operator + * + * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. + * It is the return type of real() and imag(), and most of the time this is the only way it is used. + * + * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename result_of< + ViewOp(typename traits::Scalar) + >::type Scalar; + typedef typename MatrixType::Nested MatrixTypeNested; + typedef typename remove_all::type _MatrixTypeNested; + enum { + Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), + CoeffReadCost = EIGEN_ADD_COST(traits<_MatrixTypeNested>::CoeffReadCost, functor_traits::Cost), + MatrixTypeInnerStride = inner_stride_at_compile_time::ret, + // need to cast the sizeof's from size_t to int explicitly, otherwise: + // "error: no integral type can represent all of the enumerator values + InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic + ? int(Dynamic) + : int(MatrixTypeInnerStride) * int(sizeof(typename traits::Scalar) / sizeof(Scalar)), + OuterStrideAtCompileTime = outer_stride_at_compile_time::ret == Dynamic + ? int(Dynamic) + : outer_stride_at_compile_time::ret * int(sizeof(typename traits::Scalar) / sizeof(Scalar)) + }; +}; +} + +template +class CwiseUnaryViewImpl; + +template +class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind> +{ + public: + + typedef typename CwiseUnaryViewImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + + inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) + : m_matrix(mat), m_functor(func) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) + + EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } + + /** \returns the functor representing unary operation */ + const ViewOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + const typename internal::remove_all::type& + nestedExpression() const { return m_matrix; } + + /** \returns the nested expression */ + typename internal::remove_all::type& + nestedExpression() { return m_matrix.const_cast_derived(); } + + protected: + // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC + typename internal::nested::type m_matrix; + ViewOp m_functor; +}; + +template +class CwiseUnaryViewImpl + : public internal::dense_xpr_base< CwiseUnaryView >::type +{ + public: + + typedef CwiseUnaryView Derived; + typedef typename internal::dense_xpr_base< CwiseUnaryView >::type Base; + + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + + inline Scalar* data() { return &coeffRef(0); } + inline const Scalar* data() const { return &coeff(0); } + + inline Index innerStride() const + { + return derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + } + + inline Index outerStride() const + { + return derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + } + + EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const + { + return derived().functor()(derived().nestedExpression().coeff(row, col)); + } + + EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return derived().functor()(derived().nestedExpression().coeff(index)); + } + + EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) + { + return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col)); + } + + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index)); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_UNARY_VIEW_H diff --git a/tools/Eigen/src/Core/DenseBase.h b/tools/Eigen/src/Core/DenseBase.h new file mode 100644 index 0000000..4b371b0 --- /dev/null +++ b/tools/Eigen/src/Core/DenseBase.h @@ -0,0 +1,521 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSEBASE_H +#define EIGEN_DENSEBASE_H + +namespace Eigen { + +namespace internal { + +// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type. +// This dummy function simply aims at checking that at compile time. +static inline void check_DenseIndex_is_signed() { + EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); +} + +} // end namespace internal + +/** \class DenseBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and arrays + * + * This class is the base that is inherited by all dense objects (matrix, vector, arrays, + * and related expression types). The common Eigen API for dense objects is contained in this class. + * + * \tparam Derived is the derived type, e.g., a matrix type or an expression. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. + * + * \sa \ref TopicClassHierarchy + */ +template class DenseBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + DenseCoeffsBase > +#else + : public DenseCoeffsBase +#endif // not EIGEN_PARSED_BY_DOXYGEN +{ + public: + + class InnerIterator; + + typedef typename internal::traits::StorageKind StorageKind; + + /** \brief The type of indices + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \sa \ref TopicPreprocessorDirectives. + */ + typedef typename internal::traits::Index Index; + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef internal::special_scalar_op_base > Base; + + using Base::operator*; + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::rowIndexByOuterInner; + using Base::colIndexByOuterInner; + using Base::coeff; + using Base::coeffByOuterInner; + using Base::packet; + using Base::packetByOuterInner; + using Base::writePacket; + using Base::writePacketByOuterInner; + using Base::coeffRef; + using Base::coeffRefByOuterInner; + using Base::copyCoeff; + using Base::copyCoeffByOuterInner; + using Base::copyPacket; + using Base::copyPacketByOuterInner; + using Base::operator(); + using Base::operator[]; + using Base::x; + using Base::y; + using Base::z; + using Base::w; + using Base::stride; + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + typedef typename Base::CoeffReturnType CoeffReturnType; + + enum { + + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + /**< The number of rows at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ + + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + /**< The number of columns at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ + + + SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, + internal::traits::ColsAtCompileTime>::ret), + /**< This is equal to the number of coefficients, i.e. the number of + * rows times the number of columns, or to \a Dynamic if this is not + * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ + + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + /**< This value is equal to the maximum possible number of rows that this expression + * might have. If this expression might have an arbitrarily high number of rows, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + /**< This value is equal to the maximum possible number of columns that this expression + * might have. If this expression might have an arbitrarily high number of columns, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime>::ret), + /**< This value is equal to the maximum possible number of coefficients that this expression + * might have. If this expression might have an arbitrarily high number of coefficients, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime + */ + + IsVectorAtCompileTime = internal::traits::MaxRowsAtCompileTime == 1 + || internal::traits::MaxColsAtCompileTime == 1, + /**< This is set to true if either the number of rows or the number of + * columns is known at compile-time to be equal to 1. Indeed, in that case, + * we are dealing with a column-vector (if there is only one column) or with + * a row-vector (if there is only one row). */ + + Flags = internal::traits::Flags, + /**< This stores expression \ref flags flags which may or may not be inherited by new expressions + * constructed from this one. See the \ref flags "list of flags". + */ + + IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */ + + InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) + : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + + CoeffReadCost = internal::traits::CoeffReadCost, + /**< This is a rough measure of how expensive it is to read one coefficient from + * this expression. + */ + + InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, + OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret + }; + + enum { ThisConstantIsPrivateInPlainObjectBase }; + + /** \returns the number of nonzero coefficients which is in practice the number + * of stored coefficients. */ + inline Index nonZeros() const { return size(); } + + /** \returns the outer size. + * + * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a + * column-major matrix, and the number of rows for a row-major matrix. */ + Index outerSize() const + { + return IsVectorAtCompileTime ? 1 + : int(IsRowMajor) ? this->rows() : this->cols(); + } + + /** \returns the inner size. + * + * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a + * column-major matrix, and the number of columns for a row-major matrix. */ + Index innerSize() const + { + return IsVectorAtCompileTime ? this->size() + : int(IsRowMajor) ? this->cols() : this->rows(); + } + + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + void resize(Index newSize) + { + EIGEN_ONLY_USED_FOR_DEBUG(newSize); + eigen_assert(newSize == this->size() + && "DenseBase::resize() does not actually allow to resize."); + } + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + void resize(Index nbRows, Index nbCols) + { + EIGEN_ONLY_USED_FOR_DEBUG(nbRows); + EIGEN_ONLY_USED_FOR_DEBUG(nbCols); + eigen_assert(nbRows == this->rows() && nbCols == this->cols() + && "DenseBase::resize() does not actually allow to resize."); + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,Derived> ConstantReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */ + typedef CwiseNullaryOp,Derived> SequentialLinSpacedReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ + typedef CwiseNullaryOp,Derived> RandomAccessLinSpacedReturnType; + /** \internal the return type of MatrixBase::eigenvalues() */ + typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; + +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** Copies \a other into *this. \returns a reference to *this. */ + template + Derived& operator=(const DenseBase& other); + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + Derived& operator=(const DenseBase& other); + + template + Derived& operator=(const EigenBase &other); + + template + Derived& operator+=(const EigenBase &other); + + template + Derived& operator-=(const EigenBase &other); + + template + Derived& operator=(const ReturnByValue& func); + + /** \internal Copies \a other into *this without evaluating other. \returns a reference to *this. */ + template + Derived& lazyAssign(const DenseBase& other); + + /** \internal Evaluates \a other into *this. \returns a reference to *this. */ + template + Derived& lazyAssign(const ReturnByValue& other); + + CommaInitializer operator<< (const Scalar& s); + + template + const Flagged flagged() const; + + template + CommaInitializer operator<< (const DenseBase& other); + + Eigen::Transpose transpose(); + typedef typename internal::add_const >::type ConstTransposeReturnType; + ConstTransposeReturnType transpose() const; + void transposeInPlace(); +#ifndef EIGEN_NO_DEBUG + protected: + template + void checkTransposeAliasing(const OtherDerived& other) const; + public: +#endif + + + static const ConstantReturnType + Constant(Index rows, Index cols, const Scalar& value); + static const ConstantReturnType + Constant(Index size, const Scalar& value); + static const ConstantReturnType + Constant(const Scalar& value); + + static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high); + static const RandomAccessLinSpacedReturnType + LinSpaced(Index size, const Scalar& low, const Scalar& high); + static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, const Scalar& low, const Scalar& high); + static const RandomAccessLinSpacedReturnType + LinSpaced(const Scalar& low, const Scalar& high); + + template + static const CwiseNullaryOp + NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); + template + static const CwiseNullaryOp + NullaryExpr(Index size, const CustomNullaryOp& func); + template + static const CwiseNullaryOp + NullaryExpr(const CustomNullaryOp& func); + + static const ConstantReturnType Zero(Index rows, Index cols); + static const ConstantReturnType Zero(Index size); + static const ConstantReturnType Zero(); + static const ConstantReturnType Ones(Index rows, Index cols); + static const ConstantReturnType Ones(Index size); + static const ConstantReturnType Ones(); + + void fill(const Scalar& value); + Derived& setConstant(const Scalar& value); + Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); + Derived& setLinSpaced(const Scalar& low, const Scalar& high); + Derived& setZero(); + Derived& setOnes(); + Derived& setRandom(); + + template + bool isApprox(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isMuchSmallerThan(const RealScalar& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + template + bool isMuchSmallerThan(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + + bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; + + inline bool hasNaN() const; + inline bool allFinite() const; + + inline Derived& operator*=(const Scalar& other); + inline Derived& operator/=(const Scalar& other); + + typedef typename internal::add_const_on_value_type::type>::type EvalReturnType; + /** \returns the matrix or vector obtained by evaluating this expression. + * + * Notice that in the case of a plain matrix or vector (not an expression) this function just returns + * a const reference, in order to avoid a useless copy. + */ + EIGEN_STRONG_INLINE EvalReturnType eval() const + { + // Even though MSVC does not honor strong inlining when the return type + // is a dynamic matrix, we desperately need strong inlining for fixed + // size types on MSVC. + return typename internal::eval::type(derived()); + } + + /** swaps *this with the expression \a other. + * + */ + template + void swap(const DenseBase& other, + int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase) + { + SwapWrapper(derived()).lazyAssign(other.derived()); + } + + /** swaps *this with the matrix or array \a other. + * + */ + template + void swap(PlainObjectBase& other) + { + SwapWrapper(derived()).lazyAssign(other.derived()); + } + + + inline const NestByValue nestByValue() const; + inline const ForceAlignedAccess forceAlignedAccess() const; + inline ForceAlignedAccess forceAlignedAccess(); + template inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; + template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + + Scalar sum() const; + Scalar mean() const; + Scalar trace() const; + + Scalar prod() const; + + typename internal::traits::Scalar minCoeff() const; + typename internal::traits::Scalar maxCoeff() const; + + template + typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; + template + typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; + template + typename internal::traits::Scalar minCoeff(IndexType* index) const; + template + typename internal::traits::Scalar maxCoeff(IndexType* index) const; + + template + typename internal::result_of::Scalar)>::type + redux(const BinaryOp& func) const; + + template + void visit(Visitor& func) const; + + inline const WithFormat format(const IOFormat& fmt) const; + + /** \returns the unique coefficient of a 1x1 expression */ + CoeffReturnType value() const + { + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) + eigen_assert(this->rows() == 1 && this->cols() == 1); + return derived().coeff(0,0); + } + + bool all(void) const; + bool any(void) const; + Index count() const; + + typedef VectorwiseOp RowwiseReturnType; + typedef const VectorwiseOp ConstRowwiseReturnType; + typedef VectorwiseOp ColwiseReturnType; + typedef const VectorwiseOp ConstColwiseReturnType; + + ConstRowwiseReturnType rowwise() const; + RowwiseReturnType rowwise(); + ConstColwiseReturnType colwise() const; + ColwiseReturnType colwise(); + + static const CwiseNullaryOp,Derived> Random(Index rows, Index cols); + static const CwiseNullaryOp,Derived> Random(Index size); + static const CwiseNullaryOp,Derived> Random(); + + template + const Select + select(const DenseBase& thenMatrix, + const DenseBase& elseMatrix) const; + + template + inline const Select + select(const DenseBase& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const; + + template + inline const Select + select(const typename ElseDerived::Scalar& thenScalar, const DenseBase& elseMatrix) const; + + template RealScalar lpNorm() const; + + template + inline const Replicate replicate() const; + + typedef Replicate ReplicateReturnType; + inline const ReplicateReturnType replicate(Index rowFacor,Index colFactor) const; + + typedef Reverse ReverseReturnType; + typedef const Reverse ConstReverseReturnType; + ReverseReturnType reverse(); + ConstReverseReturnType reverse() const; + void reverseInPlace(); + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase +# include "../plugins/BlockMethods.h" +# ifdef EIGEN_DENSEBASE_PLUGIN +# include EIGEN_DENSEBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS + +#ifdef EIGEN2_SUPPORT + + Block corner(CornerType type, Index cRows, Index cCols); + const Block corner(CornerType type, Index cRows, Index cCols) const; + template + Block corner(CornerType type); + template + const Block corner(CornerType type) const; + +#endif // EIGEN2_SUPPORT + + + // disable the use of evalTo for dense objects with a nice compilation error + template inline void evalTo(Dest& ) const + { + EIGEN_STATIC_ASSERT((internal::is_same::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); + } + + protected: + /** Default constructor. Do nothing. */ + DenseBase() + { + /* Just checks for self-consistency of the flags. + * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down + */ +#ifdef EIGEN_INTERNAL_DEBUGGING + EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor)) + && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))), + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION) +#endif + } + + private: + explicit DenseBase(int); + DenseBase(int,int); + template explicit DenseBase(const DenseBase&); +}; + +} // end namespace Eigen + +#endif // EIGEN_DENSEBASE_H diff --git a/tools/Eigen/src/Core/DenseCoeffsBase.h b/tools/Eigen/src/Core/DenseCoeffsBase.h new file mode 100644 index 0000000..3c890f2 --- /dev/null +++ b/tools/Eigen/src/Core/DenseCoeffsBase.h @@ -0,0 +1,754 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSECOEFFSBASE_H +#define EIGEN_DENSECOEFFSBASE_H + +namespace Eigen { + +namespace internal { +template struct add_const_on_value_type_if_arithmetic +{ + typedef typename conditional::value, T, typename add_const_on_value_type::type>::type type; +}; +} + +/** \brief Base class providing read-only coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #ReadOnlyAccessors Constant indicating read-only access + * + * This class defines the \c operator() \c const function and friends, which can be used to read specific + * entries of a matrix or array. + * + * \sa DenseCoeffsBase, DenseCoeffsBase, + * \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public EigenBase +{ + public: + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + + // Explanation for this CoeffReturnType typedef. + // - This is the return type of the coeff() method. + // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references + // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value). + // - The is_artihmetic check is required since "const int", "const double", etc. will cause warnings on some systems + // while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is + // not possible, since the underlying expressions might not offer a valid address the reference could be referring to. + typedef typename internal::conditional::Flags&LvalueBit), + const Scalar&, + typename internal::conditional::value, Scalar, const Scalar>::type + >::type CoeffReturnType; + + typedef typename internal::add_const_on_value_type_if_arithmetic< + typename internal::packet_traits::type + >::type PacketReturnType; + + typedef EigenBase Base; + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const + { + return int(Derived::RowsAtCompileTime) == 1 ? 0 + : int(Derived::ColsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const + { + return int(Derived::ColsAtCompileTime) == 1 ? 0 + : int(Derived::RowsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? inner + : outer; + } + + /** Short version: don't use this function, use + * \link operator()(Index,Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) const \endlink. + * + * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const + */ + EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return derived().coeff(row, col); + } + + EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { + return coeff(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \returns the coefficient at given the given row and column. + * + * \sa operator()(Index,Index), operator[](Index) + */ + EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const + { + eigen_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return derived().coeff(row, col); + } + + /** Short version: don't use this function, use + * \link operator[](Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameter \a index is in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) const \endlink. + * + * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const + */ + + EIGEN_STRONG_INLINE CoeffReturnType + coeff(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return derived().coeff(index); + } + + + /** \returns the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_STRONG_INLINE CoeffReturnType + operator[](Index index) const + { + #ifndef EIGEN2_SUPPORT + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + #endif + eigen_assert(index >= 0 && index < size()); + return derived().coeff(index); + } + + /** \returns the coefficient at given index. + * + * This is synonymous to operator[](Index) const. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_STRONG_INLINE CoeffReturnType + operator()(Index index) const + { + eigen_assert(index >= 0 && index < size()); + return derived().coeff(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_STRONG_INLINE CoeffReturnType + x() const { return (*this)[0]; } + + /** equivalent to operator[](1). */ + + EIGEN_STRONG_INLINE CoeffReturnType + y() const { return (*this)[1]; } + + /** equivalent to operator[](2). */ + + EIGEN_STRONG_INLINE CoeffReturnType + z() const { return (*this)[2]; } + + /** equivalent to operator[](3). */ + + EIGEN_STRONG_INLINE CoeffReturnType + w() const { return (*this)[3]; } + + /** \internal + * \returns the packet of coefficients starting at the given row and column. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return derived().template packet(row,col); + } + + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const + { + return packet(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \internal + * \returns the packet of coefficients starting at the given index. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit and the LinearAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return derived().template packet(index); + } + + protected: + // explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase. + // But some methods are only available in the DirectAccess case. + // So we add dummy methods here with these names, so that "using... " doesn't fail. + // It's not private so that the child class DenseBase can access them, and it's not public + // either since it's an implementation detail, so has to be protected. + void coeffRef(); + void coeffRefByOuterInner(); + void writePacket(); + void writePacketByOuterInner(); + void copyCoeff(); + void copyCoeffByOuterInner(); + void copyPacket(); + void copyPacketByOuterInner(); + void stride(); + void innerStride(); + void outerStride(); + void rowStride(); + void colStride(); +}; + +/** \brief Base class providing read/write coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #WriteAccessors Constant indicating read/write access + * + * This class defines the non-const \c operator() function and friends, which can be used to write specific + * entries of a matrix or array. This class inherits DenseCoeffsBase which + * defines the const variant for reading specific entries. + * + * \sa DenseCoeffsBase, \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + using Base::coeff; + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + using Base::rowIndexByOuterInner; + using Base::colIndexByOuterInner; + using Base::operator[]; + using Base::operator(); + using Base::x; + using Base::y; + using Base::z; + using Base::w; + + /** Short version: don't use this function, use + * \link operator()(Index,Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) \endlink. + * + * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index) + */ + EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return derived().coeffRef(row, col); + } + + EIGEN_STRONG_INLINE Scalar& + coeffRefByOuterInner(Index outer, Index inner) + { + return coeffRef(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \returns a reference to the coefficient at given the given row and column. + * + * \sa operator[](Index) + */ + + EIGEN_STRONG_INLINE Scalar& + operator()(Index row, Index col) + { + eigen_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return derived().coeffRef(row, col); + } + + + /** Short version: don't use this function, use + * \link operator[](Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) \endlink. + * + * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index) + */ + + EIGEN_STRONG_INLINE Scalar& + coeffRef(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return derived().coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_STRONG_INLINE Scalar& + operator[](Index index) + { + #ifndef EIGEN2_SUPPORT + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + #endif + eigen_assert(index >= 0 && index < size()); + return derived().coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This is synonymous to operator[](Index). + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_STRONG_INLINE Scalar& + operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return derived().coeffRef(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_STRONG_INLINE Scalar& + x() { return (*this)[0]; } + + /** equivalent to operator[](1). */ + + EIGEN_STRONG_INLINE Scalar& + y() { return (*this)[1]; } + + /** equivalent to operator[](2). */ + + EIGEN_STRONG_INLINE Scalar& + z() { return (*this)[2]; } + + /** equivalent to operator[](3). */ + + EIGEN_STRONG_INLINE Scalar& + w() { return (*this)[3]; } + + /** \internal + * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE void writePacket + (Index row, Index col, const typename internal::packet_traits::type& val) + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + derived().template writePacket(row,col,val); + } + + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacketByOuterInner + (Index outer, Index inner, const typename internal::packet_traits::type& val) + { + writePacket(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner), + val); + } + + /** \internal + * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit and the LinearAccessBit. + * + * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + template + EIGEN_STRONG_INLINE void writePacket + (Index index, const typename internal::packet_traits::type& val) + { + eigen_internal_assert(index >= 0 && index < size()); + derived().template writePacket(index,val); + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + + /** \internal Copies the coefficient at position (row,col) of other into *this. + * + * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code + * with usual assignments. + * + * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. + */ + + template + EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase& other) + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + derived().coeffRef(row, col) = other.derived().coeff(row, col); + } + + /** \internal Copies the coefficient at the given index of other into *this. + * + * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code + * with usual assignments. + * + * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. + */ + + template + EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase& other) + { + eigen_internal_assert(index >= 0 && index < size()); + derived().coeffRef(index) = other.derived().coeff(index); + } + + + template + EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase& other) + { + const Index row = rowIndexByOuterInner(outer,inner); + const Index col = colIndexByOuterInner(outer,inner); + // derived() is important here: copyCoeff() may be reimplemented in Derived! + derived().copyCoeff(row, col, other); + } + + /** \internal Copies the packet at position (row,col) of other into *this. + * + * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code + * with usual assignments. + * + * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. + */ + + template + EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase& other) + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + derived().template writePacket(row, col, + other.derived().template packet(row, col)); + } + + /** \internal Copies the packet at the given index of other into *this. + * + * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code + * with usual assignments. + * + * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. + */ + + template + EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase& other) + { + eigen_internal_assert(index >= 0 && index < size()); + derived().template writePacket(index, + other.derived().template packet(index)); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase& other) + { + const Index row = rowIndexByOuterInner(outer,inner); + const Index col = colIndexByOuterInner(outer,inner); + // derived() is important here: copyCoeff() may be reimplemented in Derived! + derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other); + } +#endif + +}; + +/** \brief Base class providing direct read-only coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #DirectAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. This class + * inherits DenseCoeffsBase which defines functions to access entries read-only using + * \c operator() . + * + * \sa \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + inline Index innerStride() const + { + return derived().innerStride(); + } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + inline Index outerStride() const + { + return derived().outerStride(); + } + + // FIXME shall we remove it ? + inline Index stride() const + { + return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); + } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + inline Index rowStride() const + { + return Derived::IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + inline Index colStride() const + { + return Derived::IsRowMajor ? innerStride() : outerStride(); + } +}; + +/** \brief Base class providing direct read/write coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #DirectWriteAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. This class + * inherits DenseCoeffsBase which defines functions to access entries read/write using + * \c operator(). + * + * \sa \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase + : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + inline Index innerStride() const + { + return derived().innerStride(); + } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + inline Index outerStride() const + { + return derived().outerStride(); + } + + // FIXME shall we remove it ? + inline Index stride() const + { + return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); + } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + inline Index rowStride() const + { + return Derived::IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + inline Index colStride() const + { + return Derived::IsRowMajor ? innerStride() : outerStride(); + } +}; + +namespace internal { + +template +struct first_aligned_impl +{ + static inline typename Derived::Index run(const Derived&) + { return 0; } +}; + +template +struct first_aligned_impl +{ + static inline typename Derived::Index run(const Derived& m) + { + return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size()); + } +}; + +/** \internal \returns the index of the first element of the array that is well aligned for vectorization. + * + * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more + * documentation. + */ +template +static inline typename Derived::Index first_aligned(const Derived& m) +{ + return first_aligned_impl + + ::run(m); +} + +template::ret> +struct inner_stride_at_compile_time +{ + enum { ret = traits::InnerStrideAtCompileTime }; +}; + +template +struct inner_stride_at_compile_time +{ + enum { ret = 0 }; +}; + +template::ret> +struct outer_stride_at_compile_time +{ + enum { ret = traits::OuterStrideAtCompileTime }; +}; + +template +struct outer_stride_at_compile_time +{ + enum { ret = 0 }; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSECOEFFSBASE_H diff --git a/tools/Eigen/src/Core/DenseStorage.h b/tools/Eigen/src/Core/DenseStorage.h new file mode 100644 index 0000000..568493c --- /dev/null +++ b/tools/Eigen/src/Core/DenseStorage.h @@ -0,0 +1,434 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2010 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIXSTORAGE_H +#define EIGEN_MATRIXSTORAGE_H + +#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN; +#else + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN +#endif + +namespace Eigen { + +namespace internal { + +struct constructor_without_unaligned_array_assert {}; + +template void check_static_allocation_size() +{ + // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit + #if EIGEN_STACK_ALLOCATION_LIMIT + EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + #endif +} + +/** \internal + * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: + * to 16 bytes boundary if the total size is a multiple of 16 bytes. + */ +template +struct plain_array +{ + T array[Size]; + + plain_array() + { + check_static_allocation_size(); + } + + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) +#elif EIGEN_GNUC_AT_LEAST(4,7) + // GCC 4.7 is too aggressive in its optimizations and remove the alignement test based on the fact the array is declared to be aligned. + // See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900 + // Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined: + template + EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ + eigen_assert((reinterpret_cast(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \ + && "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#else + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ + eigen_assert((reinterpret_cast(array) & sizemask) == 0 \ + && "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#endif + +template +struct plain_array +{ + EIGEN_USER_ALIGN16 T array[Size]; + + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf); + check_static_allocation_size(); + } + + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_USER_ALIGN16 T array[1]; + plain_array() {} + plain_array(constructor_without_unaligned_array_assert) {} +}; + +} // end namespace internal + +/** \internal + * + * \class DenseStorage + * \ingroup Core_Module + * + * \brief Stores the data of a matrix + * + * This class stores the data of fixed-size, dynamic-size or mixed matrices + * in a way as compact as possible. + * + * \sa Matrix + */ +template class DenseStorage; + +// purely fixed-size matrix +template class DenseStorage +{ + internal::plain_array m_data; + public: + DenseStorage() {} + DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()) {} + DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) m_data = other.m_data; + return *this; + } + DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} + void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } + static DenseIndex rows(void) {return _Rows;} + static DenseIndex cols(void) {return _Cols;} + void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} + void resize(DenseIndex,DenseIndex,DenseIndex) {} + const T *data() const { return m_data.array; } + T *data() { return m_data.array; } +}; + +// null matrix +template class DenseStorage +{ + public: + DenseStorage() {} + DenseStorage(internal::constructor_without_unaligned_array_assert) {} + DenseStorage(const DenseStorage&) {} + DenseStorage& operator=(const DenseStorage&) { return *this; } + DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} + void swap(DenseStorage& ) {} + static DenseIndex rows(void) {return _Rows;} + static DenseIndex cols(void) {return _Cols;} + void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} + void resize(DenseIndex,DenseIndex,DenseIndex) {} + const T *data() const { return 0; } + T *data() { return 0; } +}; + +// more specializations for null matrices; these are necessary to resolve ambiguities +template class DenseStorage +: public DenseStorage { }; + +template class DenseStorage +: public DenseStorage { }; + +template class DenseStorage +: public DenseStorage { }; + +// dynamic-size matrix with fixed-size storage +template class DenseStorage +{ + internal::plain_array m_data; + DenseIndex m_rows; + DenseIndex m_cols; + public: + DenseStorage() : m_rows(0), m_cols(0) {} + DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} + DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + m_cols = other.m_cols; + } + return *this; + } + DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {} + void swap(DenseStorage& other) + { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } + DenseIndex rows() const {return m_rows;} + DenseIndex cols() const {return m_cols;} + void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } + void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } + const T *data() const { return m_data.array; } + T *data() { return m_data.array; } +}; + +// dynamic-size matrix with fixed-size storage and fixed width +template class DenseStorage +{ + internal::plain_array m_data; + DenseIndex m_rows; + public: + DenseStorage() : m_rows(0) {} + DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} + DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + } + return *this; + } + DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {} + void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + DenseIndex rows(void) const {return m_rows;} + DenseIndex cols(void) const {return _Cols;} + void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } + void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } + const T *data() const { return m_data.array; } + T *data() { return m_data.array; } +}; + +// dynamic-size matrix with fixed-size storage and fixed height +template class DenseStorage +{ + internal::plain_array m_data; + DenseIndex m_cols; + public: + DenseStorage() : m_cols(0) {} + DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} + DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_cols = other.m_cols; + } + return *this; + } + DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {} + void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + DenseIndex rows(void) const {return _Rows;} + DenseIndex cols(void) const {return m_cols;} + void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; } + void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; } + const T *data() const { return m_data.array; } + T *data() { return m_data.array; } +}; + +// purely dynamic matrix. +template class DenseStorage +{ + T *m_data; + DenseIndex m_rows; + DenseIndex m_cols; + public: + DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} + DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(0), m_rows(0), m_cols(0) {} + DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows), m_cols(nbCols) + { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + } + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + swap(m_cols, other.m_cols); + return *this; + } +#endif + ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } + void swap(DenseStorage& other) + { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } + DenseIndex rows(void) const {return m_rows;} + DenseIndex cols(void) const {return m_cols;} + void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*m_cols); + m_rows = nbRows; + m_cols = nbCols; + } + void resize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + { + if(size != m_rows*m_cols) + { + internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + } + m_rows = nbRows; + m_cols = nbCols; + } + const T *data() const { return m_data; } + T *data() { return m_data; } + private: + DenseStorage(const DenseStorage&); + DenseStorage& operator=(const DenseStorage&); +}; + +// matrix with dynamic width and fixed height (so that matrix has dynamic size). +template class DenseStorage +{ + T *m_data; + DenseIndex m_cols; + public: + DenseStorage() : m_data(0), m_cols(0) {} + DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} + DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(nbCols) + { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + } + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_cols, other.m_cols); + return *this; + } +#endif + ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } + void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + static DenseIndex rows(void) {return _Rows;} + DenseIndex cols(void) const {return m_cols;} + void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); + m_cols = nbCols; + } + EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex nbCols) + { + if(size != _Rows*m_cols) + { + internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + } + m_cols = nbCols; + } + const T *data() const { return m_data; } + T *data() { return m_data; } + private: + DenseStorage(const DenseStorage&); + DenseStorage& operator=(const DenseStorage&); +}; + +// matrix with dynamic height and fixed width (so that matrix has dynamic size). +template class DenseStorage +{ + T *m_data; + DenseIndex m_rows; + public: + DenseStorage() : m_data(0), m_rows(0) {} + DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows) + { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + { + other.m_data = nullptr; + } + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + return *this; + } +#endif + ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } + void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + DenseIndex rows(void) const {return m_rows;} + static DenseIndex cols(void) {return _Cols;} + void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*_Cols); + m_rows = nbRows; + } + EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex nbRows, DenseIndex) + { + if(size != m_rows*_Cols) + { + internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + } + m_rows = nbRows; + } + const T *data() const { return m_data; } + T *data() { return m_data; } + private: + DenseStorage(const DenseStorage&); + DenseStorage& operator=(const DenseStorage&); +}; + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H From cf3a21dab96ade54cb0fad6341c69179e26d446b Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:09:58 +0200 Subject: [PATCH 14/25] add more eigen files --- tools/Eigen/src/Core/CwiseUnaryOp.h | 126 +++ tools/Eigen/src/Core/Diagonal.h | 237 +++++ tools/Eigen/src/Core/DiagonalMatrix.h | 313 +++++++ tools/Eigen/src/Core/DiagonalProduct.h | 131 +++ tools/Eigen/src/Core/Dot.h | 263 ++++++ tools/Eigen/src/Core/EigenBase.h | 131 +++ tools/Eigen/src/Core/Flagged.h | 140 +++ tools/Eigen/src/Core/ForceAlignedAccess.h | 146 +++ tools/Eigen/src/Core/Functors.h | 1026 +++++++++++++++++++++ tools/Eigen/src/Core/Fuzzy.h | 150 +++ 10 files changed, 2663 insertions(+) create mode 100644 tools/Eigen/src/Core/CwiseUnaryOp.h create mode 100644 tools/Eigen/src/Core/Diagonal.h create mode 100644 tools/Eigen/src/Core/DiagonalMatrix.h create mode 100644 tools/Eigen/src/Core/DiagonalProduct.h create mode 100644 tools/Eigen/src/Core/Dot.h create mode 100644 tools/Eigen/src/Core/EigenBase.h create mode 100644 tools/Eigen/src/Core/Flagged.h create mode 100644 tools/Eigen/src/Core/ForceAlignedAccess.h create mode 100644 tools/Eigen/src/Core/Functors.h create mode 100644 tools/Eigen/src/Core/Fuzzy.h diff --git a/tools/Eigen/src/Core/CwiseUnaryOp.h b/tools/Eigen/src/Core/CwiseUnaryOp.h new file mode 100644 index 0000000..f7ee60e --- /dev/null +++ b/tools/Eigen/src/Core/CwiseUnaryOp.h @@ -0,0 +1,126 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_OP_H +#define EIGEN_CWISE_UNARY_OP_H + +namespace Eigen { + +/** \class CwiseUnaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise unary operator is applied to an expression + * + * \param UnaryOp template functor implementing the operator + * \param XprType the type of the expression to which we are applying the unary operator + * + * This class represents an expression where a unary operator is applied to an expression. + * It is the return type of all operations taking exactly 1 input expression, regardless of the + * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix + * is considered unary, because only the right-hand side is an expression, and its + * return type is a specialization of CwiseUnaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseUnaryOp types explicitly. + * + * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename result_of< + UnaryOp(typename XprType::Scalar) + >::type Scalar; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + enum { + Flags = _XprTypeNested::Flags & ( + HereditaryBits | LinearAccessBit | AlignedBit + | (functor_traits::PacketAccess ? PacketAccessBit : 0)), + CoeffReadCost = EIGEN_ADD_COST(_XprTypeNested::CoeffReadCost, functor_traits::Cost) + }; +}; +} + +template +class CwiseUnaryOpImpl; + +template +class CwiseUnaryOp : internal::no_assignment_operator, + public CwiseUnaryOpImpl::StorageKind> +{ + public: + + typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + + inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); } + + /** \returns the functor representing the unary operation */ + const UnaryOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + /** \returns the nested expression */ + typename internal::remove_all::type& + nestedExpression() { return m_xpr.const_cast_derived(); } + + protected: + typename XprType::Nested m_xpr; + const UnaryOp m_functor; +}; + +// This is the generic implementation for dense storage. +// It can be used for any expression types implementing the dense concept. +template +class CwiseUnaryOpImpl + : public internal::dense_xpr_base >::type +{ + public: + + typedef CwiseUnaryOp Derived; + typedef typename internal::dense_xpr_base >::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + + EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const + { + return derived().functor()(derived().nestedExpression().coeff(rowId, colId)); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const + { + return derived().functor().packetOp(derived().nestedExpression().template packet(rowId, colId)); + } + + EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + return derived().functor()(derived().nestedExpression().coeff(index)); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index index) const + { + return derived().functor().packetOp(derived().nestedExpression().template packet(index)); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_UNARY_OP_H diff --git a/tools/Eigen/src/Core/Diagonal.h b/tools/Eigen/src/Core/Diagonal.h new file mode 100644 index 0000000..68cf6d4 --- /dev/null +++ b/tools/Eigen/src/Core/Diagonal.h @@ -0,0 +1,237 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2009 Benoit Jacob +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONAL_H +#define EIGEN_DIAGONAL_H + +namespace Eigen { + +/** \class Diagonal + * \ingroup Core_Module + * + * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix + * + * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal + * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. + * A positive value means a superdiagonal, a negative value means a subdiagonal. + * You can also use Dynamic so the index can be set at runtime. + * + * The matrix is not required to be square. + * + * This class represents an expression of the main diagonal, or any sub/super diagonal + * of a square matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the + * time this is the only way it is used. + * + * \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index) + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename nested::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + typedef typename MatrixType::StorageKind StorageKind; + enum { + RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic + : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), + MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + ColsAtCompileTime = 1, + MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic + : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, + MatrixType::MaxColsAtCompileTime) + : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), + MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + MaxColsAtCompileTime = 1, + MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, + CoeffReadCost = _MatrixTypeNested::CoeffReadCost, + MatrixTypeOuterStride = outer_stride_at_compile_time::ret, + InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, + OuterStrideAtCompileTime = 0 + }; +}; +} + +template class Diagonal + : public internal::dense_xpr_base< Diagonal >::type +{ + public: + + enum { DiagIndex = _DiagIndex }; + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) + + inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) + + inline Index rows() const + { return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); } + + inline Index cols() const { return 1; } + + inline Index innerStride() const + { + return m_matrix.outerStride() + 1; + } + + inline Index outerStride() const + { + return 0; + } + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + + inline Scalar& coeffRef(Index row, Index) + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); + } + + inline const Scalar& coeffRef(Index row, Index) const + { + return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); + } + + inline CoeffReturnType coeff(Index row, Index) const + { + return m_matrix.coeff(row+rowOffset(), row+colOffset()); + } + + inline Scalar& coeffRef(Index idx) + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); + } + + inline const Scalar& coeffRef(Index idx) const + { + return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); + } + + inline CoeffReturnType coeff(Index idx) const + { + return m_matrix.coeff(idx+rowOffset(), idx+colOffset()); + } + + const typename internal::remove_all::type& + nestedExpression() const + { + return m_matrix; + } + + int index() const + { + return m_index.value(); + } + + protected: + typename MatrixType::Nested m_matrix; + const internal::variable_if_dynamicindex m_index; + + private: + // some compilers may fail to optimize std::max etc in case of compile-time constants... + EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); } + EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } + EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } + // triger a compile time error is someone try to call packet + template typename MatrixType::PacketReturnType packet(Index) const; + template typename MatrixType::PacketReturnType packet(Index,Index) const; +}; + +/** \returns an expression of the main diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * Example: \include MatrixBase_diagonal.cpp + * Output: \verbinclude MatrixBase_diagonal.out + * + * \sa class Diagonal */ +template +inline typename MatrixBase::DiagonalReturnType +MatrixBase::diagonal() +{ + return derived(); +} + +/** This is the const version of diagonal(). */ +template +inline typename MatrixBase::ConstDiagonalReturnType +MatrixBase::diagonal() const +{ + return ConstDiagonalReturnType(derived()); +} + +/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0 + * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal. + * + * Example: \include MatrixBase_diagonal_int.cpp + * Output: \verbinclude MatrixBase_diagonal_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +inline typename MatrixBase::DiagonalDynamicIndexReturnType +MatrixBase::diagonal(Index index) +{ + return DiagonalDynamicIndexReturnType(derived(), index); +} + +/** This is the const version of diagonal(Index). */ +template +inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType +MatrixBase::diagonal(Index index) const +{ + return ConstDiagonalDynamicIndexReturnType(derived(), index); +} + +/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0 + * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal. + * + * Example: \include MatrixBase_diagonal_template_int.cpp + * Output: \verbinclude MatrixBase_diagonal_template_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +template +inline typename MatrixBase::template DiagonalIndexReturnType::Type +MatrixBase::diagonal() +{ + return derived(); +} + +/** This is the const version of diagonal(). */ +template +template +inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +MatrixBase::diagonal() const +{ + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONAL_H diff --git a/tools/Eigen/src/Core/DiagonalMatrix.h b/tools/Eigen/src/Core/DiagonalMatrix.h new file mode 100644 index 0000000..e6c220f --- /dev/null +++ b/tools/Eigen/src/Core/DiagonalMatrix.h @@ -0,0 +1,313 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONALMATRIX_H +#define EIGEN_DIAGONALMATRIX_H + +namespace Eigen { + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +class DiagonalBase : public EigenBase +{ + public: + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::RealScalar RealScalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + IsVectorAtCompileTime = 0, + Flags = 0 + }; + + typedef Matrix DenseMatrixType; + typedef DenseMatrixType DenseType; + typedef DiagonalMatrix PlainObject; + + inline const Derived& derived() const { return *static_cast(this); } + inline Derived& derived() { return *static_cast(this); } + + DenseMatrixType toDenseMatrix() const { return derived(); } + template + void evalTo(MatrixBase &other) const; + template + void addTo(MatrixBase &other) const + { other.diagonal() += diagonal(); } + template + void subTo(MatrixBase &other) const + { other.diagonal() -= diagonal(); } + + inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + inline DiagonalVectorType& diagonal() { return derived().diagonal(); } + + inline Index rows() const { return diagonal().size(); } + inline Index cols() const { return diagonal().size(); } + + /** \returns the diagonal matrix product of \c *this by the matrix \a matrix. + */ + template + const DiagonalProduct + operator*(const MatrixBase &matrix) const + { + return DiagonalProduct(matrix.derived(), derived()); + } + + inline const DiagonalWrapper, const DiagonalVectorType> > + inverse() const + { + return diagonal().cwiseInverse(); + } + + inline const DiagonalWrapper, const DiagonalVectorType> > + operator*(const Scalar& scalar) const + { + return diagonal() * scalar; + } + friend inline const DiagonalWrapper, const DiagonalVectorType> > + operator*(const Scalar& scalar, const DiagonalBase& other) + { + return other.diagonal() * scalar; + } + + #ifdef EIGEN2_SUPPORT + template + bool isApprox(const DiagonalBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const + { + return diagonal().isApprox(other.diagonal(), precision); + } + template + bool isApprox(const MatrixBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const + { + return toDenseMatrix().isApprox(other, precision); + } + #endif +}; + +template +template +void DiagonalBase::evalTo(MatrixBase &other) const +{ + other.setZero(); + other.diagonal() = diagonal(); +} +#endif + +/** \class DiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a diagonal matrix with its storage + * + * \param _Scalar the type of coefficients + * \param SizeAtCompileTime the dimension of the matrix, or Dynamic + * \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults + * to SizeAtCompileTime. Most of the time, you do not need to specify it. + * + * \sa class DiagonalWrapper + */ + +namespace internal { +template +struct traits > + : traits > +{ + typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; + typedef Dense StorageKind; + typedef DenseIndex Index; + enum { + Flags = LvalueBit + }; +}; +} +template +class DiagonalMatrix + : public DiagonalBase > +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef const DiagonalMatrix& Nested; + typedef _Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + #endif + + protected: + + DiagonalVectorType m_diagonal; + + public: + + /** const version of diagonal(). */ + inline const DiagonalVectorType& diagonal() const { return m_diagonal; } + /** \returns a reference to the stored vector of diagonal coefficients. */ + inline DiagonalVectorType& diagonal() { return m_diagonal; } + + /** Default constructor without initialization */ + inline DiagonalMatrix() {} + + /** Constructs a diagonal matrix with given dimension */ + inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + + /** 2D constructor. */ + inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {} + + /** 3D constructor. */ + inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {} + + /** Copy constructor. */ + template + inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */ + inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {} + #endif + + /** generic constructor from expression of the diagonal coefficients */ + template + explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) + {} + + /** Copy operator. */ + template + DiagonalMatrix& operator=(const DiagonalBase& other) + { + m_diagonal = other.diagonal(); + return *this; + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + DiagonalMatrix& operator=(const DiagonalMatrix& other) + { + m_diagonal = other.diagonal(); + return *this; + } + #endif + + /** Resizes to given size. */ + inline void resize(Index size) { m_diagonal.resize(size); } + /** Sets all coefficients to zero. */ + inline void setZero() { m_diagonal.setZero(); } + /** Resizes and sets all coefficients to zero. */ + inline void setZero(Index size) { m_diagonal.setZero(size); } + /** Sets this matrix to be the identity matrix of the current size. */ + inline void setIdentity() { m_diagonal.setOnes(); } + /** Sets this matrix to be the identity matrix of the given size. */ + inline void setIdentity(Index size) { m_diagonal.setOnes(size); } +}; + +/** \class DiagonalWrapper + * \ingroup Core_Module + * + * \brief Expression of a diagonal matrix + * + * \param _DiagonalVectorType the type of the vector of diagonal coefficients + * + * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients, + * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal() + * and most of the time this is the only way that it is used. + * + * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal() + */ + +namespace internal { +template +struct traits > +{ + typedef _DiagonalVectorType DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::Index Index; + typedef typename DiagonalVectorType::StorageKind StorageKind; + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + Flags = traits::Flags & LvalueBit + }; +}; +} + +template +class DiagonalWrapper + : public DiagonalBase >, internal::no_assignment_operator +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef _DiagonalVectorType DiagonalVectorType; + typedef DiagonalWrapper Nested; + #endif + + /** Constructor from expression of diagonal coefficients to wrap. */ + inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + + /** \returns a const reference to the wrapped expression of diagonal coefficients. */ + const DiagonalVectorType& diagonal() const { return m_diagonal; } + + protected: + typename DiagonalVectorType::Nested m_diagonal; +}; + +/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients + * + * \only_for_vectors + * + * Example: \include MatrixBase_asDiagonal.cpp + * Output: \verbinclude MatrixBase_asDiagonal.out + * + * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() + **/ +template +inline const DiagonalWrapper +MatrixBase::asDiagonal() const +{ + return derived(); +} + +/** \returns true if *this is approximately equal to a diagonal matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isDiagonal.cpp + * Output: \verbinclude MatrixBase_isDiagonal.out + * + * \sa asDiagonal() + */ +template +bool MatrixBase::isDiagonal(const RealScalar& prec) const +{ + using std::abs; + if(cols() != rows()) return false; + RealScalar maxAbsOnDiagonal = static_cast(-1); + for(Index j = 0; j < cols(); ++j) + { + RealScalar absOnDiagonal = abs(coeff(j,j)); + if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal; + } + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < j; ++i) + { + if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false; + if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false; + } + return true; +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALMATRIX_H diff --git a/tools/Eigen/src/Core/DiagonalProduct.h b/tools/Eigen/src/Core/DiagonalProduct.h new file mode 100644 index 0000000..cc6b536 --- /dev/null +++ b/tools/Eigen/src/Core/DiagonalProduct.h @@ -0,0 +1,131 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONALPRODUCT_H +#define EIGEN_DIAGONALPRODUCT_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : traits +{ + typedef typename scalar_product_traits::ReturnType Scalar; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + + _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), + _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, + + Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), + Cost0 = EIGEN_ADD_COST(NumTraits::MulCost, MatrixType::CoeffReadCost), + CoeffReadCost = EIGEN_ADD_COST(Cost0,DiagonalType::DiagonalVectorType::CoeffReadCost) + }; +}; +} + +template +class DiagonalProduct : internal::no_assignment_operator, + public MatrixBase > +{ + public: + + typedef MatrixBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct) + + inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal) + : m_matrix(matrix), m_diagonal(diagonal) + { + eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); + } + + EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } + + EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col); + } + + EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + enum { + StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const + { + enum { + StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor + }; + const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col; + return packet_impl(row,col,indexInDiagonalVector,typename internal::conditional< + ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) + ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type()); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const + { + enum { + StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } + + protected: + template + EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matrix.template packet(row, col), + internal::pset1(m_diagonal.diagonal().coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) + }; + return internal::pmul(m_matrix.template packet(row, col), + m_diagonal.diagonal().template packet(id)); + } + + typename MatrixType::Nested m_matrix; + typename DiagonalType::Nested m_diagonal; +}; + +/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. + */ +template +template +inline const DiagonalProduct +MatrixBase::operator*(const DiagonalBase &a_diagonal) const +{ + return DiagonalProduct(derived(), a_diagonal.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALPRODUCT_H diff --git a/tools/Eigen/src/Core/Dot.h b/tools/Eigen/src/Core/Dot.h new file mode 100644 index 0000000..9d7651f --- /dev/null +++ b/tools/Eigen/src/Core/Dot.h @@ -0,0 +1,263 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008, 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DOT_H +#define EIGEN_DOT_H + +namespace Eigen { + +namespace internal { + +// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot +// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE +// looking at the static assertions. Thus this is a trick to get better compile errors. +template +struct dot_nocheck +{ + typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + { + return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + } +}; + +template +struct dot_nocheck +{ + typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + { + return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + } +}; + +} // end namespace internal + +/** \returns the dot product of *this with other. + * + * \only_for_vectors + * + * \note If the scalar type is complex numbers, then this function returns the hermitian + * (sesquilinear) dot product, conjugate-linear in the first variable and linear in the + * second variable. + * + * \sa squaredNorm(), norm() + */ +template +template +typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType +MatrixBase::dot(const MatrixBase& other) const +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) + typedef internal::scalar_conj_product_op func; + EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar); + + eigen_assert(size() == other.size()); + + return internal::dot_nocheck::run(*this, other); +} + +#ifdef EIGEN2_SUPPORT +/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable + * (conjugating the second variable). Of course this only makes a difference in the complex case. + * + * This method is only available in EIGEN2_SUPPORT mode. + * + * \only_for_vectors + * + * \sa dot() + */ +template +template +typename internal::traits::Scalar +MatrixBase::eigen2_dot(const MatrixBase& other) const +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) + EIGEN_STATIC_ASSERT((internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + eigen_assert(size() == other.size()); + + return internal::dot_nocheck::run(other,*this); +} +#endif + + +//---------- implementation of L2 norm and related functions ---------- + +/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm. + * In both cases, it consists in the sum of the square of all the matrix entries. + * For vectors, this is also equals to the dot product of \c *this with itself. + * + * \sa dot(), norm() + */ +template +EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const +{ + return numext::real((*this).cwiseAbs2().sum()); +} + +/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm. + * In both cases, it consists in the square root of the sum of the square of all the matrix entries. + * For vectors, this is also equals to the square root of the dot product of \c *this with itself. + * + * \sa dot(), squaredNorm() + */ +template +inline typename NumTraits::Scalar>::Real MatrixBase::norm() const +{ + using std::sqrt; + return sqrt(squaredNorm()); +} + +/** \returns an expression of the quotient of *this by its own norm. + * + * \only_for_vectors + * + * \sa norm(), normalize() + */ +template +inline const typename MatrixBase::PlainObject +MatrixBase::normalized() const +{ + typedef typename internal::nested::type Nested; + typedef typename internal::remove_reference::type _Nested; + _Nested n(derived()); + return n / n.norm(); +} + +/** Normalizes the vector, i.e. divides it by its own norm. + * + * \only_for_vectors + * + * \sa norm(), normalized() + */ +template +inline void MatrixBase::normalize() +{ + *this /= norm(); +} + +//---------- implementation of other norms ---------- + +namespace internal { + +template +struct lpNorm_selector +{ + typedef typename NumTraits::Scalar>::Real RealScalar; + static inline RealScalar run(const MatrixBase& m) + { + using std::pow; + return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p); + } +}; + +template +struct lpNorm_selector +{ + static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + { + return m.cwiseAbs().sum(); + } +}; + +template +struct lpNorm_selector +{ + static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + { + return m.norm(); + } +}; + +template +struct lpNorm_selector +{ + static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + { + return m.cwiseAbs().maxCoeff(); + } +}; + +} // end namespace internal + +/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values + * of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ + * norm, that is the maximum of the absolute values of the coefficients of *this. + * + * \sa norm() + */ +template +template +inline typename NumTraits::Scalar>::Real +MatrixBase::lpNorm() const +{ + return internal::lpNorm_selector::run(*this); +} + +//---------- implementation of isOrthogonal / isUnitary ---------- + +/** \returns true if *this is approximately orthogonal to \a other, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isOrthogonal.cpp + * Output: \verbinclude MatrixBase_isOrthogonal.out + */ +template +template +bool MatrixBase::isOrthogonal +(const MatrixBase& other, const RealScalar& prec) const +{ + typename internal::nested::type nested(derived()); + typename internal::nested::type otherNested(other.derived()); + return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); +} + +/** \returns true if *this is approximately an unitary matrix, + * within the precision given by \a prec. In the case where the \a Scalar + * type is real numbers, a unitary matrix is an orthogonal matrix, whence the name. + * + * \note This can be used to check whether a family of vectors forms an orthonormal basis. + * Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an + * orthonormal basis. + * + * Example: \include MatrixBase_isUnitary.cpp + * Output: \verbinclude MatrixBase_isUnitary.out + */ +template +bool MatrixBase::isUnitary(const RealScalar& prec) const +{ + typename Derived::Nested nested(derived()); + for(Index i = 0; i < cols(); ++i) + { + if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast(1), prec)) + return false; + for(Index j = 0; j < i; ++j) + if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast(1), prec)) + return false; + } + return true; +} + +} // end namespace Eigen + +#endif // EIGEN_DOT_H diff --git a/tools/Eigen/src/Core/EigenBase.h b/tools/Eigen/src/Core/EigenBase.h new file mode 100644 index 0000000..fadb458 --- /dev/null +++ b/tools/Eigen/src/Core/EigenBase.h @@ -0,0 +1,131 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EIGENBASE_H +#define EIGEN_EIGENBASE_H + +namespace Eigen { + +/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). + * + * In other words, an EigenBase object is an object that can be copied into a MatrixBase. + * + * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc. + * + * Notice that this class is trivial, it is only used to disambiguate overloaded functions. + * + * \sa \ref TopicClassHierarchy + */ +template struct EigenBase +{ +// typedef typename internal::plain_matrix_type::type PlainObject; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + /** \returns a reference to the derived object */ + Derived& derived() { return *static_cast(this); } + /** \returns a const reference to the derived object */ + const Derived& derived() const { return *static_cast(this); } + + inline Derived& const_cast_derived() const + { return *static_cast(const_cast(this)); } + inline const Derived& const_derived() const + { return *static_cast(this); } + + /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + inline Index rows() const { return derived().rows(); } + /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + inline Index cols() const { return derived().cols(); } + /** \returns the number of coefficients, which is rows()*cols(). + * \sa rows(), cols(), SizeAtCompileTime. */ + inline Index size() const { return rows() * cols(); } + + /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ + template inline void evalTo(Dest& dst) const + { derived().evalTo(dst); } + + /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ + template inline void addTo(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + typename Dest::PlainObject res(rows(),cols()); + evalTo(res); + dst += res; + } + + /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ + template inline void subTo(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + typename Dest::PlainObject res(rows(),cols()); + evalTo(res); + dst -= res; + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ + template inline void applyThisOnTheRight(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + dst = dst * this->derived(); + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ + template inline void applyThisOnTheLeft(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + dst = this->derived() * dst; + } + +}; + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** \brief Copies the generic expression \a other into *this. + * + * \details The expression must provide a (templated) evalTo(Derived& dst) const + * function which does the actual job. In practice, this allows any user to write + * its own special matrix without having to modify MatrixBase + * + * \returns a reference to *this. + */ +template +template +Derived& DenseBase::operator=(const EigenBase &other) +{ + other.derived().evalTo(derived()); + return derived(); +} + +template +template +Derived& DenseBase::operator+=(const EigenBase &other) +{ + other.derived().addTo(derived()); + return derived(); +} + +template +template +Derived& DenseBase::operator-=(const EigenBase &other) +{ + other.derived().subTo(derived()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_EIGENBASE_H diff --git a/tools/Eigen/src/Core/Flagged.h b/tools/Eigen/src/Core/Flagged.h new file mode 100644 index 0000000..1f2955f --- /dev/null +++ b/tools/Eigen/src/Core/Flagged.h @@ -0,0 +1,140 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FLAGGED_H +#define EIGEN_FLAGGED_H + +namespace Eigen { + +/** \class Flagged + * \ingroup Core_Module + * + * \brief Expression with modified flags + * + * \param ExpressionType the type of the object of which we are modifying the flags + * \param Added the flags added to the expression + * \param Removed the flags removed from the expression (has priority over Added). + * + * This class represents an expression whose flags have been modified. + * It is the return type of MatrixBase::flagged() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::flagged() + */ + +namespace internal { +template +struct traits > : traits +{ + enum { Flags = (ExpressionType::Flags | Added) & ~Removed }; +}; +} + +template class Flagged + : public MatrixBase > +{ + public: + + typedef MatrixBase Base; + + EIGEN_DENSE_PUBLIC_INTERFACE(Flagged) + typedef typename internal::conditional::ret, + ExpressionType, const ExpressionType&>::type ExpressionTypeNested; + typedef typename ExpressionType::InnerIterator InnerIterator; + + inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + inline Index outerStride() const { return m_matrix.outerStride(); } + inline Index innerStride() const { return m_matrix.innerStride(); } + + inline CoeffReturnType coeff(Index row, Index col) const + { + return m_matrix.coeff(row, col); + } + + inline CoeffReturnType coeff(Index index) const + { + return m_matrix.coeff(index); + } + + inline const Scalar& coeffRef(Index row, Index col) const + { + return m_matrix.const_cast_derived().coeffRef(row, col); + } + + inline const Scalar& coeffRef(Index index) const + { + return m_matrix.const_cast_derived().coeffRef(index); + } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_matrix.const_cast_derived().coeffRef(row, col); + } + + inline Scalar& coeffRef(Index index) + { + return m_matrix.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return m_matrix.template packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_matrix.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacket(index, x); + } + + const ExpressionType& _expression() const { return m_matrix; } + + template + typename ExpressionType::PlainObject solveTriangular(const MatrixBase& other) const; + + template + void solveTriangularInPlace(const MatrixBase& other) const; + + protected: + ExpressionTypeNested m_matrix; +}; + +/** \returns an expression of *this with added and removed flags + * + * This is mostly for internal use. + * + * \sa class Flagged + */ +template +template +inline const Flagged +DenseBase::flagged() const +{ + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_FLAGGED_H diff --git a/tools/Eigen/src/Core/ForceAlignedAccess.h b/tools/Eigen/src/Core/ForceAlignedAccess.h new file mode 100644 index 0000000..807c7a2 --- /dev/null +++ b/tools/Eigen/src/Core/ForceAlignedAccess.h @@ -0,0 +1,146 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FORCEALIGNEDACCESS_H +#define EIGEN_FORCEALIGNEDACCESS_H + +namespace Eigen { + +/** \class ForceAlignedAccess + * \ingroup Core_Module + * + * \brief Enforce aligned packet loads and stores regardless of what is requested + * + * \param ExpressionType the type of the object of which we are forcing aligned packet access + * + * This class is the return type of MatrixBase::forceAlignedAccess() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::forceAlignedAccess() + */ + +namespace internal { +template +struct traits > : public traits +{}; +} + +template class ForceAlignedAccess + : public internal::dense_xpr_base< ForceAlignedAccess >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) + + inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + + inline Index rows() const { return m_expression.rows(); } + inline Index cols() const { return m_expression.cols(); } + inline Index outerStride() const { return m_expression.outerStride(); } + inline Index innerStride() const { return m_expression.innerStride(); } + + inline const CoeffReturnType coeff(Index row, Index col) const + { + return m_expression.coeff(row, col); + } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + inline const CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return m_expression.template packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(index, x); + } + + operator const ExpressionType&() const { return m_expression; } + + protected: + const ExpressionType& m_expression; + + private: + ForceAlignedAccess& operator=(const ForceAlignedAccess&); +}; + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(),class ForceAlignedAccess + */ +template +inline const ForceAlignedAccess +MatrixBase::forceAlignedAccess() const +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(), class ForceAlignedAccess + */ +template +inline ForceAlignedAccess +MatrixBase::forceAlignedAccess() +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline typename internal::add_const_on_value_type,Derived&>::type>::type +MatrixBase::forceAlignedAccessIf() const +{ + return derived(); +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline typename internal::conditional,Derived&>::type +MatrixBase::forceAlignedAccessIf() +{ + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_FORCEALIGNEDACCESS_H diff --git a/tools/Eigen/src/Core/Functors.h b/tools/Eigen/src/Core/Functors.h new file mode 100644 index 0000000..5f14c65 --- /dev/null +++ b/tools/Eigen/src/Core/Functors.h @@ -0,0 +1,1026 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FUNCTORS_H +#define EIGEN_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +// associative functors: + +/** \internal + * \brief Template functor to compute the sum of two scalars + * + * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum() + */ +template struct scalar_sum_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) + EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::padd(a,b); } + template + EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + { return internal::predux(a); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost, + PacketAccess = packet_traits::HasAdd + }; +}; + +/** \internal + * \brief Template functor to compute the product of two scalars + * + * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() + */ +template struct scalar_product_op { + enum { + // TODO vectorize mixed product + Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul + }; + typedef typename scalar_product_traits::ReturnType result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) + EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pmul(a,b); } + template + EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const + { return internal::predux_mul(a); } +}; +template +struct functor_traits > { + enum { + Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! + PacketAccess = scalar_product_op::Vectorizable + }; +}; + +/** \internal + * \brief Template functor to compute the conjugate product of two scalars + * + * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) + */ +template struct scalar_conj_product_op { + + enum { + Conj = NumTraits::IsComplex + }; + + typedef typename scalar_product_traits::ReturnType result_type; + + EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) + EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const + { return conj_helper().pmul(a,b); } + + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return conj_helper().pmul(a,b); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::MulCost, + PacketAccess = internal::is_same::value && packet_traits::HasMul + }; +}; + +/** \internal + * \brief Template functor to compute the min of two scalars + * + * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() + */ +template struct scalar_min_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) + EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pmin(a,b); } + template + EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + { return internal::predux_min(a); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost, + PacketAccess = packet_traits::HasMin + }; +}; + +/** \internal + * \brief Template functor to compute the max of two scalars + * + * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() + */ +template struct scalar_max_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) + EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pmax(a,b); } + template + EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + { return internal::predux_max(a); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost, + PacketAccess = packet_traits::HasMax + }; +}; + +/** \internal + * \brief Template functor to compute the hypot of two scalars + * + * \sa MatrixBase::stableNorm(), class Redux + */ +template struct scalar_hypot_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) +// typedef typename NumTraits::Real result_type; + EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const + { + using std::max; + using std::min; + using std::sqrt; + Scalar p = (max)(_x, _y); + Scalar q = (min)(_x, _y); + Scalar qp = q/p; + return p * sqrt(Scalar(1) + qp*qp); + } +}; +template +struct functor_traits > { + enum { Cost = 5 * NumTraits::MulCost, PacketAccess=0 }; +}; + +/** \internal + * \brief Template functor to compute the pow of two scalars + */ +template struct scalar_binary_pow_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) + inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } +}; +template +struct functor_traits > { + enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; +}; + +// other binary functors: + +/** \internal + * \brief Template functor to compute the difference of two scalars + * + * \sa class CwiseBinaryOp, MatrixBase::operator- + */ +template struct scalar_difference_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) + EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::psub(a,b); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost, + PacketAccess = packet_traits::HasSub + }; +}; + +/** \internal + * \brief Template functor to compute the quotient of two scalars + * + * \sa class CwiseBinaryOp, Cwise::operator/() + */ +template struct scalar_quotient_op { + enum { + // TODO vectorize mixed product + Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv + }; + typedef typename scalar_product_traits::ReturnType result_type; + EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) + EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pdiv(a,b); } +}; +template +struct functor_traits > { + enum { + Cost = (NumTraits::MulCost + NumTraits::MulCost), // rough estimate! + PacketAccess = scalar_quotient_op::Vectorizable + }; +}; + + + +/** \internal + * \brief Template functor to compute the and of two booleans + * + * \sa class CwiseBinaryOp, ArrayBase::operator&& + */ +struct scalar_boolean_and_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op) + EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; } +}; +template<> struct functor_traits { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + +/** \internal + * \brief Template functor to compute the or of two booleans + * + * \sa class CwiseBinaryOp, ArrayBase::operator|| + */ +struct scalar_boolean_or_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op) + EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; } +}; +template<> struct functor_traits { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + +/** \internal + * \brief Template functors for comparison of two scalars + * \todo Implement packet-comparisons + */ +template struct scalar_cmp_op; + +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + +template +struct result_of(Scalar,Scalar)> { + typedef bool type; +}; + + +template struct scalar_cmp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} +}; +template struct scalar_cmp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} +}; +template struct scalar_cmp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} +}; +template struct scalar_cmp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} +}; + +// unary functors: + +/** \internal + * \brief Template functor to compute the opposite of a scalar + * + * \sa class CwiseUnaryOp, MatrixBase::operator- + */ +template struct scalar_opposite_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) + EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return internal::pnegate(a); } +}; +template +struct functor_traits > +{ enum { + Cost = NumTraits::AddCost, + PacketAccess = packet_traits::HasNegate }; +}; + +/** \internal + * \brief Template functor to compute the absolute value of a scalar + * + * \sa class CwiseUnaryOp, Cwise::abs + */ +template struct scalar_abs_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) + typedef typename NumTraits::Real result_type; + EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return internal::pabs(a); } +}; +template +struct functor_traits > +{ + enum { + Cost = NumTraits::AddCost, + PacketAccess = packet_traits::HasAbs + }; +}; + +/** \internal + * \brief Template functor to compute the squared absolute value of a scalar + * + * \sa class CwiseUnaryOp, Cwise::abs2 + */ +template struct scalar_abs2_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op) + typedef typename NumTraits::Real result_type; + EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return internal::pmul(a,a); } +}; +template +struct functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasAbs2 }; }; + +/** \internal + * \brief Template functor to compute the conjugate of a complex value + * + * \sa class CwiseUnaryOp, MatrixBase::conjugate() + */ +template struct scalar_conjugate_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op) + EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); } + template + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); } +}; +template +struct functor_traits > +{ + enum { + Cost = NumTraits::IsComplex ? NumTraits::AddCost : 0, + PacketAccess = packet_traits::HasConj + }; +}; + +/** \internal + * \brief Template functor to cast a scalar to another type + * + * \sa class CwiseUnaryOp, MatrixBase::cast() + */ +template +struct scalar_cast_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) + typedef NewType result_type; + EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast(a); } +}; +template +struct functor_traits > +{ enum { Cost = is_same::value ? 0 : NumTraits::AddCost, PacketAccess = false }; }; + +/** \internal + * \brief Template functor to extract the real part of a complex + * + * \sa class CwiseUnaryOp, MatrixBase::real() + */ +template +struct scalar_real_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op) + typedef typename NumTraits::Real result_type; + EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); } +}; +template +struct functor_traits > +{ enum { Cost = 0, PacketAccess = false }; }; + +/** \internal + * \brief Template functor to extract the imaginary part of a complex + * + * \sa class CwiseUnaryOp, MatrixBase::imag() + */ +template +struct scalar_imag_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op) + typedef typename NumTraits::Real result_type; + EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); } +}; +template +struct functor_traits > +{ enum { Cost = 0, PacketAccess = false }; }; + +/** \internal + * \brief Template functor to extract the real part of a complex as a reference + * + * \sa class CwiseUnaryOp, MatrixBase::real() + */ +template +struct scalar_real_ref_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op) + typedef typename NumTraits::Real result_type; + EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast(&a)); } +}; +template +struct functor_traits > +{ enum { Cost = 0, PacketAccess = false }; }; + +/** \internal + * \brief Template functor to extract the imaginary part of a complex as a reference + * + * \sa class CwiseUnaryOp, MatrixBase::imag() + */ +template +struct scalar_imag_ref_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op) + typedef typename NumTraits::Real result_type; + EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast(&a)); } +}; +template +struct functor_traits > +{ enum { Cost = 0, PacketAccess = false }; }; + +/** \internal + * + * \brief Template functor to compute the exponential of a scalar + * + * \sa class CwiseUnaryOp, Cwise::exp() + */ +template struct scalar_exp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) + inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } +}; +template +struct functor_traits > +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasExp }; }; + +/** \internal + * + * \brief Template functor to compute the logarithm of a scalar + * + * \sa class CwiseUnaryOp, Cwise::log() + */ +template struct scalar_log_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) + inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::plog(a); } +}; +template +struct functor_traits > +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasLog }; }; + +/** \internal + * \brief Template functor to multiply a scalar by a fixed other one + * + * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ + */ +/* NOTE why doing the pset1() in packetOp *is* an optimization ? + * indeed it seems better to declare m_other as a Packet and do the pset1() once + * in the constructor. However, in practice: + * - GCC does not like m_other as a Packet and generate a load every time it needs it + * - on the other hand GCC is able to moves the pset1() outside the loop :) + * - simpler code ;) + * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) + */ +template +struct scalar_multiple_op { + typedef typename packet_traits::type Packet; + // FIXME default copy constructors seems bugged with std::complex<> + EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } + EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } + EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return internal::pmul(a, pset1(m_other)); } + typename add_const_on_value_type::Nested>::type m_other; +}; +template +struct functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; + +template +struct scalar_multiple2_op { + typedef typename scalar_product_traits::ReturnType result_type; + EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } + EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } + EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } + typename add_const_on_value_type::Nested>::type m_other; +}; +template +struct functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; + +/** \internal + * \brief Template functor to divide a scalar by a fixed other one + * + * This functor is used to implement the quotient of a matrix by + * a scalar where the scalar type is not necessarily a floating point type. + * + * \sa class CwiseUnaryOp, MatrixBase::operator/ + */ +template +struct scalar_quotient1_op { + typedef typename packet_traits::type Packet; + // FIXME default copy constructors seems bugged with std::complex<> + EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } + EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} + EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return internal::pdiv(a, pset1(m_other)); } + typename add_const_on_value_type::Nested>::type m_other; +}; +template +struct functor_traits > +{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; + +// nullary functors + +template +struct scalar_constant_op { + typedef typename packet_traits::type Packet; + EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { } + EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { } + template + EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } + template + EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1(m_other); } + const Scalar m_other; +}; +template +struct functor_traits > +// FIXME replace this packet test by a safe one +{ enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; + +template struct scalar_identity_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) + template + EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); } +}; +template +struct functor_traits > +{ enum { Cost = NumTraits::AddCost, PacketAccess = false, IsRepeatable = true }; }; + +template struct linspaced_op_impl; + +// linear access for packet ops: +// 1) initialization +// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0]) +// 2) each step (where size is 1 for coeff access or PacketSize for packet access) +// base += [size*step, ..., size*step] +// +// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) +// in order to avoid the padd() in operator() ? +template +struct linspaced_op_impl +{ + typedef typename packet_traits::type Packet; + + linspaced_op_impl(const Scalar& low, const Scalar& step) : + m_low(low), m_step(step), + m_packetStep(pset1(packet_traits::size*step)), + m_base(padd(pset1(low), pmul(pset1(step),plset(-packet_traits::size)))) {} + + template + EIGEN_STRONG_INLINE const Scalar operator() (Index i) const + { + m_base = padd(m_base, pset1(m_step)); + return m_low+Scalar(i)*m_step; + } + + template + EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } + + const Scalar m_low; + const Scalar m_step; + const Packet m_packetStep; + mutable Packet m_base; +}; + +// random access for packet ops: +// 1) each step +// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) +template +struct linspaced_op_impl +{ + typedef typename packet_traits::type Packet; + + linspaced_op_impl(const Scalar& low, const Scalar& step) : + m_low(low), m_step(step), + m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} + + template + EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } + + template + EIGEN_STRONG_INLINE const Packet packetOp(Index i) const + { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } + + const Scalar m_low; + const Scalar m_step; + const Packet m_lowPacket; + const Packet m_stepPacket; + const Packet m_interPacket; +}; + +// ----- Linspace functor ---------------------------------------------------------------- + +// Forward declaration (we default to random access which does not really give +// us a speed gain when using packet access but it allows to use the functor in +// nested expressions). +template struct linspaced_op; +template struct functor_traits< linspaced_op > +{ enum { Cost = 1, PacketAccess = packet_traits::HasSetLinear, IsRepeatable = true }; }; +template struct linspaced_op +{ + typedef typename packet_traits::type Packet; + linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} + + template + EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } + + // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since + // there row==0 and col is used for the actual iteration. + template + EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const + { + eigen_assert(col==0 || row==0); + return impl(col + row); + } + + template + EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } + + // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since + // there row==0 and col is used for the actual iteration. + template + EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const + { + eigen_assert(col==0 || row==0); + return impl.packetOp(col + row); + } + + // This proxy object handles the actual required temporaries, the different + // implementations (random vs. sequential access) as well as the + // correct piping to size 2/4 packet operations. + const linspaced_op_impl impl; +}; + +// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta +// to indicate whether a functor allows linear access, just always answering 'yes' except for +// scalar_identity_op. +// FIXME move this to functor_traits adding a functor_default +template struct functor_has_linear_access { enum { ret = 1 }; }; +template struct functor_has_linear_access > { enum { ret = 0 }; }; + +// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication +// where the mixing of different types is handled by scalar_product_traits +// In particular, real * complex is allowed. +// FIXME move this to functor_traits adding a functor_default +template struct functor_is_product_like { enum { ret = 0 }; }; +template struct functor_is_product_like > { enum { ret = 1 }; }; +template struct functor_is_product_like > { enum { ret = 1 }; }; +template struct functor_is_product_like > { enum { ret = 1 }; }; + + +/** \internal + * \brief Template functor to add a scalar to a fixed other one + * \sa class CwiseUnaryOp, Array::operator+ + */ +/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ +template +struct scalar_add_op { + typedef typename packet_traits::type Packet; + // FIXME default copy constructors seems bugged with std::complex<> + inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } + inline scalar_add_op(const Scalar& other) : m_other(other) { } + inline Scalar operator() (const Scalar& a) const { return a + m_other; } + inline const Packet packetOp(const Packet& a) const + { return internal::padd(a, pset1(m_other)); } + const Scalar m_other; +}; +template +struct functor_traits > +{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; + +/** \internal + * \brief Template functor to compute the square root of a scalar + * \sa class CwiseUnaryOp, Cwise::sqrt() + */ +template struct scalar_sqrt_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) + inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } +}; +template +struct functor_traits > +{ enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = packet_traits::HasSqrt + }; +}; + +/** \internal + * \brief Template functor to compute the cosine of a scalar + * \sa class CwiseUnaryOp, ArrayBase::cos() + */ +template struct scalar_cos_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) + inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } +}; +template +struct functor_traits > +{ + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = packet_traits::HasCos + }; +}; + +/** \internal + * \brief Template functor to compute the sine of a scalar + * \sa class CwiseUnaryOp, ArrayBase::sin() + */ +template struct scalar_sin_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) + inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::psin(a); } +}; +template +struct functor_traits > +{ + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = packet_traits::HasSin + }; +}; + + +/** \internal + * \brief Template functor to compute the tan of a scalar + * \sa class CwiseUnaryOp, ArrayBase::tan() + */ +template struct scalar_tan_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) + inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } +}; +template +struct functor_traits > +{ + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = packet_traits::HasTan + }; +}; + +/** \internal + * \brief Template functor to compute the arc cosine of a scalar + * \sa class CwiseUnaryOp, ArrayBase::acos() + */ +template struct scalar_acos_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) + inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } +}; +template +struct functor_traits > +{ + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = packet_traits::HasACos + }; +}; + +/** \internal + * \brief Template functor to compute the arc sine of a scalar + * \sa class CwiseUnaryOp, ArrayBase::asin() + */ +template struct scalar_asin_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) + inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } +}; +template +struct functor_traits > +{ + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = packet_traits::HasASin + }; +}; + +/** \internal + * \brief Template functor to raise a scalar to a power + * \sa class CwiseUnaryOp, Cwise::pow + */ +template +struct scalar_pow_op { + // FIXME default copy constructors seems bugged with std::complex<> + inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } + inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} + inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } + const Scalar m_exponent; +}; +template +struct functor_traits > +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; + +/** \internal + * \brief Template functor to compute the quotient between a scalar and array entries. + * \sa class CwiseUnaryOp, Cwise::inverse() + */ +template +struct scalar_inverse_mult_op { + scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} + inline Scalar operator() (const Scalar& a) const { return m_other / a; } + template + inline const Packet packetOp(const Packet& a) const + { return internal::pdiv(pset1(m_other),a); } + Scalar m_other; +}; + +/** \internal + * \brief Template functor to compute the inverse of a scalar + * \sa class CwiseUnaryOp, Cwise::inverse() + */ +template +struct scalar_inverse_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) + inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; } + template + inline const Packet packetOp(const Packet& a) const + { return internal::pdiv(pset1(Scalar(1)),a); } +}; +template +struct functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; + +/** \internal + * \brief Template functor to compute the square of a scalar + * \sa class CwiseUnaryOp, Cwise::square() + */ +template +struct scalar_square_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) + inline Scalar operator() (const Scalar& a) const { return a*a; } + template + inline const Packet packetOp(const Packet& a) const + { return internal::pmul(a,a); } +}; +template +struct functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; + +/** \internal + * \brief Template functor to compute the cube of a scalar + * \sa class CwiseUnaryOp, Cwise::cube() + */ +template +struct scalar_cube_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) + inline Scalar operator() (const Scalar& a) const { return a*a*a; } + template + inline const Packet packetOp(const Packet& a) const + { return internal::pmul(a,pmul(a,a)); } +}; +template +struct functor_traits > +{ enum { Cost = 2*NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; + +// default functor traits for STL functors: + +template +struct functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; + +#ifdef EIGEN_STDEXT_SUPPORT + +template +struct functor_traits > +{ enum { Cost = 0, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = 0, PacketAccess = false }; }; + +template +struct functor_traits > > +{ enum { Cost = 0, PacketAccess = false }; }; + +template +struct functor_traits > > +{ enum { Cost = 0, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; + +template +struct functor_traits > +{ enum { Cost = functor_traits::Cost + functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; + +#endif // EIGEN_STDEXT_SUPPORT + +// allow to add new functors and specializations of functor_traits from outside Eigen. +// this macro is really needed because functor_traits must be specialized after it is declared but before it is used... +#ifdef EIGEN_FUNCTORS_PLUGIN +#include EIGEN_FUNCTORS_PLUGIN +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_FUNCTORS_H diff --git a/tools/Eigen/src/Core/Fuzzy.h b/tools/Eigen/src/Core/Fuzzy.h new file mode 100644 index 0000000..fe63bd2 --- /dev/null +++ b/tools/Eigen/src/Core/Fuzzy.h @@ -0,0 +1,150 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FUZZY_H +#define EIGEN_FUZZY_H + +namespace Eigen { + +namespace internal +{ + +template::IsInteger> +struct isApprox_selector +{ + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) + { + using std::min; + typename internal::nested::type nested(x); + typename internal::nested::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + } +}; + +template +struct isApprox_selector +{ + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) + { + return x.matrix() == y.matrix(); + } +}; + +template::IsInteger> +struct isMuchSmallerThan_object_selector +{ + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) + { + return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum(); + } +}; + +template +struct isMuchSmallerThan_object_selector +{ + static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) + { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +template::IsInteger> +struct isMuchSmallerThan_scalar_selector +{ + static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec) + { + return x.cwiseAbs2().sum() <= numext::abs2(prec * y); + } +}; + +template +struct isMuchSmallerThan_scalar_selector +{ + static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&) + { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +} // end namespace internal + + +/** \returns \c true if \c *this is approximately equal to \a other, within the precision + * determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$ + * are considered to be approximately equal within precision \f$ p \f$ if + * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm + * L2 norm). + * + * \note Because of the multiplicativeness of this comparison, one can't use this function + * to check whether \c *this is approximately equal to the zero matrix or vector. + * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix + * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const + * RealScalar&, RealScalar) instead. + * + * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +bool DenseBase::isApprox( + const DenseBase& other, + const RealScalar& prec +) const +{ + return internal::isApprox_selector::run(derived(), other.derived(), prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f] + * + * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason, + * the value of the reference scalar \a other should come from the Hilbert-Schmidt norm + * of a reference matrix of same dimensions. + * + * \sa isApprox(), isMuchSmallerThan(const DenseBase&, RealScalar) const + */ +template +bool DenseBase::isMuchSmallerThan( + const typename NumTraits::Real& other, + const RealScalar& prec +) const +{ + return internal::isMuchSmallerThan_scalar_selector::run(derived(), other, prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm. + * + * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +bool DenseBase::isMuchSmallerThan( + const DenseBase& other, + const RealScalar& prec +) const +{ + return internal::isMuchSmallerThan_object_selector::run(derived(), other.derived(), prec); +} + +} // end namespace Eigen + +#endif // EIGEN_FUZZY_H From ef389a8eaffed06ddb077935235c7eccae2aab84 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:10:32 +0200 Subject: [PATCH 15/25] add more eigen files --- tools/Eigen/src/Core/GeneralProduct.h | 638 ++++++++++++++ tools/Eigen/src/Core/GenericPacketMath.h | 350 ++++++++ tools/Eigen/src/Core/GlobalFunctions.h | 92 ++ tools/Eigen/src/Core/IO.h | 250 ++++++ tools/Eigen/src/Core/Map.h | 192 ++++ tools/Eigen/src/Core/MapBase.h | 251 ++++++ tools/Eigen/src/Core/MathFunctions.h | 768 ++++++++++++++++ tools/Eigen/src/Core/Matrix.h | 420 +++++++++ tools/Eigen/src/Core/MatrixBase.h | 563 ++++++++++++ tools/Eigen/src/Core/NestByValue.h | 111 +++ tools/Eigen/src/Core/NoAlias.h | 134 +++ tools/Eigen/src/Core/NumTraits.h | 150 ++++ tools/Eigen/src/Core/PlainObjectBase.h | 822 ++++++++++++++++++ tools/Eigen/src/Core/ProductBase.h | 290 ++++++ tools/Eigen/src/Core/products/CMakeLists.txt | 6 + .../src/Core/products/CoeffBasedProduct.h | 476 ++++++++++ 16 files changed, 5513 insertions(+) create mode 100644 tools/Eigen/src/Core/GeneralProduct.h create mode 100644 tools/Eigen/src/Core/GenericPacketMath.h create mode 100644 tools/Eigen/src/Core/GlobalFunctions.h create mode 100644 tools/Eigen/src/Core/IO.h create mode 100644 tools/Eigen/src/Core/Map.h create mode 100644 tools/Eigen/src/Core/MapBase.h create mode 100644 tools/Eigen/src/Core/MathFunctions.h create mode 100644 tools/Eigen/src/Core/Matrix.h create mode 100644 tools/Eigen/src/Core/MatrixBase.h create mode 100644 tools/Eigen/src/Core/NestByValue.h create mode 100644 tools/Eigen/src/Core/NoAlias.h create mode 100644 tools/Eigen/src/Core/NumTraits.h create mode 100644 tools/Eigen/src/Core/PlainObjectBase.h create mode 100644 tools/Eigen/src/Core/ProductBase.h create mode 100644 tools/Eigen/src/Core/products/CMakeLists.txt create mode 100644 tools/Eigen/src/Core/products/CoeffBasedProduct.h diff --git a/tools/Eigen/src/Core/GeneralProduct.h b/tools/Eigen/src/Core/GeneralProduct.h new file mode 100644 index 0000000..29ac522 --- /dev/null +++ b/tools/Eigen/src/Core/GeneralProduct.h @@ -0,0 +1,638 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_PRODUCT_H +#define EIGEN_GENERAL_PRODUCT_H + +namespace Eigen { + +/** \class GeneralProduct + * \ingroup Core_Module + * + * \brief Expression of the product of two general matrices or vectors + * + * \param LhsNested the type used to store the left-hand side + * \param RhsNested the type used to store the right-hand side + * \param ProductMode the type of the product + * + * This class represents an expression of the product of two general matrices. + * We call a general matrix, a dense matrix with full storage. For instance, + * This excludes triangular, selfadjoint, and sparse matrices. + * It is the return type of the operator* between general matrices. Its template + * arguments are determined automatically by ProductReturnType. Therefore, + * GeneralProduct should never be used direclty. To determine the result type of a + * function which involves a matrix product, use ProductReturnType::Type. + * + * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase&) + */ +template::value> +class GeneralProduct; + +enum { + Large = 2, + Small = 3 +}; + +namespace internal { + +template struct product_type_selector; + +template struct product_size_category +{ + enum { is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD, + value = is_large ? Large + : Size == 1 ? 1 + : Small + }; +}; + +template struct product_type +{ + typedef typename remove_all::type _Lhs; + typedef typename remove_all::type _Rhs; + enum { + MaxRows = _Lhs::MaxRowsAtCompileTime, + Rows = _Lhs::RowsAtCompileTime, + MaxCols = _Rhs::MaxColsAtCompileTime, + Cols = _Rhs::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime, + _Rhs::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, + _Rhs::RowsAtCompileTime), + LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + }; + + // the splitting into different lines of code here, introducing the _select enums and the typedef below, + // is to work around an internal compiler error with gcc 4.1 and 4.2. +private: + enum { + rows_select = product_size_category::value, + cols_select = product_size_category::value, + depth_select = product_size_category::value + }; + typedef product_type_selector selector; + +public: + enum { + value = selector::ret + }; +#ifdef EIGEN_DEBUG_PRODUCT + static void debug() + { + EIGEN_DEBUG_VAR(Rows); + EIGEN_DEBUG_VAR(Cols); + EIGEN_DEBUG_VAR(Depth); + EIGEN_DEBUG_VAR(rows_select); + EIGEN_DEBUG_VAR(cols_select); + EIGEN_DEBUG_VAR(depth_select); + EIGEN_DEBUG_VAR(value); + } +#endif +}; + + +/* The following allows to select the kind of product at compile time + * based on the three dimensions of the product. + * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ +// FIXME I'm not sure the current mapping is the ideal one. +template struct product_type_selector { enum { ret = OuterProduct }; }; +template struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; +template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; +template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = GemvProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; + +} // end namespace internal + +/** \class ProductReturnType + * \ingroup Core_Module + * + * \brief Helper class to get the correct and optimized returned type of operator* + * + * \param Lhs the type of the left-hand side + * \param Rhs the type of the right-hand side + * \param ProductMode the type of the product (determined automatically by internal::product_mode) + * + * This class defines the typename Type representing the optimized product expression + * between two matrix expressions. In practice, using ProductReturnType::Type + * is the recommended way to define the result type of a function returning an expression + * which involve a matrix product. The class Product should never be + * used directly. + * + * \sa class Product, MatrixBase::operator*(const MatrixBase&) + */ +template +struct ProductReturnType +{ + // TODO use the nested type to reduce instanciations ???? +// typedef typename internal::nested::type LhsNested; +// typedef typename internal::nested::type RhsNested; + + typedef GeneralProduct Type; +}; + +template +struct ProductReturnType +{ + typedef typename internal::nested::type >::type LhsNested; + typedef typename internal::nested::type >::type RhsNested; + typedef CoeffBasedProduct Type; +}; + +template +struct ProductReturnType +{ + typedef typename internal::nested::type >::type LhsNested; + typedef typename internal::nested::type >::type RhsNested; + typedef CoeffBasedProduct Type; +}; + +// this is a workaround for sun CC +template +struct LazyProductReturnType : public ProductReturnType +{}; + +/*********************************************************************** +* Implementation of Inner Vector Vector Product +***********************************************************************/ + +// FIXME : maybe the "inner product" could return a Scalar +// instead of a 1x1 matrix ?? +// Pro: more natural for the user +// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix +// product ends up to a row-vector times col-vector product... To tackle this use +// case, we could have a specialization for Block with: operator=(Scalar x); + +namespace internal { + +template +struct traits > + : traits::ReturnType,1,1> > +{}; + +} + +template +class GeneralProduct + : internal::no_assignment_operator, + public Matrix::ReturnType,1,1> +{ + typedef Matrix::ReturnType,1,1> Base; + public: + GeneralProduct(const Lhs& lhs, const Rhs& rhs) + { + EIGEN_STATIC_ASSERT((internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + /** Convertion to scalar */ + operator const typename Base::Scalar() const { + return Base::coeff(0,0); + } +}; + +/*********************************************************************** +* Implementation of Outer Vector Vector Product +***********************************************************************/ + +namespace internal { + +// Column major +template +EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&) +{ + typedef typename Dest::Index Index; + // FIXME make sure lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + const Index cols = dest.cols(); + for (Index j=0; j +EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) { + typedef typename Dest::Index Index; + // FIXME make sure rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + const Index rows = dest.rows(); + for (Index i=0; i +struct traits > + : traits, Lhs, Rhs> > +{}; + +} + +template +class GeneralProduct + : public ProductBase, Lhs, Rhs> +{ + template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + + public: + EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) + + GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) + { + EIGEN_STATIC_ASSERT((internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + } + + struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + adds(const Scalar& s) : m_scale(s) {} + template void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + inline void evalTo(Dest& dest) const { + internal::outer_product_selector_run(*this, dest, set(), is_row_major()); + } + + template + inline void addTo(Dest& dest) const { + internal::outer_product_selector_run(*this, dest, add(), is_row_major()); + } + + template + inline void subTo(Dest& dest) const { + internal::outer_product_selector_run(*this, dest, sub(), is_row_major()); + } + + template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + { + internal::outer_product_selector_run(*this, dest, adds(alpha), is_row_major()); + } +}; + +/*********************************************************************** +* Implementation of General Matrix Vector Product +***********************************************************************/ + +/* According to the shape/flags of the matrix we have to distinghish 3 different cases: + * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine + * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine + * 3 - all other cases are handled using a simple loop along the outer-storage direction. + * Therefore we need a lower level meta selector. + * Furthermore, if the matrix is the rhs, then the product has to be transposed. + */ +namespace internal { + +template +struct traits > + : traits, Lhs, Rhs> > +{}; + +template +struct gemv_selector; + +} // end namespace internal + +template +class GeneralProduct + : public ProductBase, Lhs, Rhs> +{ + public: + EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) + + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + + GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs) + { +// EIGEN_STATIC_ASSERT((internal::is_same::value), +// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + } + + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef typename internal::conditional::type MatrixType; + + template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + { + eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); + internal::gemv_selector::HasUsableDirectAccess)>::run(*this, dst, alpha); + } +}; + +namespace internal { + +// The vector is on the left => transposition +template +struct gemv_selector +{ + template + static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; + gemv_selector + ::run(GeneralProduct,Transpose, GemvProduct> + (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); + } +}; + +template struct gemv_static_vector_if; + +template +struct gemv_static_vector_if +{ + EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } +}; + +template +struct gemv_static_vector_if +{ + EIGEN_STRONG_INLINE Scalar* data() { return 0; } +}; + +template +struct gemv_static_vector_if +{ + #if EIGEN_ALIGN_STATICALLY + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + #else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + enum { + ForceAlignment = internal::packet_traits::Vectorizable, + PacketSize = internal::packet_traits::size + }; + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { + return ForceAlignment + ? reinterpret_cast((reinterpret_cast(m_data.array) & ~(size_t(15))) + 16) + : m_data.array; + } + #endif +}; + +template<> struct gemv_selector +{ + template + static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + { + typedef typename ProductType::Index Index; + typedef typename ProductType::LhsScalar LhsScalar; + typedef typename ProductType::RhsScalar RhsScalar; + typedef typename ProductType::Scalar ResScalar; + typedef typename ProductType::RealScalar RealScalar; + typedef typename ProductType::ActualLhsType ActualLhsType; + typedef typename ProductType::ActualRhsType ActualRhsType; + typedef typename ProductType::LhsBlasTraits LhsBlasTraits; + typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef Map, Aligned> MappedDest; + + ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); + ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) + * RhsBlasTraits::extractScalarFactor(prod.rhs()); + + // make sure Dest is a compile-time vector type (bug 1166) + typedef typename conditional::type ActualDest; + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), + ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), + MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal + }; + + gemv_static_vector_if static_dest; + + bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); + + if(!evalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; + } + + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + actualLhs.data(), actualLhs.outerStride(), + actualRhs.data(), actualRhs.innerStride(), + actualDestPtr, 1, + compatibleAlpha); + + if (!evalToDest) + { + if(!alphaIsCompatible) + dest += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } + } +}; + +template<> struct gemv_selector +{ + template + static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + { + typedef typename ProductType::LhsScalar LhsScalar; + typedef typename ProductType::RhsScalar RhsScalar; + typedef typename ProductType::Scalar ResScalar; + typedef typename ProductType::Index Index; + typedef typename ProductType::ActualLhsType ActualLhsType; + typedef typename ProductType::ActualRhsType ActualRhsType; + typedef typename ProductType::_ActualRhsType _ActualRhsType; + typedef typename ProductType::LhsBlasTraits LhsBlasTraits; + typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + + typename add_const::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + typename add_const::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) + * RhsBlasTraits::extractScalarFactor(prod.rhs()); + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + }; + + gemv_static_vector_if static_rhs; + + ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), + DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); + + if(!DirectlyUseRhs) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = actualRhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + Map(actualRhsPtr, actualRhs.size()) = actualRhs; + } + + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + actualLhs.data(), actualLhs.outerStride(), + actualRhsPtr, 1, + dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) + actualAlpha); + } +}; + +template<> struct gemv_selector +{ + template + static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + { + typedef typename Dest::Index Index; + // TODO makes sure dest is sequentially stored in memory, otherwise use a temp + const Index size = prod.rhs().rows(); + for(Index k=0; k struct gemv_selector +{ + template + static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + { + typedef typename Dest::Index Index; + // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp + const Index rows = prod.rows(); + for(Index i=0; i +template +inline const typename ProductReturnType::Type +MatrixBase::operator*(const MatrixBase &other) const +{ + // A note regarding the function declaration: In MSVC, this function will sometimes + // not be inlined since DenseStorage is an unwindable object for dynamic + // matrices and product types are holding a member to store the result. + // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. + enum { + ProductIsValid = Derived::ColsAtCompileTime==Dynamic + || OtherDerived::RowsAtCompileTime==Dynamic + || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) +#ifdef EIGEN_DEBUG_PRODUCT + internal::product_type::debug(); +#endif + return typename ProductReturnType::Type(derived(), other.derived()); +} + +/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. + * + * The returned product will behave like any other expressions: the coefficients of the product will be + * computed once at a time as requested. This might be useful in some extremely rare cases when only + * a small and no coherent fraction of the result's coefficients have to be computed. + * + * \warning This version of the matrix product can be much much slower. So use it only if you know + * what you are doing and that you measured a true speed improvement. + * + * \sa operator*(const MatrixBase&) + */ +template +template +const typename LazyProductReturnType::Type +MatrixBase::lazyProduct(const MatrixBase &other) const +{ + enum { + ProductIsValid = Derived::ColsAtCompileTime==Dynamic + || OtherDerived::RowsAtCompileTime==Dynamic + || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) + + return typename LazyProductReturnType::Type(derived(), other.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/tools/Eigen/src/Core/GenericPacketMath.h b/tools/Eigen/src/Core/GenericPacketMath.h new file mode 100644 index 0000000..5f783eb --- /dev/null +++ b/tools/Eigen/src/Core/GenericPacketMath.h @@ -0,0 +1,350 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERIC_PACKET_MATH_H +#define EIGEN_GENERIC_PACKET_MATH_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \file GenericPacketMath.h + * + * Default implementation for types not supported by the vectorization. + * In practice these functions are provided to make easier the writing + * of generic vectorized code. + */ + +#ifndef EIGEN_DEBUG_ALIGNED_LOAD +#define EIGEN_DEBUG_ALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_LOAD +#define EIGEN_DEBUG_UNALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_ALIGNED_STORE +#define EIGEN_DEBUG_ALIGNED_STORE +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_STORE +#define EIGEN_DEBUG_UNALIGNED_STORE +#endif + +struct default_packet_traits +{ + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasAbs2 = 1, + HasMin = 1, + HasMax = 1, + HasConj = 1, + HasSetLinear = 1, + + HasDiv = 0, + HasSqrt = 0, + HasExp = 0, + HasLog = 0, + HasPow = 0, + + HasSin = 0, + HasCos = 0, + HasTan = 0, + HasASin = 0, + HasACos = 0, + HasATan = 0 + }; +}; + +template struct packet_traits : default_packet_traits +{ + typedef T type; + enum { + Vectorizable = 0, + size = 1, + AlignedOnScalar = 0 + }; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0 + }; +}; + +/** \internal \returns a + b (coeff-wise) */ +template inline Packet +padd(const Packet& a, + const Packet& b) { return a+b; } + +/** \internal \returns a - b (coeff-wise) */ +template inline Packet +psub(const Packet& a, + const Packet& b) { return a-b; } + +/** \internal \returns -a (coeff-wise) */ +template inline Packet +pnegate(const Packet& a) { return -a; } + +/** \internal \returns conj(a) (coeff-wise) */ +template inline Packet +pconj(const Packet& a) { return numext::conj(a); } + +/** \internal \returns a * b (coeff-wise) */ +template inline Packet +pmul(const Packet& a, + const Packet& b) { return a*b; } + +/** \internal \returns a / b (coeff-wise) */ +template inline Packet +pdiv(const Packet& a, + const Packet& b) { return a/b; } + +/** \internal \returns the min of \a a and \a b (coeff-wise) */ +template inline Packet +pmin(const Packet& a, + const Packet& b) { using std::min; return (min)(a, b); } + +/** \internal \returns the max of \a a and \a b (coeff-wise) */ +template inline Packet +pmax(const Packet& a, + const Packet& b) { using std::max; return (max)(a, b); } + +/** \internal \returns the absolute value of \a a */ +template inline Packet +pabs(const Packet& a) { using std::abs; return abs(a); } + +/** \internal \returns the bitwise and of \a a and \a b */ +template inline Packet +pand(const Packet& a, const Packet& b) { return a & b; } + +/** \internal \returns the bitwise or of \a a and \a b */ +template inline Packet +por(const Packet& a, const Packet& b) { return a | b; } + +/** \internal \returns the bitwise xor of \a a and \a b */ +template inline Packet +pxor(const Packet& a, const Packet& b) { return a ^ b; } + +/** \internal \returns the bitwise andnot of \a a and \a b */ +template inline Packet +pandnot(const Packet& a, const Packet& b) { return a & (!b); } + +/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ +template inline Packet +pload(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet version of \a *from, (un-aligned load) */ +template inline Packet +ploadu(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet with elements of \a *from duplicated. + * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and + * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]} + * Currently, this function is only used for scalar * complex products. + */ +template inline Packet +ploaddup(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template inline Packet +pset1(const typename unpacket_traits::type& a) { return a; } + +/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ +template inline typename packet_traits::type +plset(const Scalar& a) { return a; } + +/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ +template inline void pstore(Scalar* to, const Packet& from) +{ (*to) = from; } + +/** \internal copy the packet \a from to \a *to, (un-aligned store) */ +template inline void pstoreu(Scalar* to, const Packet& from) +{ (*to) = from; } + +/** \internal tries to do cache prefetching of \a addr */ +template inline void prefetch(const Scalar* addr) +{ +#if !defined(_MSC_VER) +__builtin_prefetch(addr); +#endif +} + +/** \internal \returns the first element of a packet */ +template inline typename unpacket_traits::type pfirst(const Packet& a) +{ return a; } + +/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ +template inline Packet +preduxp(const Packet* vecs) { return vecs[0]; } + +/** \internal \returns the sum of the elements of \a a*/ +template inline typename unpacket_traits::type predux(const Packet& a) +{ return a; } + +/** \internal \returns the product of the elements of \a a*/ +template inline typename unpacket_traits::type predux_mul(const Packet& a) +{ return a; } + +/** \internal \returns the min of the elements of \a a*/ +template inline typename unpacket_traits::type predux_min(const Packet& a) +{ return a; } + +/** \internal \returns the max of the elements of \a a*/ +template inline typename unpacket_traits::type predux_max(const Packet& a) +{ return a; } + +/** \internal \returns the reversed elements of \a a*/ +template inline Packet preverse(const Packet& a) +{ return a; } + + +/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ +template inline Packet pcplxflip(const Packet& a) +{ + // FIXME: uncomment the following in case we drop the internal imag and real functions. +// using std::imag; +// using std::real; + return Packet(imag(a),real(a)); +} + +/************************** +* Special math functions +***************************/ + +/** \internal \returns the sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psin(const Packet& a) { using std::sin; return sin(a); } + +/** \internal \returns the cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcos(const Packet& a) { using std::cos; return cos(a); } + +/** \internal \returns the tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptan(const Packet& a) { using std::tan; return tan(a); } + +/** \internal \returns the arc sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pasin(const Packet& a) { using std::asin; return asin(a); } + +/** \internal \returns the arc cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pacos(const Packet& a) { using std::acos; return acos(a); } + +/** \internal \returns the exp of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pexp(const Packet& a) { using std::exp; return exp(a); } + +/** \internal \returns the log of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog(const Packet& a) { using std::log; return log(a); } + +/** \internal \returns the square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } + +/*************************************************************************** +* The following functions might not have to be overwritten for vectorized types +***************************************************************************/ + +/** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */ +// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) +template +inline void pstore1(typename unpacket_traits::type* to, const typename unpacket_traits::type& a) +{ + pstore(to, pset1(a)); +} + +/** \internal \returns a * b + c (coeff-wise) */ +template inline Packet +pmadd(const Packet& a, + const Packet& b, + const Packet& c) +{ return padd(pmul(a, b),c); } + +/** \internal \returns a packet version of \a *from. + * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ +template +inline Packet ploadt(const typename unpacket_traits::type* from) +{ + if(LoadMode == Aligned) + return pload(from); + else + return ploadu(from); +} + +/** \internal copy the packet \a from to \a *to. + * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ +template +inline void pstoret(Scalar* to, const Packet& from) +{ + if(LoadMode == Aligned) + pstore(to, from); + else + pstoreu(to, from); +} + +/** \internal default implementation of palign() allowing partial specialization */ +template +struct palign_impl +{ + // by default data are aligned, so there is nothing to be done :) + static inline void run(PacketType&, const PacketType&) {} +}; + +/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements + * of \a first and \a Offset first elements of \a second. + * + * This function is currently only used to optimize matrix-vector products on unligned matrices. + * It takes 2 packets that represent a contiguous memory array, and returns a packet starting + * at the position \a Offset. For instance, for packets of 4 elements, we have: + * Input: + * - first = {f0,f1,f2,f3} + * - second = {s0,s1,s2,s3} + * Output: + * - if Offset==0 then {f0,f1,f2,f3} + * - if Offset==1 then {f1,f2,f3,s0} + * - if Offset==2 then {f2,f3,s0,s1} + * - if Offset==3 then {f3,s0,s1,s3} + */ +template +inline void palign(PacketType& first, const PacketType& second) +{ + palign_impl::run(first,second); +} + +/*************************************************************************** +* Fast complex products (GCC generates a function call which is very slow) +***************************************************************************/ + +template<> inline std::complex pmul(const std::complex& a, const std::complex& b) +{ return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } + +template<> inline std::complex pmul(const std::complex& a, const std::complex& b) +{ return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_GENERIC_PACKET_MATH_H + diff --git a/tools/Eigen/src/Core/GlobalFunctions.h b/tools/Eigen/src/Core/GlobalFunctions.h new file mode 100644 index 0000000..2acf977 --- /dev/null +++ b/tools/Eigen/src/Core/GlobalFunctions.h @@ -0,0 +1,92 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2012 Gael Guennebaud +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GLOBAL_FUNCTIONS_H +#define EIGEN_GLOBAL_FUNCTIONS_H + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> \ + NAME(const Eigen::ArrayBase& x) { \ + return x.derived(); \ + } + +#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ + \ + template \ + struct NAME##_retval > \ + { \ + typedef const Eigen::CwiseUnaryOp, const Derived> type; \ + }; \ + template \ + struct NAME##_impl > \ + { \ + static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) \ + { \ + return x.derived(); \ + } \ + }; + + +namespace Eigen +{ + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op) + + template + inline const Eigen::CwiseUnaryOp, const Derived> + pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) { + return x.derived().pow(exponent); + } + + template + inline const Eigen::CwiseBinaryOp, const Derived, const Derived> + pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) + { + return Eigen::CwiseBinaryOp, const Derived, const Derived>( + x.derived(), + exponents.derived() + ); + } + + /** + * \brief Component-wise division of a scalar by array elements. + **/ + template + inline const Eigen::CwiseUnaryOp, const Derived> + operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) + { + return Eigen::CwiseUnaryOp, const Derived>( + a.derived(), + Eigen::internal::scalar_inverse_mult_op(s) + ); + } + + namespace internal + { + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op) + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op) + } +} + +// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...) + +#endif // EIGEN_GLOBAL_FUNCTIONS_H diff --git a/tools/Eigen/src/Core/IO.h b/tools/Eigen/src/Core/IO.h new file mode 100644 index 0000000..8d4bc59 --- /dev/null +++ b/tools/Eigen/src/Core/IO.h @@ -0,0 +1,250 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_IO_H +#define EIGEN_IO_H + +namespace Eigen { + +enum { DontAlignCols = 1 }; +enum { StreamPrecision = -1, + FullPrecision = -2 }; + +namespace internal { +template +std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt); +} + +/** \class IOFormat + * \ingroup Core_Module + * + * \brief Stores a set of parameters controlling the way matrices are printed + * + * List of available parameters: + * - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c FullPrecision. + * The default is the special value \c StreamPrecision which means to use the + * stream's own precision setting, as set for instance using \c cout.precision(3). The other special value + * \c FullPrecision means that the number of digits will be computed to match the full precision of each floating-point + * type. + * - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c DontAlignCols which + * allows to disable the alignment of columns, resulting in faster code. + * - \b coeffSeparator string printed between two coefficients of the same row + * - \b rowSeparator string printed between two rows + * - \b rowPrefix string printed at the beginning of each row + * - \b rowSuffix string printed at the end of each row + * - \b matPrefix string printed at the beginning of the matrix + * - \b matSuffix string printed at the end of the matrix + * + * Example: \include IOFormat.cpp + * Output: \verbinclude IOFormat.out + * + * \sa DenseBase::format(), class WithFormat + */ +struct IOFormat +{ + /** Default contructor, see class IOFormat for the meaning of the parameters */ + IOFormat(int _precision = StreamPrecision, int _flags = 0, + const std::string& _coeffSeparator = " ", + const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="", + const std::string& _matPrefix="", const std::string& _matSuffix="") + : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator), + rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags) + { + int i = int(matSuffix.length())-1; + while (i>=0 && matSuffix[i]!='\n') + { + rowSpacer += ' '; + i--; + } + } + std::string matPrefix, matSuffix; + std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer; + std::string coeffSeparator; + int precision; + int flags; +}; + +/** \class WithFormat + * \ingroup Core_Module + * + * \brief Pseudo expression providing matrix output with given format + * + * \param ExpressionType the type of the object on which IO stream operations are performed + * + * This class represents an expression with stream operators controlled by a given IOFormat. + * It is the return type of DenseBase::format() + * and most of the time this is the only way it is used. + * + * See class IOFormat for some examples. + * + * \sa DenseBase::format(), class IOFormat + */ +template +class WithFormat +{ + public: + + WithFormat(const ExpressionType& matrix, const IOFormat& format) + : m_matrix(matrix), m_format(format) + {} + + friend std::ostream & operator << (std::ostream & s, const WithFormat& wf) + { + return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format); + } + + protected: + const typename ExpressionType::Nested m_matrix; + IOFormat m_format; +}; + +/** \returns a WithFormat proxy object allowing to print a matrix the with given + * format \a fmt. + * + * See class IOFormat for some examples. + * + * \sa class IOFormat, class WithFormat + */ +template +inline const WithFormat +DenseBase::format(const IOFormat& fmt) const +{ + return WithFormat(derived(), fmt); +} + +namespace internal { + +template +struct significant_decimals_default_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline int run() + { + using std::ceil; + using std::log; + return cast(ceil(-log(NumTraits::epsilon())/log(RealScalar(10)))); + } +}; + +template +struct significant_decimals_default_impl +{ + static inline int run() + { + return 0; + } +}; + +template +struct significant_decimals_impl + : significant_decimals_default_impl::IsInteger> +{}; + +/** \internal + * print the matrix \a _m to the output stream \a s using the output format \a fmt */ +template +std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt) +{ + if(_m.size() == 0) + { + s << fmt.matPrefix << fmt.matSuffix; + return s; + } + + typename Derived::Nested m = _m; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Index Index; + + Index width = 0; + + std::streamsize explicit_precision; + if(fmt.precision == StreamPrecision) + { + explicit_precision = 0; + } + else if(fmt.precision == FullPrecision) + { + if (NumTraits::IsInteger) + { + explicit_precision = 0; + } + else + { + explicit_precision = significant_decimals_impl::run(); + } + } + else + { + explicit_precision = fmt.precision; + } + + std::streamsize old_precision = 0; + if(explicit_precision) old_precision = s.precision(explicit_precision); + + bool align_cols = !(fmt.flags & DontAlignCols); + if(align_cols) + { + // compute the largest width + for(Index j = 0; j < m.cols(); ++j) + for(Index i = 0; i < m.rows(); ++i) + { + std::stringstream sstr; + sstr.copyfmt(s); + sstr << m.coeff(i,j); + width = std::max(width, Index(sstr.str().length())); + } + } + s << fmt.matPrefix; + for(Index i = 0; i < m.rows(); ++i) + { + if (i) + s << fmt.rowSpacer; + s << fmt.rowPrefix; + if(width) s.width(width); + s << m.coeff(i, 0); + for(Index j = 1; j < m.cols(); ++j) + { + s << fmt.coeffSeparator; + if (width) s.width(width); + s << m.coeff(i, j); + } + s << fmt.rowSuffix; + if( i < m.rows() - 1) + s << fmt.rowSeparator; + } + s << fmt.matSuffix; + if(explicit_precision) s.precision(old_precision); + return s; +} + +} // end namespace internal + +/** \relates DenseBase + * + * Outputs the matrix, to the given stream. + * + * If you wish to print the matrix with a format different than the default, use DenseBase::format(). + * + * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers. + * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default parameters. + * + * \sa DenseBase::format() + */ +template +std::ostream & operator << +(std::ostream & s, + const DenseBase & m) +{ + return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT); +} + +} // end namespace Eigen + +#endif // EIGEN_IO_H diff --git a/tools/Eigen/src/Core/Map.h b/tools/Eigen/src/Core/Map.h new file mode 100644 index 0000000..f804c89 --- /dev/null +++ b/tools/Eigen/src/Core/Map.h @@ -0,0 +1,192 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MAP_H +#define EIGEN_MAP_H + +namespace Eigen { + +/** \class Map + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing array of data. + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout + * of an ordinary, contiguous array. This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class represents a matrix or vector expression mapping an existing array of data. + * It can be used to let Eigen interface without any overhead with non-Eigen data structures, + * such as plain C arrays or structures from other libraries. By default, it assumes that the + * data is laid out contiguously in memory. You can however override this by explicitly specifying + * inner and outer strides. + * + * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix: + * \include Map_simple.cpp + * Output: \verbinclude Map_simple.out + * + * If you need to map non-contiguous arrays, you can do so by specifying strides: + * + * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer + * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time + * fixed value. + * \include Map_inner_stride.cpp + * Output: \verbinclude Map_inner_stride.out + * + * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping + * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns. + * Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is + * a short version of \c OuterStride because the default template parameter of OuterStride + * is \c Dynamic + * \include Map_outer_stride.cpp + * Output: \verbinclude Map_outer_stride.out + * + * For more details and for an example of specifying both an inner and an outer stride, see class Stride. + * + * \b Tip: to change the array of data mapped by a Map object, you can use the C++ + * placement new syntax: + * + * Example: \include Map_placement_new.cpp + * Output: \verbinclude Map_placement_new.out + * + * This class is the return type of PlainObjectBase::Map() but can also be used directly. + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ + +namespace internal { +template +struct traits > + : public traits +{ + typedef traits TraitsBase; + typedef typename PlainObjectType::Index Index; + typedef typename PlainObjectType::Scalar Scalar; + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + KeepsPacketAccess = bool(HasNoInnerStride) + && ( bool(IsDynamicSize) + || HasNoOuterStride + || ( OuterStrideAtCompileTime!=Dynamic + && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags1) : int(Flags1 & ~LinearAccessBit), + Flags3 = is_lvalue::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), + Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) + }; +private: + enum { Options }; // Expressions don't have Options +}; +} + +template class Map + : public MapBase > +{ + public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Map) + + typedef typename Base::PointerType PointerType; +#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API + typedef const Scalar* PointerArgType; + inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast(ptr); } +#else + typedef PointerType PointerArgType; + inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } +#endif + + inline Index innerStride() const + { + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; + } + + inline Index outerStride() const + { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags)&RowMajorBit ? this->cols() + : this->rows(); + } + + /** Constructor in the fixed-size case. + * + * \param dataPtr pointer to the array to map + * \param a_stride optional Stride object, passing the strides. + */ + inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride) + { + PlainObjectType::Base::_check_template_params(); + } + + /** Constructor in the dynamic-size vector case. + * + * \param dataPtr pointer to the array to map + * \param a_size the size of the vector expression + * \param a_stride optional Stride object, passing the strides. + */ + inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride) + { + PlainObjectType::Base::_check_template_params(); + } + + /** Constructor in the dynamic-size matrix case. + * + * \param dataPtr pointer to the array to map + * \param nbRows the number of rows of the matrix expression + * \param nbCols the number of columns of the matrix expression + * \param a_stride optional Stride object, passing the strides. + */ + inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride) + { + PlainObjectType::Base::_check_template_params(); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) + + protected: + StrideType m_stride; +}; + +template +inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> + ::Array(const Scalar *data) +{ + this->_set_noalias(Eigen::Map(data)); +} + +template +inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> + ::Matrix(const Scalar *data) +{ + this->_set_noalias(Eigen::Map(data)); +} + +} // end namespace Eigen + +#endif // EIGEN_MAP_H diff --git a/tools/Eigen/src/Core/MapBase.h b/tools/Eigen/src/Core/MapBase.h new file mode 100644 index 0000000..81efc4a --- /dev/null +++ b/tools/Eigen/src/Core/MapBase.h @@ -0,0 +1,251 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MAPBASE_H +#define EIGEN_MAPBASE_H + +#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ + EIGEN_STATIC_ASSERT((int(internal::traits::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) + +namespace Eigen { + +/** \class MapBase + * \ingroup Core_Module + * + * \brief Base class for Map and Block expression with direct access + * + * \sa class Map, class Block + */ +template class MapBase + : public internal::dense_xpr_base::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + SizeAtCompileTime = Base::SizeAtCompileTime + }; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef typename internal::conditional< + bool(internal::is_lvalue::value), + Scalar *, + const Scalar *>::type + PointerType; + + using Base::derived; +// using Base::RowsAtCompileTime; +// using Base::ColsAtCompileTime; +// using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + using Base::IsRowMajor; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::eval; + + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + + // bug 217 - compile error on ICC 11.1 + using Base::operator=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + + inline Index rows() const { return m_rows.value(); } + inline Index cols() const { return m_cols.value(); } + + /** Returns a pointer to the first coefficient of the matrix or vector. + * + * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride(). + * + * \sa innerStride(), outerStride() + */ + inline const Scalar* data() const { return m_data; } + + inline const Scalar& coeff(Index rowId, Index colId) const + { + return m_data[colId * colStride() + rowId * rowStride()]; + } + + inline const Scalar& coeff(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return m_data[index * innerStride()]; + } + + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return this->m_data[colId * colStride() + rowId * rowStride()]; + } + + inline const Scalar& coeffRef(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + template + inline PacketScalar packet(Index rowId, Index colId) const + { + return internal::ploadt + (m_data + (colId * colStride() + rowId * rowStride())); + } + + template + inline PacketScalar packet(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return internal::ploadt(m_data + index * innerStride()); + } + + explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + { + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + checkSanity(); + } + + inline MapBase(PointerType dataPtr, Index vecSize) + : m_data(dataPtr), + m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)), + m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime)) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + eigen_assert(vecSize >= 0); + eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize); + checkSanity(); + } + + inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) + : m_data(dataPtr), m_rows(nbRows), m_cols(nbCols) + { + eigen_assert( (dataPtr == 0) + || ( nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) + && nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols))); + checkSanity(); + } + + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + + protected: + + void checkSanity() const + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits::Flags&PacketAccessBit, + internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + eigen_assert(EIGEN_IMPLIES(internal::traits::Flags&AlignedBit, (size_t(m_data) % 16) == 0) + && "input pointer is not aligned on a 16 byte boundary"); + } + + PointerType m_data; + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; +}; + +template class MapBase + : public MapBase +{ + typedef MapBase ReadOnlyMapBase; + public: + + typedef MapBase Base; + + typedef typename Base::Scalar Scalar; + typedef typename Base::PacketScalar PacketScalar; + typedef typename Base::Index Index; + typedef typename Base::PointerType PointerType; + + using Base::derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + inline const Scalar* data() const { return this->m_data; } + inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error + + inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) + { + return this->m_data[col * colStride() + row * rowStride()]; + } + + inline ScalarWithConstIfNotLvalue& coeffRef(Index index) + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& val) + { + internal::pstoret + (this->m_data + (col * colStride() + row * rowStride()), val); + } + + template + inline void writePacket(Index index, const PacketScalar& val) + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + internal::pstoret + (this->m_data + index * innerStride(), val); + } + + explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {} + + Derived& operator=(const MapBase& other) + { + ReadOnlyMapBase::Base::operator=(other); + return derived(); + } + + // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. + using ReadOnlyMapBase::Base::operator=; +}; + +#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS + +} // end namespace Eigen + +#endif // EIGEN_MAPBASE_H diff --git a/tools/Eigen/src/Core/MathFunctions.h b/tools/Eigen/src/Core/MathFunctions.h new file mode 100644 index 0000000..4e17ecd --- /dev/null +++ b/tools/Eigen/src/Core/MathFunctions.h @@ -0,0 +1,768 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONS_H +#define EIGEN_MATHFUNCTIONS_H + +namespace Eigen { + +namespace internal { + +/** \internal \struct global_math_functions_filtering_base + * + * What it does: + * Defines a typedef 'type' as follows: + * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then + * global_math_functions_filtering_base::type is a typedef for it. + * - otherwise, global_math_functions_filtering_base::type is a typedef for T. + * + * How it's used: + * To allow to defined the global math functions (like sin...) in certain cases, like the Array expressions. + * When you do sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know + * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase. + * So we must make sure to use sin_impl > and not sin_impl, otherwise our partial specialization + * won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells it. + * + * How it's implemented: + * SFINAE in the style of enable_if. Highly susceptible of breaking compilers. With GCC, it sure does work, but if you replace + * the typename dummy by an integer template parameter, it doesn't work anymore! + */ + +template +struct global_math_functions_filtering_base +{ + typedef T type; +}; + +template struct always_void { typedef void type; }; + +template +struct global_math_functions_filtering_base + ::type + > +{ + typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type; +}; + +#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl::type> +#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval::type>::type + +/**************************************************************************** +* Implementation of real * +****************************************************************************/ + +template::IsComplex> +struct real_default_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x) + { + return x; + } +}; + +template +struct real_default_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x) + { + using std::real; + return real(x); + } +}; + +template struct real_impl : real_default_impl {}; + +template +struct real_retval +{ + typedef typename NumTraits::Real type; +}; + + +/**************************************************************************** +* Implementation of imag * +****************************************************************************/ + +template::IsComplex> +struct imag_default_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar&) + { + return RealScalar(0); + } +}; + +template +struct imag_default_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x) + { + using std::imag; + return imag(x); + } +}; + +template struct imag_impl : imag_default_impl {}; + +template +struct imag_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of real_ref * +****************************************************************************/ + +template +struct real_ref_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar& run(Scalar& x) + { + return reinterpret_cast(&x)[0]; + } + static inline const RealScalar& run(const Scalar& x) + { + return reinterpret_cast(&x)[0]; + } +}; + +template +struct real_ref_retval +{ + typedef typename NumTraits::Real & type; +}; + +/**************************************************************************** +* Implementation of imag_ref * +****************************************************************************/ + +template +struct imag_ref_default_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar& run(Scalar& x) + { + return reinterpret_cast(&x)[1]; + } + static inline const RealScalar& run(const Scalar& x) + { + return reinterpret_cast(&x)[1]; + } +}; + +template +struct imag_ref_default_impl +{ + static inline Scalar run(Scalar&) + { + return Scalar(0); + } + static inline const Scalar run(const Scalar&) + { + return Scalar(0); + } +}; + +template +struct imag_ref_impl : imag_ref_default_impl::IsComplex> {}; + +template +struct imag_ref_retval +{ + typedef typename NumTraits::Real & type; +}; + +/**************************************************************************** +* Implementation of conj * +****************************************************************************/ + +template::IsComplex> +struct conj_impl +{ + static inline Scalar run(const Scalar& x) + { + return x; + } +}; + +template +struct conj_impl +{ + static inline Scalar run(const Scalar& x) + { + using std::conj; + return conj(x); + } +}; + +template +struct conj_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of abs2 * +****************************************************************************/ + +template +struct abs2_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x) + { + return x*x; + } +}; + +template +struct abs2_impl > +{ + static inline RealScalar run(const std::complex& x) + { + return real(x)*real(x) + imag(x)*imag(x); + } +}; + +template +struct abs2_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of norm1 * +****************************************************************************/ + +template +struct norm1_default_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x) + { + using std::abs; + return abs(real(x)) + abs(imag(x)); + } +}; + +template +struct norm1_default_impl +{ + static inline Scalar run(const Scalar& x) + { + using std::abs; + return abs(x); + } +}; + +template +struct norm1_impl : norm1_default_impl::IsComplex> {}; + +template +struct norm1_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of hypot * +****************************************************************************/ + +template +struct hypot_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x, const Scalar& y) + { + using std::max; + using std::min; + using std::abs; + using std::sqrt; + RealScalar _x = abs(x); + RealScalar _y = abs(y); + RealScalar p = (max)(_x, _y); + if(p==RealScalar(0)) return RealScalar(0); + RealScalar q = (min)(_x, _y); + RealScalar qp = q/p; + return p * sqrt(RealScalar(1) + qp*qp); + } +}; + +template +struct hypot_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of cast * +****************************************************************************/ + +template +struct cast_impl +{ + static inline NewType run(const OldType& x) + { + return static_cast(x); + } +}; + +// here, for once, we're plainly returning NewType: we don't want cast to do weird things. + +template +inline NewType cast(const OldType& x) +{ + return cast_impl::run(x); +} + +/**************************************************************************** +* Implementation of atanh2 * +****************************************************************************/ + +template +struct atanh2_default_impl +{ + typedef Scalar retval; + typedef typename NumTraits::Real RealScalar; + static inline Scalar run(const Scalar& x, const Scalar& y) + { + using std::abs; + using std::log; + using std::sqrt; + Scalar z = x / y; + if (y == Scalar(0) || abs(z) > sqrt(NumTraits::epsilon())) + return RealScalar(0.5) * log((y + x) / (y - x)); + else + return z + z*z*z / RealScalar(3); + } +}; + +template +struct atanh2_default_impl +{ + static inline Scalar run(const Scalar&, const Scalar&) + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + return Scalar(0); + } +}; + +template +struct atanh2_impl : atanh2_default_impl::IsInteger> {}; + +template +struct atanh2_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of pow * +****************************************************************************/ + +template +struct pow_default_impl +{ + typedef Scalar retval; + static inline Scalar run(const Scalar& x, const Scalar& y) + { + using std::pow; + return pow(x, y); + } +}; + +template +struct pow_default_impl +{ + static inline Scalar run(Scalar x, Scalar y) + { + Scalar res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); + if(y & 1) res *= x; + y >>= 1; + while(y) + { + x *= x; + if(y&1) res *= x; + y >>= 1; + } + return res; + } +}; + +template +struct pow_impl : pow_default_impl::IsInteger> {}; + +template +struct pow_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of random * +****************************************************************************/ + +template +struct random_default_impl {}; + +template +struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +struct random_retval +{ + typedef Scalar type; +}; + +template inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y); +template inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(); + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX); + } + static inline Scalar run() + { + return run(Scalar(NumTraits::IsSigned ? -1 : 0), Scalar(1)); + } +}; + +enum { + floor_log2_terminate, + floor_log2_move_up, + floor_log2_move_down, + floor_log2_bogus +}; + +template struct floor_log2_selector +{ + enum { middle = (lower + upper) / 2, + value = (upper <= lower + 1) ? int(floor_log2_terminate) + : (n < (1 << middle)) ? int(floor_log2_move_down) + : (n==0) ? int(floor_log2_bogus) + : int(floor_log2_move_up) + }; +}; + +template::value> +struct floor_log2 {}; + +template +struct floor_log2 +{ + enum { value = floor_log2::middle>::value }; +}; + +template +struct floor_log2 +{ + enum { value = floor_log2::middle, upper>::value }; +}; + +template +struct floor_log2 +{ + enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; +}; + +template +struct floor_log2 +{ + // no value, error at compile time +}; + +template +struct random_default_impl +{ + typedef typename NumTraits::NonInteger NonInteger; + + static inline Scalar run(const Scalar& x, const Scalar& y) + { + return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1))); + } + + static inline Scalar run() + { +#ifdef EIGEN_MAKING_DOCS + return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); +#else + enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value, + scalar_bits = sizeof(Scalar) * CHAR_BIT, + shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), + offset = NumTraits::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 + }; + return Scalar((std::rand() >> shift) - offset); +#endif + } +}; + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + return Scalar(random(real(x), real(y)), + random(imag(x), imag(y))); + } + static inline Scalar run() + { + typedef typename NumTraits::Real RealScalar; + return Scalar(random(), random()); + } +}; + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() +{ + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +} + +} // end namespace internal + +/**************************************************************************** +* Generic math function * +****************************************************************************/ + +namespace numext { + +template +inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); +} + +template +inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x) +{ + return internal::real_ref_impl::run(x); +} + +template +inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); +} + +template +inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); +} + +template +inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x) +{ + return internal::imag_ref_impl::run(x); +} + +template +inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); +} + +template +inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); +} + +template +inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); +} + +template +inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); +} + +template +inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); +} + +// std::isfinite is non standard, so let's define our own version, +// even though it is not very efficient. +template bool (isfinite)(const T& x) +{ + return x::highest() && x>NumTraits::lowest(); +} + +} // end namespace numext + +namespace internal { + +/**************************************************************************** +* Implementation of fuzzy comparisons * +****************************************************************************/ + +template +struct scalar_fuzzy_default_impl {}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template + static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) + { + using std::abs; + return abs(x) <= abs(y) * prec; + } + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + using std::min; + using std::abs; + return abs(x - y) <= (min)(abs(x), abs(y)) * prec; + } + static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + return x <= y || isApprox(x, y, prec); + } +}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template + static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) + { + return x == Scalar(0); + } + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) + { + return x == y; + } + static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) + { + return x <= y; + } +}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template + static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) + { + return numext::abs2(x) <= numext::abs2(y) * prec * prec; + } + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + using std::min; + return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec; + } +}; + +template +struct scalar_fuzzy_impl : scalar_fuzzy_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::template isMuchSmallerThan(x, y, precision); +} + +template +inline bool isApprox(const Scalar& x, const Scalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::isApprox(x, y, precision); +} + +template +inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::isApproxOrLessThan(x, y, precision); +} + +/****************************************** +*** The special case of the bool type *** +******************************************/ + +template<> struct random_impl +{ + static inline bool run() + { + return random(0,1)==0 ? false : true; + } +}; + +template<> struct scalar_fuzzy_impl +{ + typedef bool RealScalar; + + template + static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) + { + return !x; + } + + static inline bool isApprox(bool x, bool y, bool) + { + return x == y; + } + + static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) + { + return (!x) || y; + } + +}; + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONS_H diff --git a/tools/Eigen/src/Core/Matrix.h b/tools/Eigen/src/Core/Matrix.h new file mode 100644 index 0000000..02be142 --- /dev/null +++ b/tools/Eigen/src/Core/Matrix.h @@ -0,0 +1,420 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_H +#define EIGEN_MATRIX_H + +namespace Eigen { + +/** \class Matrix + * \ingroup Core_Module + * + * \brief The matrix class, also used for vectors and row-vectors + * + * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen. + * Vectors are matrices with one column, and row-vectors are matrices with one row. + * + * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). + * + * The first three template parameters are required: + * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex. + * User defined sclar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam _Rows Number of rows, or \b Dynamic + * \tparam _Cols Number of columns, or \b Dynamic + * + * The remaining template parameters are optional -- in most cases you don't have to worry about them. + * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \b #AutoAlign or \b #DontAlign. + * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required + * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. + * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note"). + * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note"). + * + * Eigen provides a number of typedefs covering the usual cases. Here are some examples: + * + * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix) + * \li \c Vector4f is a vector of 4 floats (\c Matrix) + * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix) + * + * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix) + * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix) + * + * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix) + * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix) + * + * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs. + * + * You can access elements of vectors and matrices using normal subscripting: + * + * \code + * Eigen::VectorXd v(10); + * v[0] = 0.1; + * v[1] = 0.2; + * v(0) = 0.3; + * v(1) = 0.4; + * + * Eigen::MatrixXi m(10, 10); + * m(0, 1) = 1; + * m(0, 2) = 2; + * m(0, 3) = 3; + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. + * + * Some notes: + * + *
+ *
\anchor dense Dense versus sparse:
+ *
This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module. + * + * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array. + * This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.
+ * + *
\anchor fixedsize Fixed-size versus dynamic-size:
+ *
Fixed-size means that the numbers of rows and columns are known are compile-time. In this case, Eigen allocates the array + * of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up + * to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time. + * + * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime + * variables, and the array of coefficients is allocated dynamically on the heap. + * + * Note that \em dense matrices, be they Fixed-size or Dynamic-size, do not expand dynamically in the sense of a std::map. + * If you want this behavior, see the Sparse module.
+ * + *
\anchor maxrows _MaxRows and _MaxCols:
+ *
In most cases, one just leaves these parameters to the default values. + * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases + * when the exact numbers of rows and columns are not known are compile-time, but it is known at compile-time that they cannot + * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols + * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.
+ *
+ * + * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, + * \ref TopicStorageOrders + */ + +namespace internal { +template +struct traits > +{ + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef DenseIndex Index; + typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _MaxRows, + MaxColsAtCompileTime = _MaxCols, + Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, + CoeffReadCost = NumTraits::ReadCost, + Options = _Options, + InnerStrideAtCompileTime = 1, + OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime + }; +}; +} + +template +class Matrix + : public PlainObjectBase > +{ + public: + + /** \brief Base class typedef. + * \sa PlainObjectBase + */ + typedef PlainObjectBase Base; + + enum { Options = _Options }; + + EIGEN_DENSE_PUBLIC_INTERFACE(Matrix) + + typedef typename Base::PlainObject PlainObject; + + using Base::base; + using Base::coeffRef; + + /** + * \brief Assigns matrices to each other. + * + * \note This is a special case of the templated operator=. Its purpose is + * to prevent a default operator= from hiding the templated operator=. + * + * \callgraph + */ + EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) + { + return Base::_set(other); + } + + /** \internal + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase& other) + { + return Base::_set(other); + } + + /* Here, doxygen failed to copy the brief information when using \copydoc */ + + /** + * \brief Copies the generic expression \a other into *this. + * \copydetails DenseBase::operator=(const EigenBase &other) + */ + template + EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase &other) + { + return Base::operator=(other); + } + + template + EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue& func) + { + return Base::operator=(func); + } + + /** \brief Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ + EIGEN_STRONG_INLINE Matrix() : Base() + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + // FIXME is it still needed + Matrix(internal::constructor_without_unaligned_array_assert) + : Base(internal::constructor_without_unaligned_array_assert()) + { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } + +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + Matrix(Matrix&& other) + : Base(std::move(other)) + { + Base::_check_template_params(); + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); + } + Matrix& operator=(Matrix&& other) + { + other.swap(*this); + return *this; + } +#endif + + /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Matrix() instead. + */ + EIGEN_STRONG_INLINE explicit Matrix(Index dim) + : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix) + eigen_assert(dim >= 0); + eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) + { + Base::_check_template_params(); + Base::template _init2(x, y); + } + #else + /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size matrices. For fixed-size matrices, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. */ + Matrix(Index rows, Index cols); + /** \brief Constructs an initialized 2D vector with given coefficients */ + Matrix(const Scalar& x, const Scalar& y); + #endif + + /** \brief Constructs an initialized 3D vector with given coefficients */ + EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + } + /** \brief Constructs an initialized 4D vector with given coefficients */ + EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + m_storage.data()[3] = w; + } + + explicit Matrix(const Scalar *data); + + /** \brief Constructor copying the value of the expression \a other */ + template + EIGEN_STRONG_INLINE Matrix(const MatrixBase& other) + : Base(other.rows() * other.cols(), other.rows(), other.cols()) + { + // This test resides here, to bring the error messages closer to the user. Normally, these checks + // are performed deeply within the library, thus causing long and scary error traces. + EIGEN_STATIC_ASSERT((internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + Base::_check_template_params(); + Base::_set_noalias(other); + } + /** \brief Copy constructor */ + EIGEN_STRONG_INLINE Matrix(const Matrix& other) + : Base(other.rows() * other.cols(), other.rows(), other.cols()) + { + Base::_check_template_params(); + Base::_set_noalias(other); + } + /** \brief Copy constructor with in-place evaluation */ + template + EIGEN_STRONG_INLINE Matrix(const ReturnByValue& other) + { + Base::_check_template_params(); + Base::resize(other.rows(), other.cols()); + other.evalTo(*this); + } + + /** \brief Copy constructor for generic expressions. + * \sa MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_STRONG_INLINE Matrix(const EigenBase &other) + : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) + { + Base::_check_template_params(); + Base::_resize_to_match(other); + // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to + // go for pure _set() implementations, right? + *this = other; + } + + /** \internal + * \brief Override MatrixBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. + */ + template + void swap(MatrixBase const & other) + { this->_swap(other.derived()); } + + inline Index innerStride() const { return 1; } + inline Index outerStride() const { return this->innerSize(); } + + /////////// Geometry module /////////// + + template + explicit Matrix(const RotationBase& r); + template + Matrix& operator=(const RotationBase& r); + + #ifdef EIGEN2_SUPPORT + template + explicit Matrix(const eigen2_RotationBase& r); + template + Matrix& operator=(const eigen2_RotationBase& r); + #endif + + // allow to extend Matrix outside Eigen + #ifdef EIGEN_MATRIX_PLUGIN + #include EIGEN_MATRIX_PLUGIN + #endif + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; +}; + +/** \defgroup matrixtypedefs Global matrix typedefs + * + * \ingroup Core_Module + * + * Eigen defines several typedef shortcuts for most common matrix and vector types. + * + * The general patterns are the following: + * + * \c MatrixSizeType where \c Size can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size, + * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd + * for complex double. + * + * For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of floats. + * + * There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is + * a fixed-size vector of 4 complex floats. + * + * \sa class Matrix + */ + +#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##SizeSuffix##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Vector##SizeSuffix##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##Size##X##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##X##Size##TypeSuffix; + +#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_TYPEDEFS +#undef EIGEN_MAKE_FIXED_TYPEDEFS + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H diff --git a/tools/Eigen/src/Core/MatrixBase.h b/tools/Eigen/src/Core/MatrixBase.h new file mode 100644 index 0000000..e83ef4d --- /dev/null +++ b/tools/Eigen/src/Core/MatrixBase.h @@ -0,0 +1,563 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIXBASE_H +#define EIGEN_MATRIXBASE_H + +namespace Eigen { + +/** \class MatrixBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and expressions + * + * This class is the base that is inherited by all matrix, vector, and related expression + * types. Most of the Eigen API is contained in this class, and its base classes. Other important + * classes for the Eigen API are Matrix, and VectorwiseOp. + * + * Note that some methods are defined in other modules such as the \ref LU_Module LU module + * for all functions related to matrix inversions. + * + * \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc. + * + * When writing a function taking Eigen objects as argument, if you want your function + * to take as argument any matrix, vector, or expression, just let it take a + * MatrixBase argument. As an example, here is a function printFirstRow which, given + * a matrix, vector, or expression \a x, prints the first row of \a x. + * + * \code + template + void printFirstRow(const Eigen::MatrixBase& x) + { + cout << x.row(0) << endl; + } + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. + * + * \sa \ref TopicClassHierarchy + */ +template class MatrixBase + : public DenseBase +{ + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef MatrixBase StorageBaseType; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + using Base::CoeffReadCost; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::eval; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType; + typedef typename Base::RowXpr RowXpr; + typedef typename Base::ColXpr ColXpr; +#endif // not EIGEN_PARSED_BY_DOXYGEN + + + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** type of the equivalent square matrix */ + typedef Matrix SquareMatrixType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** \returns the size of the main diagonal, which is min(rows(),cols()). + * \sa rows(), cols(), SizeAtCompileTime. */ + inline Index diagonalSize() const { return (std::min)(rows(),cols()); } + + /** \brief The plain matrix type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef Matrix::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainObject; + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,Derived> ConstantReturnType; + /** \internal the return type of MatrixBase::adjoint() */ + typedef typename internal::conditional::IsComplex, + CwiseUnaryOp, ConstTransposeReturnType>, + ConstTransposeReturnType + >::type AdjointReturnType; + /** \internal Return type of eigenvalues() */ + typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; + /** \internal the return type of identity */ + typedef CwiseNullaryOp,Derived> IdentityReturnType; + /** \internal the return type of unit vectors */ + typedef Block, SquareMatrixType>, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime> BasisReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase +# include "../plugins/CommonCwiseUnaryOps.h" +# include "../plugins/CommonCwiseBinaryOps.h" +# include "../plugins/MatrixCwiseUnaryOps.h" +# include "../plugins/MatrixCwiseBinaryOps.h" +# ifdef EIGEN_MATRIXBASE_PLUGIN +# include EIGEN_MATRIXBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + Derived& operator=(const MatrixBase& other); + + // We cannot inherit here via Base::operator= since it is causing + // trouble with MSVC. + + template + Derived& operator=(const DenseBase& other); + + template + Derived& operator=(const EigenBase& other); + + template + Derived& operator=(const ReturnByValue& other); + + template + Derived& lazyAssign(const ProductBase& other); + + template + Derived& lazyAssign(const MatrixPowerProduct& other); + + template + Derived& operator+=(const MatrixBase& other); + template + Derived& operator-=(const MatrixBase& other); + + template + const typename ProductReturnType::Type + operator*(const MatrixBase &other) const; + + template + const typename LazyProductReturnType::Type + lazyProduct(const MatrixBase &other) const; + + template + Derived& operator*=(const EigenBase& other); + + template + void applyOnTheLeft(const EigenBase& other); + + template + void applyOnTheRight(const EigenBase& other); + + template + const DiagonalProduct + operator*(const DiagonalBase &diagonal) const; + + template + typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType + dot(const MatrixBase& other) const; + + #ifdef EIGEN2_SUPPORT + template + Scalar eigen2_dot(const MatrixBase& other) const; + #endif + + RealScalar squaredNorm() const; + RealScalar norm() const; + RealScalar stableNorm() const; + RealScalar blueNorm() const; + RealScalar hypotNorm() const; + const PlainObject normalized() const; + void normalize(); + + const AdjointReturnType adjoint() const; + void adjointInPlace(); + + typedef Diagonal DiagonalReturnType; + DiagonalReturnType diagonal(); + typedef typename internal::add_const >::type ConstDiagonalReturnType; + ConstDiagonalReturnType diagonal() const; + + template struct DiagonalIndexReturnType { typedef Diagonal Type; }; + template struct ConstDiagonalIndexReturnType { typedef const Diagonal Type; }; + + template typename DiagonalIndexReturnType::Type diagonal(); + template typename ConstDiagonalIndexReturnType::Type diagonal() const; + + typedef Diagonal DiagonalDynamicIndexReturnType; + typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + + DiagonalDynamicIndexReturnType diagonal(Index index); + ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; + + #ifdef EIGEN2_SUPPORT + template typename internal::eigen2_part_return_type::type part(); + template const typename internal::eigen2_part_return_type::type part() const; + + // huuuge hack. make Eigen2's matrix.part() work in eigen3. Problem: Diagonal is now a class template instead + // of an integer constant. Solution: overload the part() method template wrt template parameters list. + template class U> + const DiagonalWrapper part() const + { return diagonal().asDiagonal(); } + #endif // EIGEN2_SUPPORT + + template struct TriangularViewReturnType { typedef TriangularView Type; }; + template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; + + template typename TriangularViewReturnType::Type triangularView(); + template typename ConstTriangularViewReturnType::Type triangularView() const; + + template struct SelfAdjointViewReturnType { typedef SelfAdjointView Type; }; + template struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView Type; }; + + template typename SelfAdjointViewReturnType::Type selfadjointView(); + template typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + + const SparseView sparseView(const Scalar& m_reference = Scalar(0), + const typename NumTraits::Real& m_epsilon = NumTraits::dummy_precision()) const; + static const IdentityReturnType Identity(); + static const IdentityReturnType Identity(Index rows, Index cols); + static const BasisReturnType Unit(Index size, Index i); + static const BasisReturnType Unit(Index i); + static const BasisReturnType UnitX(); + static const BasisReturnType UnitY(); + static const BasisReturnType UnitZ(); + static const BasisReturnType UnitW(); + + const DiagonalWrapper asDiagonal() const; + const PermutationWrapper asPermutation() const; + + Derived& setIdentity(); + Derived& setIdentity(Index rows, Index cols); + + bool isIdentity(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isDiagonal(const RealScalar& prec = NumTraits::dummy_precision()) const; + + bool isUpperTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isLowerTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + + template + bool isOrthogonal(const MatrixBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isUnitary(const RealScalar& prec = NumTraits::dummy_precision()) const; + + /** \returns true if each coefficients of \c *this and \a other are all exactly equal. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator!= */ + template + inline bool operator==(const MatrixBase& other) const + { return cwiseEqual(other).all(); } + + /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator== */ + template + inline bool operator!=(const MatrixBase& other) const + { return cwiseNotEqual(other).any(); } + + NoAlias noalias(); + + inline const ForceAlignedAccess forceAlignedAccess() const; + inline ForceAlignedAccess forceAlignedAccess(); + template inline typename internal::add_const_on_value_type,Derived&>::type>::type forceAlignedAccessIf() const; + template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + + Scalar trace() const; + +/////////// Array module /////////// + + template RealScalar lpNorm() const; + + MatrixBase& matrix() { return *this; } + const MatrixBase& matrix() const { return *this; } + + /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + ArrayWrapper array() { return derived(); } + const ArrayWrapper array() const { return derived(); } + +/////////// LU module /////////// + + const FullPivLU fullPivLu() const; + const PartialPivLU partialPivLu() const; + + #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS + const LU lu() const; + #endif + + #ifdef EIGEN2_SUPPORT + const LU eigen2_lu() const; + #endif + + #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS + const PartialPivLU lu() const; + #endif + + #ifdef EIGEN2_SUPPORT + template + void computeInverse(MatrixBase *result) const { + *result = this->inverse(); + } + #endif + + const internal::inverse_impl inverse() const; + template + void computeInverseAndDetWithCheck( + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() + ) const; + template + void computeInverseWithCheck( + ResultType& inverse, + bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() + ) const; + Scalar determinant() const; + +/////////// Cholesky module /////////// + + const LLT llt() const; + const LDLT ldlt() const; + +/////////// QR module /////////// + + const HouseholderQR householderQr() const; + const ColPivHouseholderQR colPivHouseholderQr() const; + const FullPivHouseholderQR fullPivHouseholderQr() const; + + #ifdef EIGEN2_SUPPORT + const QR qr() const; + #endif + + EigenvaluesReturnType eigenvalues() const; + RealScalar operatorNorm() const; + +/////////// SVD module /////////// + + JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + + #ifdef EIGEN2_SUPPORT + SVD svd() const; + #endif + +/////////// Geometry module /////////// + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /// \internal helper struct to form the return type of the cross product + template struct cross_product_return_type { + typedef typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef Matrix type; + }; + #endif // EIGEN_PARSED_BY_DOXYGEN + template + typename cross_product_return_type::type + cross(const MatrixBase& other) const; + template + PlainObject cross3(const MatrixBase& other) const; + PlainObject unitOrthogonal(void) const; + Matrix eulerAngles(Index a0, Index a1, Index a2) const; + + #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS + ScalarMultipleReturnType operator*(const UniformScaling& s) const; + // put this as separate enum value to work around possible GCC 4.3 bug (?) + enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal }; + typedef Homogeneous HomogeneousReturnType; + HomogeneousReturnType homogeneous() const; + #endif + + enum { + SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1 + }; + typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, + internal::traits::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne; + typedef CwiseUnaryOp::Scalar>, + const ConstStartMinusOne > HNormalizedReturnType; + + const HNormalizedReturnType hnormalized() const; + +////////// Householder module /////////// + + void makeHouseholderInPlace(Scalar& tau, RealScalar& beta); + template + void makeHouseholder(EssentialPart& essential, + Scalar& tau, RealScalar& beta) const; + template + void applyHouseholderOnTheLeft(const EssentialPart& essential, + const Scalar& tau, + Scalar* workspace); + template + void applyHouseholderOnTheRight(const EssentialPart& essential, + const Scalar& tau, + Scalar* workspace); + +///////// Jacobi module ///////// + + template + void applyOnTheLeft(Index p, Index q, const JacobiRotation& j); + template + void applyOnTheRight(Index p, Index q, const JacobiRotation& j); + +///////// SparseCore module ///////// + + template + EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type + cwiseProduct(const SparseMatrixBase &other) const + { + return other.cwiseProduct(derived()); + } + +///////// MatrixFunctions module ///////// + + typedef typename internal::stem_function::type StemFunction; + const MatrixExponentialReturnValue exp() const; + const MatrixFunctionReturnValue matrixFunction(StemFunction f) const; + const MatrixFunctionReturnValue cosh() const; + const MatrixFunctionReturnValue sinh() const; + const MatrixFunctionReturnValue cos() const; + const MatrixFunctionReturnValue sin() const; + const MatrixSquareRootReturnValue sqrt() const; + const MatrixLogarithmReturnValue log() const; + const MatrixPowerReturnValue pow(const RealScalar& p) const; + +#ifdef EIGEN2_SUPPORT + template + Derived& operator+=(const Flagged, 0, + EvalBeforeAssigningBit>& other); + + template + Derived& operator-=(const Flagged, 0, + EvalBeforeAssigningBit>& other); + + /** \deprecated because .lazy() is deprecated + * Overloaded for cache friendly product evaluation */ + template + Derived& lazyAssign(const Flagged& other) + { return lazyAssign(other._expression()); } + + template + const Flagged marked() const; + const Flagged lazy() const; + + inline const Cwise cwise() const; + inline Cwise cwise(); + + VectorBlock start(Index size); + const VectorBlock start(Index size) const; + VectorBlock end(Index size); + const VectorBlock end(Index size) const; + template VectorBlock start(); + template const VectorBlock start() const; + template VectorBlock end(); + template const VectorBlock end() const; + + Minor minor(Index row, Index col); + const Minor minor(Index row, Index col) const; +#endif + + protected: + MatrixBase() : Base() {} + + private: + explicit MatrixBase(int); + MatrixBase(int,int); + template explicit MatrixBase(const MatrixBase&); + protected: + // mixing arrays and matrices is not legal + template Derived& operator+=(const ArrayBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} + // mixing arrays and matrices is not legal + template Derived& operator-=(const ArrayBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} +}; + + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** replaces \c *this by \c *this * \a other. + * + * \returns a reference to \c *this + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline Derived& +MatrixBase::operator*=(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline void MatrixBase::applyOnTheRight(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); +} + +/** replaces \c *this by \a other * \c *this. + * + * Example: \include MatrixBase_applyOnTheLeft.cpp + * Output: \verbinclude MatrixBase_applyOnTheLeft.out + */ +template +template +inline void MatrixBase::applyOnTheLeft(const EigenBase &other) +{ + other.derived().applyThisOnTheLeft(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIXBASE_H diff --git a/tools/Eigen/src/Core/NestByValue.h b/tools/Eigen/src/Core/NestByValue.h new file mode 100644 index 0000000..a893b17 --- /dev/null +++ b/tools/Eigen/src/Core/NestByValue.h @@ -0,0 +1,111 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NESTBYVALUE_H +#define EIGEN_NESTBYVALUE_H + +namespace Eigen { + +/** \class NestByValue + * \ingroup Core_Module + * + * \brief Expression which must be nested by value + * + * \param ExpressionType the type of the object of which we are requiring nesting-by-value + * + * This class is the return type of MatrixBase::nestByValue() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::nestByValue() + */ + +namespace internal { +template +struct traits > : public traits +{}; +} + +template class NestByValue + : public internal::dense_xpr_base< NestByValue >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) + + inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + + inline Index rows() const { return m_expression.rows(); } + inline Index cols() const { return m_expression.cols(); } + inline Index outerStride() const { return m_expression.outerStride(); } + inline Index innerStride() const { return m_expression.innerStride(); } + + inline const CoeffReturnType coeff(Index row, Index col) const + { + return m_expression.coeff(row, col); + } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + inline const CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return m_expression.template packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(index, x); + } + + operator const ExpressionType&() const { return m_expression; } + + protected: + const ExpressionType m_expression; +}; + +/** \returns an expression of the temporary version of *this. + */ +template +inline const NestByValue +DenseBase::nestByValue() const +{ + return NestByValue(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_NESTBYVALUE_H diff --git a/tools/Eigen/src/Core/NoAlias.h b/tools/Eigen/src/Core/NoAlias.h new file mode 100644 index 0000000..768bfb1 --- /dev/null +++ b/tools/Eigen/src/Core/NoAlias.h @@ -0,0 +1,134 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NOALIAS_H +#define EIGEN_NOALIAS_H + +namespace Eigen { + +/** \class NoAlias + * \ingroup Core_Module + * + * \brief Pseudo expression providing an operator = assuming no aliasing + * + * \param ExpressionType the type of the object on which to do the lazy assignment + * + * This class represents an expression with special assignment operators + * assuming no aliasing between the target expression and the source expression. + * More precisely it alloas to bypass the EvalBeforeAssignBit flag of the source expression. + * It is the return type of MatrixBase::noalias() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::noalias() + */ +template class StorageBase> +class NoAlias +{ + typedef typename ExpressionType::Scalar Scalar; + public: + NoAlias(ExpressionType& expression) : m_expression(expression) {} + + /** Behaves like MatrixBase::lazyAssign(other) + * \sa MatrixBase::lazyAssign() */ + template + EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) + { return internal::assign_selector::run(m_expression,other.derived()); } + + /** \sa MatrixBase::operator+= */ + template + EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) + { + typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; + SelfAdder tmp(m_expression); + typedef typename internal::nested::type OtherDerivedNested; + typedef typename internal::remove_all::type _OtherDerivedNested; + internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + return m_expression; + } + + /** \sa MatrixBase::operator-= */ + template + EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) + { + typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; + SelfAdder tmp(m_expression); + typedef typename internal::nested::type OtherDerivedNested; + typedef typename internal::remove_all::type _OtherDerivedNested; + internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + return m_expression; + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase& other) + { other.derived().addTo(m_expression); return m_expression; } + + template + EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase& other) + { other.derived().subTo(m_expression); return m_expression; } + + template + EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct& other) + { return m_expression.derived() += CoeffBasedProduct(other.lhs(), other.rhs()); } + + template + EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct& other) + { return m_expression.derived() -= CoeffBasedProduct(other.lhs(), other.rhs()); } + + template + ExpressionType& operator=(const ReturnByValue& func) + { return m_expression = func; } +#endif + + ExpressionType& expression() const + { + return m_expression; + } + + protected: + ExpressionType& m_expression; +}; + +/** \returns a pseudo expression of \c *this with an operator= assuming + * no aliasing between \c *this and the source expression. + * + * More precisely, noalias() allows to bypass the EvalBeforeAssignBit flag. + * Currently, even though several expressions may alias, only product + * expressions have this flag. Therefore, noalias() is only usefull when + * the source expression contains a matrix product. + * + * Here are some examples where noalias is usefull: + * \code + * D.noalias() = A * B; + * D.noalias() += A.transpose() * B; + * D.noalias() -= 2 * A * B.adjoint(); + * \endcode + * + * On the other hand the following example will lead to a \b wrong result: + * \code + * A.noalias() = A * B; + * \endcode + * because the result matrix A is also an operand of the matrix product. Therefore, + * there is no alternative than evaluating A * B in a temporary, that is the default + * behavior when you write: + * \code + * A = A * B; + * \endcode + * + * \sa class NoAlias + */ +template +NoAlias MatrixBase::noalias() +{ + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_NOALIAS_H diff --git a/tools/Eigen/src/Core/NumTraits.h b/tools/Eigen/src/Core/NumTraits.h new file mode 100644 index 0000000..bac9e50 --- /dev/null +++ b/tools/Eigen/src/Core/NumTraits.h @@ -0,0 +1,150 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NUMTRAITS_H +#define EIGEN_NUMTRAITS_H + +namespace Eigen { + +/** \class NumTraits + * \ingroup Core_Module + * + * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen. + * + * \param T the numeric type at hand + * + * This class stores enums, typedefs and static methods giving information about a numeric type. + * + * The provided data consists of: + * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real, + * then \a Real is just a typedef to \a T. If \a T is \c std::complex then \a Real + * is a typedef to \a U. + * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, + * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives + * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to + * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is + * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what + * this means, just use \a T here. + * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex + * type, and to 0 otherwise. + * \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int, + * and to \c 0 otherwise. + * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed + * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. + * Stay vague here. No need to do architecture-specific stuff. + * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. + * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must + * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. + * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T. + * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default + * value by the fuzzy comparison operators. + * \li highest() and lowest() functions returning the highest and lowest possible values respectively. + */ + +template struct GenericNumTraits +{ + enum { + IsInteger = std::numeric_limits::is_integer, + IsSigned = std::numeric_limits::is_signed, + IsComplex = 0, + RequireInitialization = internal::is_arithmetic::value ? 0 : 1, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; + + typedef T Real; + typedef typename internal::conditional< + IsInteger, + typename internal::conditional::type, + T + >::type NonInteger; + typedef T Nested; + + static inline Real epsilon() { return std::numeric_limits::epsilon(); } + static inline Real dummy_precision() + { + // make sure to override this for floating-point types + return Real(0); + } + static inline T highest() { return (std::numeric_limits::max)(); } + static inline T lowest() { return IsInteger ? (std::numeric_limits::min)() : (-(std::numeric_limits::max)()); } + +#ifdef EIGEN2_SUPPORT + enum { + HasFloatingPoint = !IsInteger + }; + typedef NonInteger FloatingPoint; +#endif +}; + +template struct NumTraits : GenericNumTraits +{}; + +template<> struct NumTraits + : GenericNumTraits +{ + static inline float dummy_precision() { return 1e-5f; } +}; + +template<> struct NumTraits : GenericNumTraits +{ + static inline double dummy_precision() { return 1e-12; } +}; + +template<> struct NumTraits + : GenericNumTraits +{ + static inline long double dummy_precision() { return 1e-15l; } +}; + +template struct NumTraits > + : GenericNumTraits > +{ + typedef _Real Real; + enum { + IsComplex = 1, + RequireInitialization = NumTraits<_Real>::RequireInitialization, + ReadCost = 2 * NumTraits<_Real>::ReadCost, + AddCost = 2 * NumTraits::AddCost, + MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost + }; + + static inline Real epsilon() { return NumTraits::epsilon(); } + static inline Real dummy_precision() { return NumTraits::dummy_precision(); } +}; + +template +struct NumTraits > +{ + typedef Array ArrayType; + typedef typename NumTraits::Real RealScalar; + typedef Array Real; + typedef typename NumTraits::NonInteger NonIntegerScalar; + typedef Array NonInteger; + typedef ArrayType & Nested; + + enum { + IsComplex = NumTraits::IsComplex, + IsInteger = NumTraits::IsInteger, + IsSigned = NumTraits::IsSigned, + RequireInitialization = 1, + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::MulCost + }; + + static inline RealScalar epsilon() { return NumTraits::epsilon(); } + static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } +}; + +} // end namespace Eigen + +#endif // EIGEN_NUMTRAITS_H diff --git a/tools/Eigen/src/Core/PlainObjectBase.h b/tools/Eigen/src/Core/PlainObjectBase.h new file mode 100644 index 0000000..a4e4af4 --- /dev/null +++ b/tools/Eigen/src/Core/PlainObjectBase.h @@ -0,0 +1,822 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSESTORAGEBASE_H +#define EIGEN_DENSESTORAGEBASE_H + +#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO) +# define EIGEN_INITIALIZE_COEFFS +# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i::quiet_NaN(); +#else +# undef EIGEN_INITIALIZE_COEFFS +# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED +#endif + +namespace Eigen { + +namespace internal { + +template struct check_rows_cols_for_overflow { + template + static EIGEN_ALWAYS_INLINE void run(Index, Index) + { + } +}; + +template<> struct check_rows_cols_for_overflow { + template + static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols) + { + // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 + // we assume Index is signed + Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed + bool error = (rows == 0 || cols == 0) ? false + : (rows > max_index / cols); + if (error) + throw_std_bad_alloc(); + } +}; + +template +struct conservative_resize_like_impl; + +template struct matrix_swap_impl; + +} // end namespace internal + +/** \class PlainObjectBase + * \brief %Dense storage base class for matrices and arrays. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. + * + * \sa \ref TopicClassHierarchy + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +namespace internal { + +// this is a warkaround to doxygen not being able to understand the inheritence logic +// when it is hidden by the dense_xpr_base helper struct. +template struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase {}; +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template +struct dense_xpr_base_dispatcher_for_doxygen > + : public MatrixBase > {}; +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template +struct dense_xpr_base_dispatcher_for_doxygen > + : public ArrayBase > {}; + +} // namespace internal + +template +class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen +#else +template +class PlainObjectBase : public internal::dense_xpr_base::type +#endif +{ + public: + enum { Options = internal::traits::Options }; + typedef typename internal::dense_xpr_base::type Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef Derived DenseType; + + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + + template friend class Eigen::Map; + friend class Eigen::Map; + typedef Eigen::Map MapType; + friend class Eigen::Map; + typedef const Eigen::Map ConstMapType; + friend class Eigen::Map; + typedef Eigen::Map AlignedMapType; + friend class Eigen::Map; + typedef const Eigen::Map ConstAlignedMapType; + template struct StridedMapType { typedef Eigen::Map type; }; + template struct StridedConstMapType { typedef Eigen::Map type; }; + template struct StridedAlignedMapType { typedef Eigen::Map type; }; + template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; + + protected: + DenseStorage m_storage; + + public: + enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits::Flags & AlignedBit) != 0 }; + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + + Base& base() { return *static_cast(this); } + const Base& base() const { return *static_cast(this); } + + EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } + + EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + return m_storage.data()[index]; + } + + EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + return m_storage.data()[index]; + } + + EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const + { + return m_storage.data()[index]; + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const + { + return internal::ploadt + (m_storage.data() + (Flags & RowMajorBit + ? colId + rowId * m_storage.cols() + : rowId + colId * m_storage.rows())); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index index) const + { + return internal::ploadt(m_storage.data() + index); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + internal::pstoret + (m_storage.data() + (Flags & RowMajorBit + ? colId + rowId * m_storage.cols() + : rowId + colId * m_storage.rows()), val); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val) + { + internal::pstoret(m_storage.data() + index, val); + } + + /** \returns a const pointer to the data array of this matrix */ + EIGEN_STRONG_INLINE const Scalar *data() const + { return m_storage.data(); } + + /** \returns a pointer to the data array of this matrix */ + EIGEN_STRONG_INLINE Scalar *data() + { return m_storage.data(); } + + /** Resizes \c *this to a \a rows x \a cols matrix. + * + * This method is intended for dynamic-size matrices, although it is legal to call it on any + * matrix as long as fixed dimensions are left unchanged. If you only want to change the number + * of rows and/or of columns, you can use resize(NoChange_t, Index), resize(Index, NoChange_t). + * + * If the current number of coefficients of \c *this exactly matches the + * product \a rows * \a cols, then no memory allocation is performed and + * the current values are left unchanged. In all other cases, including + * shrinking, the data is reallocated and all previous values are lost. + * + * Example: \include Matrix_resize_int_int.cpp + * Output: \verbinclude Matrix_resize_int_int.out + * + * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols) + { + eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime) + && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime) + && nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array."); + internal::check_rows_cols_for_overflow::run(nbRows, nbCols); + #ifdef EIGEN_INITIALIZE_COEFFS + Index size = nbRows*nbCols; + bool size_changed = size != this->size(); + m_storage.resize(size, nbRows, nbCols); + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #else + internal::check_rows_cols_for_overflow::run(nbRows, nbCols); + m_storage.resize(nbRows*nbCols, nbRows, nbCols); + #endif + } + + /** Resizes \c *this to a vector of length \a size + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. + * + * Example: \include Matrix_resize_int.cpp + * Output: \verbinclude Matrix_resize_int.out + * + * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + inline void resize(Index size) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) + eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0); + #ifdef EIGEN_INITIALIZE_COEFFS + bool size_changed = size != this->size(); + #endif + if(RowsAtCompileTime == 1) + m_storage.resize(size, 1, size); + else + m_storage.resize(size, size, 1); + #ifdef EIGEN_INITIALIZE_COEFFS + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #endif + } + + /** Resizes the matrix, changing only the number of columns. For the parameter of type NoChange_t, just pass the special value \c NoChange + * as in the example below. + * + * Example: \include Matrix_resize_NoChange_int.cpp + * Output: \verbinclude Matrix_resize_NoChange_int.out + * + * \sa resize(Index,Index) + */ + inline void resize(NoChange_t, Index nbCols) + { + resize(rows(), nbCols); + } + + /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange + * as in the example below. + * + * Example: \include Matrix_resize_int_NoChange.cpp + * Output: \verbinclude Matrix_resize_int_NoChange.out + * + * \sa resize(Index,Index) + */ + inline void resize(Index nbRows, NoChange_t) + { + resize(nbRows, cols()); + } + + /** Resizes \c *this to have the same dimensions as \a other. + * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) + { + const OtherDerived& other = _other.derived(); + internal::check_rows_cols_for_overflow::run(other.rows(), other.cols()); + const Index othersize = other.rows()*other.cols(); + if(RowsAtCompileTime == 1) + { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(1, othersize); + } + else if(ColsAtCompileTime == 1) + { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(othersize, 1); + } + else resize(other.rows(), other.cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will be uninitialized. + */ + EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols) + { + internal::conservative_resize_like_impl::run(*this, nbRows, nbCols); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of columns unchanged. + * + * In case the matrix is growing, new rows will be uninitialized. + */ + EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t) + { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(nbRows, cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of rows unchanged. + * + * In case the matrix is growing, new columns will be uninitialized. + */ + EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols) + { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows(), nbCols); + } + + /** Resizes the vector to \a size while retaining old values. + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. + * + * When values are appended, they will be uninitialized. + */ + EIGEN_STRONG_INLINE void conservativeResize(Index size) + { + internal::conservative_resize_like_impl::run(*this, size); + } + + /** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will copied from \c other. + */ + template + EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) + { + internal::conservative_resize_like_impl::run(*this, other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) + { + return _set(other); + } + + /** \sa MatrixBase::lazyAssign() */ + template + EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase& other) + { + _resize_to_match(other); + return Base::lazyAssign(other.derived()); + } + + template + EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue& func) + { + resize(func.rows(), func.cols()); + return Base::operator=(func); + } + + EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() + { +// _check_template_params(); +// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + // FIXME is it still needed ? + /** \internal */ + PlainObjectBase(internal::constructor_without_unaligned_array_assert) + : m_storage(internal::constructor_without_unaligned_array_assert()) + { +// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } +#endif + +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + PlainObjectBase(PlainObjectBase&& other) + : m_storage( std::move(other.m_storage) ) + { + } + + PlainObjectBase& operator=(PlainObjectBase&& other) + { + using std::swap; + swap(m_storage, other.m_storage); + return *this; + } +#endif + + /** Copy constructor */ + EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) + : m_storage() + { + _check_template_params(); + lazyAssign(other); + } + + template + EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase &other) + : m_storage() + { + _check_template_params(); + lazyAssign(other); + } + + EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols) + : m_storage(a_size, nbRows, nbCols) + { +// _check_template_params(); +// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + /** \copydoc MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + { + _resize_to_match(other); + Base::operator=(other.derived()); + return this->derived(); + } + + /** \sa MatrixBase::operator=(const EigenBase&) */ + template + EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) + : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) + { + _check_template_params(); + internal::check_rows_cols_for_overflow::run(other.derived().rows(), other.derived().cols()); + Base::operator=(other.derived()); + } + + /** \name Map + * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects, + * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned + * \a data pointers. + * + * \see class Map + */ + //@{ + static inline ConstMapType Map(const Scalar* data) + { return ConstMapType(data); } + static inline MapType Map(Scalar* data) + { return MapType(data); } + static inline ConstMapType Map(const Scalar* data, Index size) + { return ConstMapType(data, size); } + static inline MapType Map(Scalar* data, Index size) + { return MapType(data, size); } + static inline ConstMapType Map(const Scalar* data, Index rows, Index cols) + { return ConstMapType(data, rows, cols); } + static inline MapType Map(Scalar* data, Index rows, Index cols) + { return MapType(data, rows, cols); } + + static inline ConstAlignedMapType MapAligned(const Scalar* data) + { return ConstAlignedMapType(data); } + static inline AlignedMapType MapAligned(Scalar* data) + { return AlignedMapType(data); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size) + { return ConstAlignedMapType(data, size); } + static inline AlignedMapType MapAligned(Scalar* data, Index size) + { return AlignedMapType(data, size); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols) + { return ConstAlignedMapType(data, rows, cols); } + static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols) + { return AlignedMapType(data, rows, cols); } + + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, const Stride& stride) + { return typename StridedConstMapType >::type(data, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, const Stride& stride) + { return typename StridedMapType >::type(data, stride); } + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, Index size, const Stride& stride) + { return typename StridedConstMapType >::type(data, size, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, Index size, const Stride& stride) + { return typename StridedMapType >::type(data, size, stride); } + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedConstMapType >::type(data, rows, cols, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedMapType >::type(data, rows, cols, stride); } + + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, stride); } + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, Index size, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, size, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, Index size, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, size, stride); } + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, rows, cols, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, rows, cols, stride); } + //@} + + using Base::setConstant; + Derived& setConstant(Index size, const Scalar& value); + Derived& setConstant(Index rows, Index cols, const Scalar& value); + + using Base::setZero; + Derived& setZero(Index size); + Derived& setZero(Index rows, Index cols); + + using Base::setOnes; + Derived& setOnes(Index size); + Derived& setOnes(Index rows, Index cols); + + using Base::setRandom; + Derived& setRandom(Index size); + Derived& setRandom(Index rows, Index cols); + + #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN + #include EIGEN_PLAINOBJECTBASE_PLUGIN + #endif + + protected: + /** \internal Resizes *this in preparation for assigning \a other to it. + * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase& other) + { + #ifdef EIGEN_NO_AUTOMATIC_RESIZING + eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size()) + : (rows() == other.rows() && cols() == other.cols()))) + && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + EIGEN_ONLY_USED_FOR_DEBUG(other); + if(this->size()==0) + resizeLike(other); + #else + resizeLike(other); + #endif + } + + /** + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + * + * \sa operator=(const MatrixBase&), _set_noalias() + * + * \internal + */ + template + EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) + { + _set_selector(other.derived(), typename internal::conditional(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type()); + return this->derived(); + } + + template + EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); } + + template + EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); } + + /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which + * is the case when creating a new matrix) so one can enforce lazy evaluation. + * + * \sa operator=(const MatrixBase&), _set() + */ + template + EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase& other) + { + // I don't think we need this resize call since the lazyAssign will anyways resize + // and lazyAssign will be called by the assign selector. + //_resize_to_match(other); + // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because + // it wouldn't allow to copy a row-vector into a column-vector. + return internal::assign_selector::run(this->derived(), other.derived()); + } + + template + EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if::type* = 0) + { + EIGEN_STATIC_ASSERT(bool(NumTraits::IsInteger) && + bool(NumTraits::IsInteger), + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(nbRows,nbCols); + } + template + EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + } + + template + friend struct internal::matrix_swap_impl; + + /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the + * data pointers. + */ + template + void _swap(DenseBase const & other) + { + enum { SwapPointers = internal::is_same::value && Base::SizeAtCompileTime==Dynamic }; + internal::matrix_swap_impl::run(this->derived(), other.const_cast_derived()); + } + + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + static EIGEN_STRONG_INLINE void _check_template_params() + { + EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor) + && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0) + && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0)) + && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0)) + && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0)) + && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0)) + && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic) + && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic) + && (Options & (DontAlign|RowMajor)) == Options), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + } +#endif + +private: + enum { ThisConstantIsPrivateInPlainObjectBase }; +}; + +namespace internal { + +template +struct conservative_resize_like_impl +{ + typedef typename Derived::Index Index; + static void run(DenseBase& _this, Index rows, Index cols) + { + if (_this.rows() == rows && _this.cols() == cols) return; + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + + if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns + { + internal::check_rows_cols_for_overflow::run(rows, cols); + _this.derived().m_storage.conservativeResize(rows*cols,rows,cols); + } + else + { + // The storage order does not allow us to use reallocation. + typename Derived::PlainObject tmp(rows,cols); + const Index common_rows = (std::min)(rows, _this.rows()); + const Index common_cols = (std::min)(cols, _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } + } + + static void run(DenseBase& _this, const DenseBase& other) + { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + // Note: Here is space for improvement. Basically, for conservativeResize(Index,Index), + // neither RowsAtCompileTime or ColsAtCompileTime must be Dynamic. If only one of the + // dimensions is dynamic, one could use either conservativeResize(Index rows, NoChange_t) or + // conservativeResize(NoChange_t, Index cols). For these methods new static asserts like + // EIGEN_STATIC_ASSERT_DYNAMIC_ROWS and EIGEN_STATIC_ASSERT_DYNAMIC_COLS would be good. + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived) + + if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == other.rows()) ) // column-major and we change only the number of columns + { + const Index new_rows = other.rows() - _this.rows(); + const Index new_cols = other.cols() - _this.cols(); + _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols()); + if (new_rows>0) + _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows); + else if (new_cols>0) + _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols); + } + else + { + // The storage order does not allow us to use reallocation. + typename Derived::PlainObject tmp(other); + const Index common_rows = (std::min)(tmp.rows(), _this.rows()); + const Index common_cols = (std::min)(tmp.cols(), _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } + } +}; + +// Here, the specialization for vectors inherits from the general matrix case +// to allow calling .conservativeResize(rows,cols) on vectors. +template +struct conservative_resize_like_impl + : conservative_resize_like_impl +{ + using conservative_resize_like_impl::run; + + typedef typename Derived::Index Index; + static void run(DenseBase& _this, Index size) + { + const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size; + const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1; + _this.derived().m_storage.conservativeResize(size,new_rows,new_cols); + } + + static void run(DenseBase& _this, const DenseBase& other) + { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + const Index num_new_elements = other.size() - _this.size(); + + const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows(); + const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1; + _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols); + + if (num_new_elements > 0) + _this.tail(num_new_elements) = other.tail(num_new_elements); + } +}; + +template +struct matrix_swap_impl +{ + static inline void run(MatrixTypeA& a, MatrixTypeB& b) + { + a.base().swap(b); + } +}; + +template +struct matrix_swap_impl +{ + static inline void run(MatrixTypeA& a, MatrixTypeB& b) + { + static_cast(a).m_storage.swap(static_cast(b).m_storage); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSESTORAGEBASE_H diff --git a/tools/Eigen/src/Core/ProductBase.h b/tools/Eigen/src/Core/ProductBase.h new file mode 100644 index 0000000..cf74470 --- /dev/null +++ b/tools/Eigen/src/Core/ProductBase.h @@ -0,0 +1,290 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PRODUCTBASE_H +#define EIGEN_PRODUCTBASE_H + +namespace Eigen { + +/** \class ProductBase + * \ingroup Core_Module + * + */ + +namespace internal { +template +struct traits > +{ + typedef MatrixXpr XprKind; + typedef typename remove_all<_Lhs>::type Lhs; + typedef typename remove_all<_Rhs>::type Rhs; + typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) + | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, + // Note that EvalBeforeNestingBit and NestByRefBit + // are not used in practice because nested is overloaded for products + CoeffReadCost = 0 // FIXME why is it needed ? + }; +}; +} + +#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ + typedef ProductBase Base; \ + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ + typedef typename Base::LhsNested LhsNested; \ + typedef typename Base::_LhsNested _LhsNested; \ + typedef typename Base::LhsBlasTraits LhsBlasTraits; \ + typedef typename Base::ActualLhsType ActualLhsType; \ + typedef typename Base::_ActualLhsType _ActualLhsType; \ + typedef typename Base::RhsNested RhsNested; \ + typedef typename Base::_RhsNested _RhsNested; \ + typedef typename Base::RhsBlasTraits RhsBlasTraits; \ + typedef typename Base::ActualRhsType ActualRhsType; \ + typedef typename Base::_ActualRhsType _ActualRhsType; \ + using Base::m_lhs; \ + using Base::m_rhs; + +template +class ProductBase : public MatrixBase +{ + public: + typedef MatrixBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) + + typedef typename Lhs::Nested LhsNested; + typedef typename internal::remove_all::type _LhsNested; + typedef internal::blas_traits<_LhsNested> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef typename internal::remove_all::type _ActualLhsType; + typedef typename internal::traits::Scalar LhsScalar; + + typedef typename Rhs::Nested RhsNested; + typedef typename internal::remove_all::type _RhsNested; + typedef internal::blas_traits<_RhsNested> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type _ActualRhsType; + typedef typename internal::traits::Scalar RhsScalar; + + // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once + typedef CoeffBasedProduct FullyLazyCoeffBaseProductType; + + public: + +#ifndef EIGEN_NO_MALLOC + typedef typename Base::PlainObject BasePlainObject; + typedef Matrix DynPlainObject; + typedef typename internal::conditional<(BasePlainObject::SizeAtCompileTime==Dynamic) || (BasePlainObject::SizeAtCompileTime*int(sizeof(Scalar)) < int(EIGEN_STACK_ALLOCATION_LIMIT)), + BasePlainObject, DynPlainObject>::type PlainObject; +#else + typedef typename Base::PlainObject PlainObject; +#endif + + ProductBase(const Lhs& a_lhs, const Rhs& a_rhs) + : m_lhs(a_lhs), m_rhs(a_rhs) + { + eigen_assert(a_lhs.cols() == a_rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + inline Index rows() const { return m_lhs.rows(); } + inline Index cols() const { return m_rhs.cols(); } + + template + inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); } + + template + inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); } + + template + inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); } + + template + inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); } + + const _LhsNested& lhs() const { return m_lhs; } + const _RhsNested& rhs() const { return m_rhs; } + + // Implicit conversion to the nested type (trigger the evaluation of the product) + operator const PlainObject& () const + { + m_result.resize(m_lhs.rows(), m_rhs.cols()); + derived().evalTo(m_result); + return m_result; + } + + const Diagonal diagonal() const + { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } + + template + const Diagonal diagonal() const + { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } + + const Diagonal diagonal(Index index) const + { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } + + // restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isnt a Lvalue expression + typename Base::CoeffReturnType coeff(Index row, Index col) const + { +#ifdef EIGEN2_SUPPORT + return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum(); +#else + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) + eigen_assert(this->rows() == 1 && this->cols() == 1); + Matrix result = *this; + return result.coeff(row,col); +#endif + } + + typename Base::CoeffReturnType coeff(Index i) const + { + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) + eigen_assert(this->rows() == 1 && this->cols() == 1); + Matrix result = *this; + return result.coeff(i); + } + + const Scalar& coeffRef(Index row, Index col) const + { + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) + eigen_assert(this->rows() == 1 && this->cols() == 1); + return derived().coeffRef(row,col); + } + + const Scalar& coeffRef(Index i) const + { + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) + eigen_assert(this->rows() == 1 && this->cols() == 1); + return derived().coeffRef(i); + } + + protected: + + LhsNested m_lhs; + RhsNested m_rhs; + + mutable PlainObject m_result; +}; + +// here we need to overload the nested rule for products +// such that the nested type is a const reference to a plain matrix +namespace internal { +template +struct nested, N, PlainObject> +{ + typedef typename GeneralProduct::PlainObject const& type; +}; +template +struct nested, N, PlainObject> +{ + typedef typename GeneralProduct::PlainObject const& type; +}; +} + +template +class ScaledProduct; + +// Note that these two operator* functions are not defined as member +// functions of ProductBase, because, otherwise we would have to +// define all overloads defined in MatrixBase. Furthermore, Using +// "using Base::operator*" would not work with MSVC. +// +// Also note that here we accept any compatible scalar types +template +const ScaledProduct +operator*(const ProductBase& prod, const typename Derived::Scalar& x) +{ return ScaledProduct(prod.derived(), x); } + +template +typename internal::enable_if::value, + const ScaledProduct >::type +operator*(const ProductBase& prod, const typename Derived::RealScalar& x) +{ return ScaledProduct(prod.derived(), x); } + + +template +const ScaledProduct +operator*(const typename Derived::Scalar& x,const ProductBase& prod) +{ return ScaledProduct(prod.derived(), x); } + +template +typename internal::enable_if::value, + const ScaledProduct >::type +operator*(const typename Derived::RealScalar& x,const ProductBase& prod) +{ return ScaledProduct(prod.derived(), x); } + +namespace internal { +template +struct traits > + : traits, + typename NestedProduct::_LhsNested, + typename NestedProduct::_RhsNested> > +{ + typedef typename traits::StorageKind StorageKind; +}; +} + +template +class ScaledProduct + : public ProductBase, + typename NestedProduct::_LhsNested, + typename NestedProduct::_RhsNested> +{ + public: + typedef ProductBase, + typename NestedProduct::_LhsNested, + typename NestedProduct::_RhsNested> Base; + typedef typename Base::Scalar Scalar; + typedef typename Base::PlainObject PlainObject; +// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) + + ScaledProduct(const NestedProduct& prod, const Scalar& x) + : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {} + + template + inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); } + + template + inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); } + + template + inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); } + + template + inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); } + + const Scalar& alpha() const { return m_alpha; } + + protected: + const NestedProduct& m_prod; + Scalar m_alpha; +}; + +/** \internal + * Overloaded to perform an efficient C = (A*B).lazy() */ +template +template +Derived& MatrixBase::lazyAssign(const ProductBase& other) +{ + other.derived().evalTo(derived()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_PRODUCTBASE_H diff --git a/tools/Eigen/src/Core/products/CMakeLists.txt b/tools/Eigen/src/Core/products/CMakeLists.txt new file mode 100644 index 0000000..21fc94a --- /dev/null +++ b/tools/Eigen/src/Core/products/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_Product_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_Product_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/products COMPONENT Devel + ) diff --git a/tools/Eigen/src/Core/products/CoeffBasedProduct.h b/tools/Eigen/src/Core/products/CoeffBasedProduct.h new file mode 100644 index 0000000..2a9d65b --- /dev/null +++ b/tools/Eigen/src/Core/products/CoeffBasedProduct.h @@ -0,0 +1,476 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COEFFBASED_PRODUCT_H +#define EIGEN_COEFFBASED_PRODUCT_H + +namespace Eigen { + +namespace internal { + +/********************************************************************************* +* Coefficient based product implementation. +* It is designed for the following use cases: +* - small fixed sizes +* - lazy products +*********************************************************************************/ + +/* Since the all the dimensions of the product are small, here we can rely + * on the generic Assign mechanism to evaluate the product per coeff (or packet). + * + * Note that here the inner-loops should always be unrolled. + */ + +template +struct product_coeff_impl; + +template +struct product_packet_impl; + +template +struct traits > +{ + typedef MatrixXpr XprKind; + typedef typename remove_all::type _LhsNested; + typedef typename remove_all::type _RhsNested; + typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits<_RhsNested>::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits<_RhsNested>::Index>::type Index; + + enum { + LhsCoeffReadCost = _LhsNested::CoeffReadCost, + RhsCoeffReadCost = _RhsNested::CoeffReadCost, + LhsFlags = _LhsNested::Flags, + RhsFlags = _RhsNested::Flags, + + RowsAtCompileTime = _LhsNested::RowsAtCompileTime, + ColsAtCompileTime = _RhsNested::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), + + MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, + MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + SameType = is_same::value, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic + || ( (ColsAtCompileTime % packet_traits::size) == 0 + && (RhsFlags&AlignedBit) + ) + ), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic + || ( (RowsAtCompileTime % packet_traits::size) == 0 + && (LhsFlags&AlignedBit) + ) + ), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (RhsRowMajor && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + | NestingFlags + | (LhsFlags & RhsFlags & AlignedBit) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + + CoeffReadCost = InnerSize == Dynamic ? Dynamic + : InnerSize == 0 ? 0 + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (LhsFlags & RhsFlags & AlignedBit) + && (InnerSize % packet_traits::size == 0) + }; +}; + +} // end namespace internal + +template +class CoeffBasedProduct + : internal::no_assignment_operator, + public MatrixBase > +{ + public: + + typedef MatrixBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct) + typedef typename Base::PlainObject PlainObject; + + private: + + typedef typename internal::traits::_LhsNested _LhsNested; + typedef typename internal::traits::_RhsNested _RhsNested; + + enum { + PacketSize = internal::packet_traits::size, + InnerSize = internal::traits::InnerSize, + Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = internal::traits::CanVectorizeInner + }; + + typedef internal::product_coeff_impl ScalarCoeffImpl; + + typedef CoeffBasedProduct LazyCoeffBasedProductType; + + public: + + inline CoeffBasedProduct(const CoeffBasedProduct& other) + : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs) + {} + + template + inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs) + : m_lhs(lhs), m_rhs(rhs) + { + // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. + // We still allow to mix T and complex. + EIGEN_STATIC_ASSERT((internal::scalar_product_traits::Defined), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + eigen_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } + + EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + Scalar res; + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); + return res; + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + */ + EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + Scalar res; + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? index : 0; + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); + return res; + } + + template + EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const + { + PacketScalar res; + internal::product_packet_impl + ::run(row, col, m_lhs, m_rhs, res); + return res; + } + + // Implicit conversion to the nested type (trigger the evaluation of the product) + EIGEN_STRONG_INLINE operator const PlainObject& () const + { + m_result.lazyAssign(*this); + return m_result; + } + + const _LhsNested& lhs() const { return m_lhs; } + const _RhsNested& rhs() const { return m_rhs; } + + const Diagonal diagonal() const + { return reinterpret_cast(*this); } + + template + const Diagonal diagonal() const + { return reinterpret_cast(*this); } + + const Diagonal diagonal(Index index) const + { return reinterpret_cast(*this).diagonal(index); } + + protected: + typename internal::add_const_on_value_type::type m_lhs; + typename internal::add_const_on_value_type::type m_rhs; + + mutable PlainObject m_result; +}; + +namespace internal { + +// here we need to overload the nested rule for products +// such that the nested type is a const reference to a plain matrix +template +struct nested, N, PlainObject> +{ + typedef PlainObject const& type; +}; + +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + product_coeff_impl::run(row, col, lhs, rhs, res); + res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col); + } +}; + +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) + { + res = RetScalar(0); + } +}; + +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) + { + res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum(); + } +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template +struct product_coeff_vectorized_unroller +{ + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits::size }; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); + pres = padd(pres, pmul( lhs.template packet(row, UnrollingIndex) , rhs.template packet(UnrollingIndex, col) )); + } +}; + +template +struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + pres = pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); + } +}; + +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) + { + res = 0; + } +}; + +template +struct product_coeff_impl +{ + typedef typename Lhs::PacketScalar Packet; + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits::size }; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + Packet pres; + product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); + res = predux(pres); + } +}; + +template +struct product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower +// NOTE maybe they are now useless since we have a specialization for Block +template +struct product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +template +struct product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); + } +}; + +template +struct product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs).sum(); + } +}; + +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, res); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + { + product_packet_impl::run(row, col, lhs, rhs, res); + res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); + } +}; + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + { + product_packet_impl::run(row, col, lhs, rhs, res); + res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); + } +}; + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + { + res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + } +}; + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + { + res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); + } +}; + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res) + { + res = pset1(0); + } +}; + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res) + { + res = pset1(0); + } +}; + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) + { + res = pset1(0); + for(Index i = 0; i < lhs.cols(); ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) + { + res = pset1(0); + for(Index i = 0; i < lhs.cols(); ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COEFFBASED_PRODUCT_H From d6d257624d62ba57ff80267e354b7a2cb9ce077a Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:12:18 +0200 Subject: [PATCH 16/25] add more eigen files --- tools/Eigen/src/Core/PermutationMatrix.h | 721 +++++++++ tools/Eigen/src/Core/Random.h | 152 ++ tools/Eigen/src/Core/Redux.h | 409 +++++ tools/Eigen/src/Core/Ref.h | 278 ++++ tools/Eigen/src/Core/Replicate.h | 177 +++ .../Core/products/GeneralBlockPanelKernel.h | 1341 +++++++++++++++++ .../src/Core/products/GeneralMatrixMatrix.h | 433 ++++++ .../products/GeneralMatrixMatrixTriangular.h | 278 ++++ .../GeneralMatrixMatrixTriangular_MKL.h | 146 ++ .../Core/products/GeneralMatrixMatrix_MKL.h | 118 ++ .../src/Core/products/GeneralMatrixVector.h | 566 +++++++ .../Core/products/GeneralMatrixVector_MKL.h | 131 ++ tools/Eigen/src/Core/products/Parallelizer.h | 162 ++ .../Core/products/SelfadjointMatrixMatrix.h | 436 ++++++ .../products/SelfadjointMatrixMatrix_MKL.h | 295 ++++ .../Core/products/SelfadjointMatrixVector.h | 281 ++++ .../products/SelfadjointMatrixVector_MKL.h | 114 ++ .../src/Core/products/SelfadjointProduct.h | 123 ++ .../Core/products/SelfadjointRank2Update.h | 93 ++ .../Core/products/TriangularMatrixMatrix.h | 427 ++++++ .../products/TriangularMatrixMatrix_MKL.h | 309 ++++ .../Core/products/TriangularMatrixVector.h | 348 +++++ .../products/TriangularMatrixVector_MKL.h | 247 +++ .../Core/products/TriangularSolverMatrix.h | 332 ++++ .../products/TriangularSolverMatrix_MKL.h | 155 ++ .../Core/products/TriangularSolverVector.h | 139 ++ 26 files changed, 8211 insertions(+) create mode 100644 tools/Eigen/src/Core/PermutationMatrix.h create mode 100644 tools/Eigen/src/Core/Random.h create mode 100644 tools/Eigen/src/Core/Redux.h create mode 100644 tools/Eigen/src/Core/Ref.h create mode 100644 tools/Eigen/src/Core/Replicate.h create mode 100644 tools/Eigen/src/Core/products/GeneralBlockPanelKernel.h create mode 100644 tools/Eigen/src/Core/products/GeneralMatrixMatrix.h create mode 100644 tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h create mode 100644 tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h create mode 100644 tools/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h create mode 100644 tools/Eigen/src/Core/products/GeneralMatrixVector.h create mode 100644 tools/Eigen/src/Core/products/GeneralMatrixVector_MKL.h create mode 100644 tools/Eigen/src/Core/products/Parallelizer.h create mode 100644 tools/Eigen/src/Core/products/SelfadjointMatrixMatrix.h create mode 100644 tools/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h create mode 100644 tools/Eigen/src/Core/products/SelfadjointMatrixVector.h create mode 100644 tools/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h create mode 100644 tools/Eigen/src/Core/products/SelfadjointProduct.h create mode 100644 tools/Eigen/src/Core/products/SelfadjointRank2Update.h create mode 100644 tools/Eigen/src/Core/products/TriangularMatrixMatrix.h create mode 100644 tools/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h create mode 100644 tools/Eigen/src/Core/products/TriangularMatrixVector.h create mode 100644 tools/Eigen/src/Core/products/TriangularMatrixVector_MKL.h create mode 100644 tools/Eigen/src/Core/products/TriangularSolverMatrix.h create mode 100644 tools/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h create mode 100644 tools/Eigen/src/Core/products/TriangularSolverVector.h diff --git a/tools/Eigen/src/Core/PermutationMatrix.h b/tools/Eigen/src/Core/PermutationMatrix.h new file mode 100644 index 0000000..85ffae2 --- /dev/null +++ b/tools/Eigen/src/Core/PermutationMatrix.h @@ -0,0 +1,721 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PERMUTATIONMATRIX_H +#define EIGEN_PERMUTATIONMATRIX_H + +namespace Eigen { + +template class PermutedImpl; + +/** \class PermutationBase + * \ingroup Core_Module + * + * \brief Base class for permutations + * + * \param Derived the derived class + * + * This class is the base class for all expressions representing a permutation matrix, + * internally stored as a vector of integers. + * The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix + * \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have: + * \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f] + * This convention ensures that for any two permutations \f$ \sigma, \tau \f$, we have: + * \f[ P_{\sigma\circ\tau} = P_\sigma P_\tau. \f] + * + * Permutation matrices are square and invertible. + * + * Notice that in addition to the member functions and operators listed here, there also are non-member + * operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase) + * on either side. + * + * \sa class PermutationMatrix, class PermutationWrapper + */ + +namespace internal { + +template +struct permut_matrix_product_retval; +template +struct permut_sparsematrix_product_retval; +enum PermPermProduct_t {PermPermProduct}; + +} // end namespace internal + +template +class PermutationBase : public EigenBase +{ + typedef internal::traits Traits; + typedef EigenBase Base; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + enum { + Flags = Traits::Flags, + CoeffReadCost = Traits::CoeffReadCost, + RowsAtCompileTime = Traits::RowsAtCompileTime, + ColsAtCompileTime = Traits::ColsAtCompileTime, + MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + }; + typedef typename Traits::Scalar Scalar; + typedef typename Traits::Index Index; + typedef Matrix + DenseMatrixType; + typedef PermutationMatrix + PlainPermutationType; + using Base::derived; + #endif + + /** Copies the other permutation into *this */ + template + Derived& operator=(const PermutationBase& other) + { + indices() = other.indices(); + return derived(); + } + + /** Assignment from the Transpositions \a tr */ + template + Derived& operator=(const TranspositionsBase& tr) + { + setIdentity(tr.size()); + for(Index k=size()-1; k>=0; --k) + applyTranspositionOnTheRight(k,tr.coeff(k)); + return derived(); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + Derived& operator=(const PermutationBase& other) + { + indices() = other.indices(); + return derived(); + } + #endif + + /** \returns the number of rows */ + inline Index rows() const { return Index(indices().size()); } + + /** \returns the number of columns */ + inline Index cols() const { return Index(indices().size()); } + + /** \returns the size of a side of the respective square matrix, i.e., the number of indices */ + inline Index size() const { return Index(indices().size()); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const + { + other.setZero(); + for (int i=0; i=0 && j>=0 && i=0 && j>=0 && i inverse() const + { return derived(); } + /** \returns the tranpose permutation matrix. + * + * \note \note_try_to_help_rvo + */ + inline Transpose transpose() const + { return derived(); } + + /**** multiplication helpers to hopefully get RVO ****/ + + +#ifndef EIGEN_PARSED_BY_DOXYGEN + protected: + template + void assignTranspose(const PermutationBase& other) + { + for (int i=0; i + void assignProduct(const Lhs& lhs, const Rhs& rhs) + { + eigen_assert(lhs.cols() == rhs.rows()); + for (int i=0; i + inline PlainPermutationType operator*(const PermutationBase& other) const + { return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); } + + /** \returns the product of a permutation with another inverse permutation. + * + * \note \note_try_to_help_rvo + */ + template + inline PlainPermutationType operator*(const Transpose >& other) const + { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } + + /** \returns the product of an inverse permutation with another permutation. + * + * \note \note_try_to_help_rvo + */ + template friend + inline PlainPermutationType operator*(const Transpose >& other, const PermutationBase& perm) + { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } + + /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. + * + * This function is O(\c n) procedure allocating a buffer of \c n booleans. + */ + Index determinant() const + { + Index res = 1; + Index n = size(); + Matrix mask(n); + mask.fill(false); + Index r = 0; + while(r < n) + { + // search for the next seed + while(r=n) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + mask.coeffRef(k0) = true; + for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k)) + { + mask.coeffRef(k) = true; + res = -res; + } + } + return res; + } + + protected: + +}; + +/** \class PermutationMatrix + * \ingroup Core_Module + * + * \brief Permutation matrix + * + * \param SizeAtCompileTime the number of rows/cols, or Dynamic + * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. + * \param IndexType the interger type of the indices + * + * This class represents a permutation matrix, internally stored as a vector of integers. + * + * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix + */ + +namespace internal { +template +struct traits > + : traits > +{ + typedef IndexType Index; + typedef Matrix IndicesType; +}; +} + +template +class PermutationMatrix : public PermutationBase > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + #endif + + inline PermutationMatrix() + {} + + /** Constructs an uninitialized permutation matrix of given size. + */ + inline PermutationMatrix(int size) : m_indices(size) + {} + + /** Copy constructor. */ + template + inline PermutationMatrix(const PermutationBase& other) + : m_indices(other.indices()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** Standard copy constructor. Defined only to prevent a default copy constructor + * from hiding the other templated constructor */ + inline PermutationMatrix(const PermutationMatrix& other) : m_indices(other.indices()) {} + #endif + + /** Generic constructor from expression of the indices. The indices + * array has the meaning that the permutations sends each integer i to indices[i]. + * + * \warning It is your responsibility to check that the indices array that you passes actually + * describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the + * array's size. + */ + template + explicit inline PermutationMatrix(const MatrixBase& a_indices) : m_indices(a_indices) + {} + + /** Convert the Transpositions \a tr to a permutation matrix */ + template + explicit PermutationMatrix(const TranspositionsBase& tr) + : m_indices(tr.size()) + { + *this = tr; + } + + /** Copies the other permutation into *this */ + template + PermutationMatrix& operator=(const PermutationBase& other) + { + m_indices = other.indices(); + return *this; + } + + /** Assignment from the Transpositions \a tr */ + template + PermutationMatrix& operator=(const TranspositionsBase& tr) + { + return Base::operator=(tr.derived()); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + PermutationMatrix& operator=(const PermutationMatrix& other) + { + m_indices = other.m_indices; + return *this; + } + #endif + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. */ + IndicesType& indices() { return m_indices; } + + + /**** multiplication helpers to hopefully get RVO ****/ + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + PermutationMatrix(const Transpose >& other) + : m_indices(other.nestedPermutation().size()) + { + for (int i=0; i + PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) + : m_indices(lhs.indices().size()) + { + Base::assignProduct(lhs,rhs); + } +#endif + + protected: + + IndicesType m_indices; +}; + + +namespace internal { +template +struct traits,_PacketAccess> > + : traits > +{ + typedef IndexType Index; + typedef Map, _PacketAccess> IndicesType; +}; +} + +template +class Map,_PacketAccess> + : public PermutationBase,_PacketAccess> > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar Index; + #endif + + inline Map(const Index* indicesPtr) + : m_indices(indicesPtr) + {} + + inline Map(const Index* indicesPtr, Index size) + : m_indices(indicesPtr,size) + {} + + /** Copies the other permutation into *this */ + template + Map& operator=(const PermutationBase& other) + { return Base::operator=(other.derived()); } + + /** Assignment from the Transpositions \a tr */ + template + Map& operator=(const TranspositionsBase& tr) + { return Base::operator=(tr.derived()); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + Map& operator=(const Map& other) + { + m_indices = other.m_indices; + return *this; + } + #endif + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. */ + IndicesType& indices() { return m_indices; } + + protected: + + IndicesType m_indices; +}; + +/** \class PermutationWrapper + * \ingroup Core_Module + * + * \brief Class to view a vector of integers as a permutation matrix + * + * \param _IndicesType the type of the vector of integer (can be any compatible expression) + * + * This class allows to view any vector expression of integers as a permutation matrix. + * + * \sa class PermutationBase, class PermutationMatrix + */ + +struct PermutationStorage {}; + +template class TranspositionsWrapper; +namespace internal { +template +struct traits > +{ + typedef PermutationStorage StorageKind; + typedef typename _IndicesType::Scalar Scalar; + typedef typename _IndicesType::Scalar Index; + typedef _IndicesType IndicesType; + enum { + RowsAtCompileTime = _IndicesType::SizeAtCompileTime, + ColsAtCompileTime = _IndicesType::SizeAtCompileTime, + MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime, + Flags = 0, + CoeffReadCost = _IndicesType::CoeffReadCost + }; +}; +} + +template +class PermutationWrapper : public PermutationBase > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + #endif + + inline PermutationWrapper(const IndicesType& a_indices) + : m_indices(a_indices) + {} + + /** const version of indices(). */ + const typename internal::remove_all::type& + indices() const { return m_indices; } + + protected: + + typename IndicesType::Nested m_indices; +}; + +/** \returns the matrix with the permutation applied to the columns. + */ +template +inline const internal::permut_matrix_product_retval +operator*(const MatrixBase& matrix, + const PermutationBase &permutation) +{ + return internal::permut_matrix_product_retval + + (permutation.derived(), matrix.derived()); +} + +/** \returns the matrix with the permutation applied to the rows. + */ +template +inline const internal::permut_matrix_product_retval + +operator*(const PermutationBase &permutation, + const MatrixBase& matrix) +{ + return internal::permut_matrix_product_retval + + (permutation.derived(), matrix.derived()); +} + +namespace internal { + +template +struct traits > +{ + typedef typename MatrixType::PlainObject ReturnType; +}; + +template +struct permut_matrix_product_retval + : public ReturnByValue > +{ + typedef typename remove_all::type MatrixTypeNestedCleaned; + typedef typename MatrixType::Index Index; + + permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix) + : m_permutation(perm), m_matrix(matrix) + {} + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + template inline void evalTo(Dest& dst) const + { + const Index n = Side==OnTheLeft ? rows() : cols(); + // FIXME we need an is_same for expression that is not sensitive to constness. For instance + // is_same_xpr, Block >::value should be true. + if( is_same::value + && blas_traits::HasUsableDirectAccess + && blas_traits::HasUsableDirectAccess + && extract_data(dst) == extract_data(m_matrix)) + { + // apply the permutation inplace + Matrix mask(m_permutation.size()); + mask.fill(false); + Index r = 0; + while(r < m_permutation.size()) + { + // search for the next seed + while(r=m_permutation.size()) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + Index kPrev = k0; + mask.coeffRef(k0) = true; + for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k)) + { + Block(dst, k) + .swap(Block + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(int i = 0; i < n; ++i) + { + Block + (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i) + + = + + Block + (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i); + } + } + } + + protected: + const PermutationType& m_permutation; + typename MatrixType::Nested m_matrix; +}; + +/* Template partial specialization for transposed/inverse permutations */ + +template +struct traits > > + : traits +{}; + +} // end namespace internal + +template +class Transpose > + : public EigenBase > > +{ + typedef Derived PermutationType; + typedef typename PermutationType::IndicesType IndicesType; + typedef typename PermutationType::PlainPermutationType PlainPermutationType; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef internal::traits Traits; + typedef typename Derived::DenseMatrixType DenseMatrixType; + enum { + Flags = Traits::Flags, + CoeffReadCost = Traits::CoeffReadCost, + RowsAtCompileTime = Traits::RowsAtCompileTime, + ColsAtCompileTime = Traits::ColsAtCompileTime, + MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + }; + typedef typename Traits::Scalar Scalar; + #endif + + Transpose(const PermutationType& p) : m_permutation(p) {} + + inline int rows() const { return m_permutation.rows(); } + inline int cols() const { return m_permutation.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const + { + other.setZero(); + for (int i=0; i friend + inline const internal::permut_matrix_product_retval + operator*(const MatrixBase& matrix, const Transpose& trPerm) + { + return internal::permut_matrix_product_retval(trPerm.m_permutation, matrix.derived()); + } + + /** \returns the matrix with the inverse permutation applied to the rows. + */ + template + inline const internal::permut_matrix_product_retval + operator*(const MatrixBase& matrix) const + { + return internal::permut_matrix_product_retval(m_permutation, matrix.derived()); + } + + const PermutationType& nestedPermutation() const { return m_permutation; } + + protected: + const PermutationType& m_permutation; +}; + +template +const PermutationWrapper MatrixBase::asPermutation() const +{ + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/tools/Eigen/src/Core/Random.h b/tools/Eigen/src/Core/Random.h new file mode 100644 index 0000000..480fea4 --- /dev/null +++ b/tools/Eigen/src/Core/Random.h @@ -0,0 +1,152 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RANDOM_H +#define EIGEN_RANDOM_H + +namespace Eigen { + +namespace internal { + +template struct scalar_random_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) + template + inline const Scalar operator() (Index, Index = 0) const { return random(); } +}; + +template +struct functor_traits > +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false, IsRepeatable = false }; }; + +} // end namespace internal + +/** \returns a random matrix expression + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used + * instead. + * + * Example: \include MatrixBase_random_int_int.cpp + * Output: \verbinclude MatrixBase_random_int_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random() + */ +template +inline const CwiseNullaryOp::Scalar>, Derived> +DenseBase::Random(Index rows, Index cols) +{ + return NullaryExpr(rows, cols, internal::scalar_random_op()); +} + +/** \returns a random vector expression + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Random() should be used + * instead. + * + * Example: \include MatrixBase_random_int.cpp + * Output: \verbinclude MatrixBase_random_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary vector whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random() + */ +template +inline const CwiseNullaryOp::Scalar>, Derived> +DenseBase::Random(Index size) +{ + return NullaryExpr(size, internal::scalar_random_op()); +} + +/** \returns a fixed-size random matrix or vector expression + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_random.cpp + * Output: \verbinclude MatrixBase_random.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index) + */ +template +inline const CwiseNullaryOp::Scalar>, Derived> +DenseBase::Random() +{ + return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); +} + +/** Sets all coefficients in this expression to random values. + * + * Example: \include MatrixBase_setRandom.cpp + * Output: \verbinclude MatrixBase_setRandom.out + * + * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) + */ +template +inline Derived& DenseBase::setRandom() +{ + return *this = Random(rows(), cols()); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values. + * + * \only_for_vectors + * + * Example: \include Matrix_setRandom_int.cpp + * Output: \verbinclude Matrix_setRandom_int.out + * + * \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setRandom(Index newSize) +{ + resize(newSize); + return setRandom(); +} + +/** Resizes to the given size, and sets all coefficients in this expression to random values. + * + * \param nbRows the new number of rows + * \param nbCols the new number of columns + * + * Example: \include Matrix_setRandom_int_int.cpp + * Output: \verbinclude Matrix_setRandom_int_int.out + * + * \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setRandom(Index nbRows, Index nbCols) +{ + resize(nbRows, nbCols); + return setRandom(); +} + +} // end namespace Eigen + +#endif // EIGEN_RANDOM_H diff --git a/tools/Eigen/src/Core/Redux.h b/tools/Eigen/src/Core/Redux.h new file mode 100644 index 0000000..9b8662a --- /dev/null +++ b/tools/Eigen/src/Core/Redux.h @@ -0,0 +1,409 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REDUX_H +#define EIGEN_REDUX_H + +namespace Eigen { + +namespace internal { + +// TODO +// * implement other kind of vectorization +// * factorize code + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for vectorization and unrolling +***************************************************************************/ + +template +struct redux_traits +{ +public: + enum { + PacketSize = packet_traits::size, + InnerMaxSize = int(Derived::IsRowMajor) + ? Derived::MaxColsAtCompileTime + : Derived::MaxRowsAtCompileTime + }; + + enum { + MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) + && (functor_traits::PacketAccess), + MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), + MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize + }; + +public: + enum { + Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(DefaultTraversal) + }; + +public: + enum { + Cost = ( Derived::SizeAtCompileTime == Dynamic + || Derived::CoeffReadCost == Dynamic + || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost == Dynamic) + ) ? Dynamic + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) + }; + +public: + enum { + Unrolling = Cost != Dynamic && Cost <= UnrollingLimit + ? CompleteUnrolling + : NoUnrolling + }; +}; + +/*************************************************************************** +* Part 2 : unrollers +***************************************************************************/ + +/*** no vectorization ***/ + +template +struct redux_novec_unroller +{ + enum { + HalfLength = Length/2 + }; + + typedef typename Derived::Scalar Scalar; + + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + { + return func(redux_novec_unroller::run(mat,func), + redux_novec_unroller::run(mat,func)); + } +}; + +template +struct redux_novec_unroller +{ + enum { + outer = Start / Derived::InnerSizeAtCompileTime, + inner = Start % Derived::InnerSizeAtCompileTime + }; + + typedef typename Derived::Scalar Scalar; + + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) + { + return mat.coeffByOuterInner(outer, inner); + } +}; + +// This is actually dead code and will never be called. It is required +// to prevent false warnings regarding failed inlining though +// for 0 length run() will never be called at all. +template +struct redux_novec_unroller +{ + typedef typename Derived::Scalar Scalar; + static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } +}; + +/*** vectorization ***/ + +template +struct redux_vec_unroller +{ + enum { + PacketSize = packet_traits::size, + HalfLength = Length/2 + }; + + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type PacketScalar; + + static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) + { + return func.packetOp( + redux_vec_unroller::run(mat,func), + redux_vec_unroller::run(mat,func) ); + } +}; + +template +struct redux_vec_unroller +{ + enum { + index = Start * packet_traits::size, + outer = index / int(Derived::InnerSizeAtCompileTime), + inner = index % int(Derived::InnerSizeAtCompileTime), + alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned + }; + + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type PacketScalar; + + static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) + { + return mat.template packetByOuterInner(outer, inner); + } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +template::Traversal, + int Unrolling = redux_traits::Unrolling +> +struct redux_impl; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Index Index; + static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + Scalar res; + res = mat.coeffByOuterInner(0, 0); + for(Index i = 1; i < mat.innerSize(); ++i) + res = func(res, mat.coeffByOuterInner(0, i)); + for(Index i = 1; i < mat.outerSize(); ++i) + for(Index j = 0; j < mat.innerSize(); ++j) + res = func(res, mat.coeffByOuterInner(i, j)); + return res; + } +}; + +template +struct redux_impl + : public redux_novec_unroller +{}; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type PacketScalar; + typedef typename Derived::Index Index; + + static Scalar run(const Derived& mat, const Func& func) + { + const Index size = mat.size(); + eigen_assert(size && "you are using an empty matrix"); + const Index packetSize = packet_traits::size; + const Index alignedStart = internal::first_aligned(mat); + enum { + alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit) + ? Aligned : Unaligned + }; + const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); + const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); + const Index alignedEnd2 = alignedStart + alignedSize2; + const Index alignedEnd = alignedStart + alignedSize; + Scalar res; + if(alignedSize) + { + PacketScalar packet_res0 = mat.template packet(alignedStart); + if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop + { + PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); + for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) + { + packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); + packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); + } + + packet_res0 = func.packetOp(packet_res0,packet_res1); + if(alignedEnd>alignedEnd2) + packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); + } + res = func.predux(packet_res0); + + for(Index index = 0; index < alignedStart; ++index) + res = func(res,mat.coeff(index)); + + for(Index index = alignedEnd; index < size; ++index) + res = func(res,mat.coeff(index)); + } + else // too small to vectorize anything. + // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. + { + res = mat.coeff(0); + for(Index index = 1; index < size; ++index) + res = func(res,mat.coeff(index)); + } + + return res; + } +}; + +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type PacketScalar; + typedef typename Derived::Index Index; + + static Scalar run(const Derived& mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + const Index innerSize = mat.innerSize(); + const Index outerSize = mat.outerSize(); + enum { + packetSize = packet_traits::size + }; + const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; + Scalar res; + if(packetedInnerSize) + { + PacketScalar packet_res = mat.template packet(0,0); + for(Index j=0; j(j,i)); + + res = func.predux(packet_res); + for(Index j=0; j::run(mat, func); + } + + return res; + } +}; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type PacketScalar; + enum { + PacketSize = packet_traits::size, + Size = Derived::SizeAtCompileTime, + VectorizedSize = (Size / PacketSize) * PacketSize + }; + static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + Scalar res = func.predux(redux_vec_unroller::run(mat,func)); + if (VectorizedSize != Size) + res = func(res,redux_novec_unroller::run(mat,func)); + return res; + } +}; + +} // end namespace internal + +/*************************************************************************** +* Part 4 : public API +***************************************************************************/ + + +/** \returns the result of a full redux operation on the whole matrix or vector using \a func + * + * The template parameter \a BinaryOp is the type of the functor \a func which must be + * an associative operator. Both current STL and TR1 functor styles are handled. + * + * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() + */ +template +template +EIGEN_STRONG_INLINE typename internal::result_of::Scalar)>::type +DenseBase::redux(const Func& func) const +{ + typedef typename internal::remove_all::type ThisNested; + return internal::redux_impl + ::run(derived(), func); +} + +/** \returns the minimum of all coefficients of \c *this. + * \warning the result is undefined if \c *this contains NaN. + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::minCoeff() const +{ + return this->redux(Eigen::internal::scalar_min_op()); +} + +/** \returns the maximum of all coefficients of \c *this. + * \warning the result is undefined if \c *this contains NaN. + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::maxCoeff() const +{ + return this->redux(Eigen::internal::scalar_max_op()); +} + +/** \returns the sum of all coefficients of *this + * + * \sa trace(), prod(), mean() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::sum() const +{ + if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) + return Scalar(0); + return this->redux(Eigen::internal::scalar_sum_op()); +} + +/** \returns the mean of all coefficients of *this +* +* \sa trace(), prod(), sum() +*/ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::mean() const +{ + return Scalar(this->redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +} + +/** \returns the product of all coefficients of *this + * + * Example: \include MatrixBase_prod.cpp + * Output: \verbinclude MatrixBase_prod.out + * + * \sa sum(), mean(), trace() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::prod() const +{ + if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) + return Scalar(1); + return this->redux(Eigen::internal::scalar_product_op()); +} + +/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. + * + * \c *this can be any matrix, not necessarily square. + * + * \sa diagonal(), sum() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +MatrixBase::trace() const +{ + return derived().diagonal().sum(); +} + +} // end namespace Eigen + +#endif // EIGEN_REDUX_H diff --git a/tools/Eigen/src/Core/Ref.h b/tools/Eigen/src/Core/Ref.h new file mode 100644 index 0000000..7a3beca --- /dev/null +++ b/tools/Eigen/src/Core/Ref.h @@ -0,0 +1,278 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REF_H +#define EIGEN_REF_H + +namespace Eigen { + +template class RefBase; +template,OuterStride<> >::type > class Ref; + +/** \class Ref + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing expressions + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), + * but accept a variable outer stride (leading dimension). + * This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies. + * A Ref<> object can represent either a const expression or a l-value: + * \code + * // in-out argument: + * void foo1(Ref x); + * + * // read-only const argument: + * void foo2(const Ref& x); + * \endcode + * + * In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * By default, a Ref can reference any dense vector expression of float having a contiguous memory layout. + * Likewise, a Ref can reference any column major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension), + * can be greater than the number of rows. + * + * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. + * Here are some examples: + * \code + * MatrixXf A; + * VectorXf a; + * foo1(a.head()); // OK + * foo1(A.col()); // OK + * foo1(A.row()); // compilation error because here innerstride!=1 + * foo2(A.row()); // The row is copied into a contiguous temporary + * foo2(2*a); // The expression is evaluated into a temporary + * foo2(A.col().segment(2,4)); // No temporary + * \endcode + * + * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameter. + * Here is an example accepting an innerstride!=1: + * \code + * // in-out argument: + * void foo3(Ref > x); + * foo3(A.row()); // OK + * \endcode + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more + * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a + * template function, e.g.: + * \code + * // in the .h: + * void foo(const Ref& A); + * void foo(const Ref >& A); + * + * // in the .cpp: + * template void foo_impl(const TypeOfA& A) { + * ... // crazy code goes here + * } + * void foo(const Ref& A) { foo_impl(A); } + * void foo(const Ref >& A) { foo_impl(A); } + * \endcode + * + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ + +namespace internal { + +template +struct traits > + : public traits > +{ + typedef _PlainObjectType PlainObjectType; + typedef _StrideType StrideType; + enum { + Options = _Options, + Flags = traits >::Flags | NestByRefBit + }; + + template struct match { + enum { + HasDirectAccess = internal::has_direct_access::ret, + StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), + InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic) + || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime) + || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), + OuterStrideMatch = Derived::IsVectorAtCompileTime + || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), + AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits::Flags&AlignedBit)==AlignedBit), + ScalarTypeMatch = internal::is_same::value, + MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch + }; + typedef typename internal::conditional::type type; + }; + +}; + +template +struct traits > : public traits {}; + +} + +template class RefBase + : public MapBase +{ + typedef typename internal::traits::PlainObjectType PlainObjectType; + typedef typename internal::traits::StrideType StrideType; + +public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) + + inline Index innerStride() const + { + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; + } + + inline Index outerStride() const + { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags)&RowMajorBit ? this->cols() + : this->rows(); + } + + RefBase() + : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), + // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: + m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, + StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime) + {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase) + +protected: + + typedef Stride StrideBase; + + template + void construct(Expression& expr) + { + if(PlainObjectType::RowsAtCompileTime==1) + { + eigen_assert(expr.rows()==1 || expr.cols()==1); + ::new (static_cast(this)) Base(expr.data(), 1, expr.size()); + } + else if(PlainObjectType::ColsAtCompileTime==1) + { + eigen_assert(expr.rows()==1 || expr.cols()==1); + ::new (static_cast(this)) Base(expr.data(), expr.size(), 1); + } + else + ::new (static_cast(this)) Base(expr.data(), expr.rows(), expr.cols()); + + if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit))) + ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1); + else + ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(), + StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride()); + } + + StrideBase m_stride; +}; + + +template class Ref + : public RefBase > +{ + private: + typedef internal::traits Traits; + template + inline Ref(const PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); + public: + + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + inline Ref(PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + { + EIGEN_STATIC_ASSERT(static_cast(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); + } + template + inline Ref(const DenseBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + #else + template + inline Ref(DenseBase& expr) + #endif + { + EIGEN_STATIC_ASSERT(static_cast(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(static_cast(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + enum { THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY = Derived::ThisConstantIsPrivateInPlainObjectBase}; + Base::construct(expr.const_cast_derived()); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref) + +}; + +// this is the const ref version +template class Ref + : public RefBase > +{ + typedef internal::traits Traits; + public: + + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + + template + inline Ref(const DenseBase& expr, + typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) + { +// std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; +// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; +// std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n"; + construct(expr.derived(), typename Traits::template match::type()); + } + + inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + + protected: + + template + void construct(const Expression& expr,internal::true_type) + { + Base::construct(expr); + } + + template + void construct(const Expression& expr, internal::false_type) + { + m_object.lazyAssign(expr); + Base::construct(m_object); + } + + protected: + TPlainObjectType m_object; +}; + +} // end namespace Eigen + +#endif // EIGEN_REF_H diff --git a/tools/Eigen/src/Core/Replicate.h b/tools/Eigen/src/Core/Replicate.h new file mode 100644 index 0000000..ac4537c --- /dev/null +++ b/tools/Eigen/src/Core/Replicate.h @@ -0,0 +1,177 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REPLICATE_H +#define EIGEN_REPLICATE_H + +namespace Eigen { + +/** + * \class Replicate + * \ingroup Core_Module + * + * \brief Expression of the multiple replication of a matrix or vector + * + * \param MatrixType the type of the object we are replicating + * + * This class represents an expression of the multiple replication of a matrix or vector. + * It is the return type of DenseBase::replicate() and most of the time + * this is the only way it is used. + * + * \sa DenseBase::replicate() + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename nested::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + enum { + RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic + ? Dynamic + : RowFactor * MatrixType::RowsAtCompileTime, + ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic + ? Dynamic + : ColFactor * MatrixType::ColsAtCompileTime, + //FIXME we don't propagate the max sizes !!! + MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, + IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 + : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 + : (MatrixType::Flags & RowMajorBit) ? 1 : 0, + Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), + CoeffReadCost = _MatrixTypeNested::CoeffReadCost + }; +}; +} + +template class Replicate + : public internal::dense_xpr_base< Replicate >::type +{ + typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; + typedef typename internal::traits::_MatrixTypeNested _MatrixTypeNested; + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + + template + inline explicit Replicate(const OriginalMatrixType& a_matrix) + : m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) + { + EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic); + } + + template + inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor) + : m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) + { + EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + } + + inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } + + inline Scalar coeff(Index rowId, Index colId) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? rowId + : rowId%m_matrix.rows(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? colId + : colId%m_matrix.cols(); + + return m_matrix.coeff(actual_row, actual_col); + } + template + inline PacketScalar packet(Index rowId, Index colId) const + { + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? rowId + : rowId%m_matrix.rows(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? colId + : colId%m_matrix.cols(); + + return m_matrix.template packet(actual_row, actual_col); + } + + const _MatrixTypeNested& nestedExpression() const + { + return m_matrix; + } + + protected: + MatrixTypeNested m_matrix; + const internal::variable_if_dynamic m_rowFactor; + const internal::variable_if_dynamic m_colFactor; +}; + +/** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate.cpp + * Output: \verbinclude MatrixBase_replicate.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate + */ +template +template +const Replicate +DenseBase::replicate() const +{ + return Replicate(derived()); +} + +/** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ +template +const typename DenseBase::ReplicateReturnType +DenseBase::replicate(Index rowFactor,Index colFactor) const +{ + return Replicate(derived(),rowFactor,colFactor); +} + +/** + * \return an expression of the replication of each column (or row) of \c *this + * + * Example: \include DirectionWise_replicate_int.cpp + * Output: \verbinclude DirectionWise_replicate_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ +template +const typename VectorwiseOp::ReplicateReturnType +VectorwiseOp::replicate(Index factor) const +{ + return typename VectorwiseOp::ReplicateReturnType + (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1); +} + +} // end namespace Eigen + +#endif // EIGEN_REPLICATE_H diff --git a/tools/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/tools/Eigen/src/Core/products/GeneralBlockPanelKernel.h new file mode 100644 index 0000000..bcdca5b --- /dev/null +++ b/tools/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -0,0 +1,1341 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_BLOCK_PANEL_H +#define EIGEN_GENERAL_BLOCK_PANEL_H + +namespace Eigen { + +namespace internal { + +template +class gebp_traits; + + +/** \internal \returns b if a<=0, and returns a otherwise. */ +inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b) +{ + return a<=0 ? b : a; +} + +/** \internal */ +inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0) +{ + static std::ptrdiff_t m_l1CacheSize = 0; + static std::ptrdiff_t m_l2CacheSize = 0; + if(m_l2CacheSize==0) + { + m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024); + m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024); + } + + if(action==SetAction) + { + // set the cpu cache size and cache all block sizes from a global cache size in byte + eigen_internal_assert(l1!=0 && l2!=0); + m_l1CacheSize = *l1; + m_l2CacheSize = *l2; + } + else if(action==GetAction) + { + eigen_internal_assert(l1!=0 && l2!=0); + *l1 = m_l1CacheSize; + *l2 = m_l2CacheSize; + } + else + { + eigen_internal_assert(false); + } +} + +/** \brief Computes the blocking parameters for a m x k times k x n matrix product + * + * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. + * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. + * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. The blocking sizes depends on various + * parameters: + * - the L1 and L2 cache sizes, + * - the register level blocking sizes defined by gebp_traits, + * - the number of scalars that fit into a packet (when vectorization is enabled). + * + * \sa setCpuCacheSizes */ +template +void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n) +{ + EIGEN_UNUSED_VARIABLE(n); + // Explanations: + // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and + // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed + // per kc x nr vertical small panels where nr is the blocking size along the n dimension + // at the register level. For vectorization purpose, these small vertical panels are unpacked, + // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to + // stay in L1 cache. + std::ptrdiff_t l1, l2; + + typedef gebp_traits Traits; + enum { + kdiv = KcFactor * 2 * Traits::nr + * Traits::RhsProgress * sizeof(RhsScalar), + mr = gebp_traits::mr, + mr_mask = (0xffffffff/mr)*mr + }; + + manage_caching_sizes(GetAction, &l1, &l2); + k = std::min(k, l1/kdiv); + SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0; + if(_m +inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n) +{ + computeProductBlockingSizes(k, m, n); +} + +#ifdef EIGEN_HAS_FUSE_CJMADD + #define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); +#else + + // FIXME (a bit overkill maybe ?) + + template struct gebp_madd_selector { + EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/) + { + c = cj.pmadd(a,b,c); + } + }; + + template struct gebp_madd_selector { + EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t) + { + t = b; t = cj.pmul(a,t); c = padd(c,t); + } + }; + + template + EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t) + { + gebp_madd_selector::run(cj,a,b,c,t); + } + + #define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T); +// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T); +#endif + +/* Vectorization logic + * real*real: unpack rhs to constant packets, ... + * + * cd*cd : unpack rhs to (b_r,b_r), (b_i,b_i), mul to get (a_r b_r,a_i b_r) (a_r b_i,a_i b_i), + * storing each res packet into two packets (2x2), + * at the end combine them: swap the second and addsub them + * cf*cf : same but with 2x4 blocks + * cplx*real : unpack rhs to constant packets, ... + * real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual + */ +template +class gebp_traits +{ +public: + typedef _LhsScalar LhsScalar; + typedef _RhsScalar RhsScalar; + typedef typename scalar_product_traits::ReturnType ResScalar; + + enum { + ConjLhs = _ConjLhs, + ConjRhs = _ConjRhs, + Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable, + LhsPacketSize = Vectorizable ? packet_traits::size : 1, + RhsPacketSize = Vectorizable ? packet_traits::size : 1, + ResPacketSize = Vectorizable ? packet_traits::size : 1, + + NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, + + // register block size along the N direction (must be either 2 or 4) + nr = NumberOfRegisters/4, + + // register block size along the M direction (currently, this one cannot be modified) + mr = 2 * LhsPacketSize, + + WorkSpaceFactor = nr * RhsPacketSize, + + LhsProgress = LhsPacketSize, + RhsProgress = RhsPacketSize + }; + + typedef typename packet_traits::type _LhsPacket; + typedef typename packet_traits::type _RhsPacket; + typedef typename packet_traits::type _ResPacket; + + typedef typename conditional::type LhsPacket; + typedef typename conditional::type RhsPacket; + typedef typename conditional::type ResPacket; + + typedef ResPacket AccPacket; + + EIGEN_STRONG_INLINE void initAcc(AccPacket& p) + { + p = pset1(ResScalar(0)); + } + + EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b) + { + for(DenseIndex k=0; k(&b[k*RhsPacketSize], rhs[k]); + } + + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const + { + dest = pload(b); + } + + EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const + { + dest = pload(a); + } + + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, AccPacket& tmp) const + { + tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp); + } + + EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const + { + r = pmadd(c,alpha,r); + } + +protected: +// conj_helper cj; +// conj_helper pcj; +}; + +template +class gebp_traits, RealScalar, _ConjLhs, false> +{ +public: + typedef std::complex LhsScalar; + typedef RealScalar RhsScalar; + typedef typename scalar_product_traits::ReturnType ResScalar; + + enum { + ConjLhs = _ConjLhs, + ConjRhs = false, + Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable, + LhsPacketSize = Vectorizable ? packet_traits::size : 1, + RhsPacketSize = Vectorizable ? packet_traits::size : 1, + ResPacketSize = Vectorizable ? packet_traits::size : 1, + + NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, + nr = NumberOfRegisters/4, + mr = 2 * LhsPacketSize, + WorkSpaceFactor = nr*RhsPacketSize, + + LhsProgress = LhsPacketSize, + RhsProgress = RhsPacketSize + }; + + typedef typename packet_traits::type _LhsPacket; + typedef typename packet_traits::type _RhsPacket; + typedef typename packet_traits::type _ResPacket; + + typedef typename conditional::type LhsPacket; + typedef typename conditional::type RhsPacket; + typedef typename conditional::type ResPacket; + + typedef ResPacket AccPacket; + + EIGEN_STRONG_INLINE void initAcc(AccPacket& p) + { + p = pset1(ResScalar(0)); + } + + EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b) + { + for(DenseIndex k=0; k(&b[k*RhsPacketSize], rhs[k]); + } + + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const + { + dest = pload(b); + } + + EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const + { + dest = pload(a); + } + + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const + { + madd_impl(a, b, c, tmp, typename conditional::type()); + } + + EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const + { + tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp); + } + + EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const + { + c += a * b; + } + + EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const + { + r = cj.pmadd(c,alpha,r); + } + +protected: + conj_helper cj; +}; + +template +class gebp_traits, std::complex, _ConjLhs, _ConjRhs > +{ +public: + typedef std::complex Scalar; + typedef std::complex LhsScalar; + typedef std::complex RhsScalar; + typedef std::complex ResScalar; + + enum { + ConjLhs = _ConjLhs, + ConjRhs = _ConjRhs, + Vectorizable = packet_traits::Vectorizable + && packet_traits::Vectorizable, + RealPacketSize = Vectorizable ? packet_traits::size : 1, + ResPacketSize = Vectorizable ? packet_traits::size : 1, + + nr = 2, + mr = 2 * ResPacketSize, + WorkSpaceFactor = Vectorizable ? 2*nr*RealPacketSize : nr, + + LhsProgress = ResPacketSize, + RhsProgress = Vectorizable ? 2*ResPacketSize : 1 + }; + + typedef typename packet_traits::type RealPacket; + typedef typename packet_traits::type ScalarPacket; + struct DoublePacket + { + RealPacket first; + RealPacket second; + }; + + typedef typename conditional::type LhsPacket; + typedef typename conditional::type RhsPacket; + typedef typename conditional::type ResPacket; + typedef typename conditional::type AccPacket; + + EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); } + + EIGEN_STRONG_INLINE void initAcc(DoublePacket& p) + { + p.first = pset1(RealScalar(0)); + p.second = pset1(RealScalar(0)); + } + + /* Unpack the rhs coeff such that each complex coefficient is spread into + * two packects containing respectively the real and imaginary coefficient + * duplicated as many time as needed: (x+iy) => [x, ..., x] [y, ..., y] + */ + EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const Scalar* rhs, Scalar* b) + { + for(DenseIndex k=0; k((RealScalar*)&b[k*ResPacketSize*2+0], real(rhs[k])); + pstore1((RealScalar*)&b[k*ResPacketSize*2+ResPacketSize], imag(rhs[k])); + } + else + b[k] = rhs[k]; + } + } + + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const { dest = *b; } + + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket& dest) const + { + dest.first = pload((const RealScalar*)b); + dest.second = pload((const RealScalar*)(b+ResPacketSize)); + } + + // nothing special here + EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const + { + dest = pload((const typename unpacket_traits::type*)(a)); + } + + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacket& c, RhsPacket& /*tmp*/) const + { + c.first = padd(pmul(a,b.first), c.first); + c.second = padd(pmul(a,b.second),c.second); + } + + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const + { + c = cj.pmadd(a,b,c); + } + + EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; } + + EIGEN_STRONG_INLINE void acc(const DoublePacket& c, const ResPacket& alpha, ResPacket& r) const + { + // assemble c + ResPacket tmp; + if((!ConjLhs)&&(!ConjRhs)) + { + tmp = pcplxflip(pconj(ResPacket(c.second))); + tmp = padd(ResPacket(c.first),tmp); + } + else if((!ConjLhs)&&(ConjRhs)) + { + tmp = pconj(pcplxflip(ResPacket(c.second))); + tmp = padd(ResPacket(c.first),tmp); + } + else if((ConjLhs)&&(!ConjRhs)) + { + tmp = pcplxflip(ResPacket(c.second)); + tmp = padd(pconj(ResPacket(c.first)),tmp); + } + else if((ConjLhs)&&(ConjRhs)) + { + tmp = pcplxflip(ResPacket(c.second)); + tmp = psub(pconj(ResPacket(c.first)),tmp); + } + + r = pmadd(tmp,alpha,r); + } + +protected: + conj_helper cj; +}; + +template +class gebp_traits, false, _ConjRhs > +{ +public: + typedef std::complex Scalar; + typedef RealScalar LhsScalar; + typedef Scalar RhsScalar; + typedef Scalar ResScalar; + + enum { + ConjLhs = false, + ConjRhs = _ConjRhs, + Vectorizable = packet_traits::Vectorizable + && packet_traits::Vectorizable, + LhsPacketSize = Vectorizable ? packet_traits::size : 1, + RhsPacketSize = Vectorizable ? packet_traits::size : 1, + ResPacketSize = Vectorizable ? packet_traits::size : 1, + + NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, + nr = 4, + mr = 2*ResPacketSize, + WorkSpaceFactor = nr*RhsPacketSize, + + LhsProgress = ResPacketSize, + RhsProgress = ResPacketSize + }; + + typedef typename packet_traits::type _LhsPacket; + typedef typename packet_traits::type _RhsPacket; + typedef typename packet_traits::type _ResPacket; + + typedef typename conditional::type LhsPacket; + typedef typename conditional::type RhsPacket; + typedef typename conditional::type ResPacket; + + typedef ResPacket AccPacket; + + EIGEN_STRONG_INLINE void initAcc(AccPacket& p) + { + p = pset1(ResScalar(0)); + } + + EIGEN_STRONG_INLINE void unpackRhs(DenseIndex n, const RhsScalar* rhs, RhsScalar* b) + { + for(DenseIndex k=0; k(&b[k*RhsPacketSize], rhs[k]); + } + + EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const + { + dest = pload(b); + } + + EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const + { + dest = ploaddup(a); + } + + EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const + { + madd_impl(a, b, c, tmp, typename conditional::type()); + } + + EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const + { + tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp); + } + + EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const + { + c += a * b; + } + + EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const + { + r = cj.pmadd(alpha,c,r); + } + +protected: + conj_helper cj; +}; + +/* optimized GEneral packed Block * packed Panel product kernel + * + * Mixing type logic: C += A * B + * | A | B | comments + * |real |cplx | no vectorization yet, would require to pack A with duplication + * |cplx |real | easy vectorization + */ +template +struct gebp_kernel +{ + typedef gebp_traits Traits; + typedef typename Traits::ResScalar ResScalar; + typedef typename Traits::LhsPacket LhsPacket; + typedef typename Traits::RhsPacket RhsPacket; + typedef typename Traits::ResPacket ResPacket; + typedef typename Traits::AccPacket AccPacket; + + enum { + Vectorizable = Traits::Vectorizable, + LhsProgress = Traits::LhsProgress, + RhsProgress = Traits::RhsProgress, + ResPacketSize = Traits::ResPacketSize + }; + + EIGEN_DONT_INLINE + void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, + Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0); +}; + +template +EIGEN_DONT_INLINE +void gebp_kernel + ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, + Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB) + { + Traits traits; + + if(strideA==-1) strideA = depth; + if(strideB==-1) strideB = depth; + conj_helper cj; +// conj_helper pcj; + Index packet_cols = (cols/nr) * nr; + const Index peeled_mc = (rows/mr)*mr; + // FIXME: + const Index peeled_mc2 = peeled_mc + (rows-peeled_mc >= LhsProgress ? LhsProgress : 0); + const Index peeled_kc = (depth/4)*4; + + if(unpackedB==0) + unpackedB = const_cast(blockB - strideB * nr * RhsProgress); + + // loops on each micro vertical panel of rhs (depth x nr) + for(Index j2=0; j2 we select a mr x nr micro block of res which is entirely + // stored into mr/packet_size x nr registers. + for(Index i=0; i(alpha); + + R0 = ploadu(r0); + R1 = ploadu(r1); + R2 = ploadu(r2); + R3 = ploadu(r3); + R4 = ploadu(r0 + ResPacketSize); + R5 = ploadu(r1 + ResPacketSize); + R6 = ploadu(r2 + ResPacketSize); + traits.acc(C0, alphav, R0); + pstoreu(r0, R0); + R0 = ploadu(r3 + ResPacketSize); + + traits.acc(C1, alphav, R1); + traits.acc(C2, alphav, R2); + traits.acc(C3, alphav, R3); + traits.acc(C4, alphav, R4); + traits.acc(C5, alphav, R5); + traits.acc(C6, alphav, R6); + traits.acc(C7, alphav, R0); + + pstoreu(r1, R1); + pstoreu(r2, R2); + pstoreu(r3, R3); + pstoreu(r0 + ResPacketSize, R4); + pstoreu(r1 + ResPacketSize, R5); + pstoreu(r2 + ResPacketSize, R6); + pstoreu(r3 + ResPacketSize, R0); + } + else + { + ResPacket R0, R1, R4; + ResPacket alphav = pset1(alpha); + + R0 = ploadu(r0); + R1 = ploadu(r1); + R4 = ploadu(r0 + ResPacketSize); + traits.acc(C0, alphav, R0); + pstoreu(r0, R0); + R0 = ploadu(r1 + ResPacketSize); + traits.acc(C1, alphav, R1); + traits.acc(C4, alphav, R4); + traits.acc(C5, alphav, R0); + pstoreu(r1, R1); + pstoreu(r0 + ResPacketSize, R4); + pstoreu(r1 + ResPacketSize, R0); + } + + } + + if(rows-peeled_mc>=LhsProgress) + { + Index i = peeled_mc; + const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress]; + prefetch(&blA[0]); + + // gets res block as register + AccPacket C0, C1, C2, C3; + traits.initAcc(C0); + traits.initAcc(C1); + if(nr==4) traits.initAcc(C2); + if(nr==4) traits.initAcc(C3); + + // performs "inner" product + const RhsScalar* blB = unpackedB; + for(Index k=0; k(alpha); + + ResScalar* r0 = &res[(j2+0)*resStride + i]; + ResScalar* r1 = r0 + resStride; + ResScalar* r2 = r1 + resStride; + ResScalar* r3 = r2 + resStride; + + R0 = ploadu(r0); + R1 = ploadu(r1); + if(nr==4) R2 = ploadu(r2); + if(nr==4) R3 = ploadu(r3); + + traits.acc(C0, alphav, R0); + traits.acc(C1, alphav, R1); + if(nr==4) traits.acc(C2, alphav, R2); + if(nr==4) traits.acc(C3, alphav, R3); + + pstoreu(r0, R0); + pstoreu(r1, R1); + if(nr==4) pstoreu(r2, R2); + if(nr==4) pstoreu(r3, R3); + } + for(Index i=peeled_mc2; i do the same but with nr==1 + for(Index j2=packet_cols; j2(alpha); + + ResScalar* r0 = &res[(j2+0)*resStride + i]; + + R0 = ploadu(r0); + R4 = ploadu(r0+ResPacketSize); + + traits.acc(C0, alphav, R0); + traits.acc(C4, alphav, R4); + + pstoreu(r0, R0); + pstoreu(r0+ResPacketSize, R4); + } + if(rows-peeled_mc>=LhsProgress) + { + Index i = peeled_mc; + const LhsScalar* blA = &blockA[i*strideA+offsetA*LhsProgress]; + prefetch(&blA[0]); + + AccPacket C0; + traits.initAcc(C0); + + const RhsScalar* blB = unpackedB; + for(Index k=0; k(alpha); + ResPacket R0 = ploadu(&res[(j2+0)*resStride + i]); + traits.acc(C0, alphav, R0); + pstoreu(&res[(j2+0)*resStride + i], R0); + } + for(Index i=peeled_mc2; i +struct gemm_pack_lhs +{ + EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0); +}; + +template +EIGEN_DONT_INLINE void gemm_pack_lhs + ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset) +{ + typedef typename packet_traits::type Packet; + enum { PacketSize = packet_traits::size }; + + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); + EIGEN_UNUSED_VARIABLE(stride) + EIGEN_UNUSED_VARIABLE(offset) + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) ); + conj_if::IsComplex && Conjugate> cj; + const_blas_data_mapper lhs(_lhs,lhsStride); + Index count = 0; + Index peeled_mc = (rows/Pack1)*Pack1; + for(Index i=0; i=1*PacketSize) A = ploadu(&lhs(i+0*PacketSize, k)); + if(Pack1>=2*PacketSize) B = ploadu(&lhs(i+1*PacketSize, k)); + if(Pack1>=3*PacketSize) C = ploadu(&lhs(i+2*PacketSize, k)); + if(Pack1>=4*PacketSize) D = ploadu(&lhs(i+3*PacketSize, k)); + if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; } + if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; } + if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; } + if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; } + } + } + else + { + for(Index k=0; k=Pack2) + { + if(PanelMode) count += Pack2*offset; + for(Index k=0; k +struct gemm_pack_rhs +{ + typedef typename packet_traits::type Packet; + enum { PacketSize = packet_traits::size }; + EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0); +}; + +template +EIGEN_DONT_INLINE void gemm_pack_rhs + ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset) +{ + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR"); + EIGEN_UNUSED_VARIABLE(stride) + EIGEN_UNUSED_VARIABLE(offset) + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if::IsComplex && Conjugate> cj; + Index packet_cols = (cols/nr) * nr; + Index count = 0; + for(Index j2=0; j2 +struct gemm_pack_rhs +{ + enum { PacketSize = packet_traits::size }; + EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0); +}; + +template +EIGEN_DONT_INLINE void gemm_pack_rhs + ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset) +{ + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); + EIGEN_UNUSED_VARIABLE(stride) + EIGEN_UNUSED_VARIABLE(offset) + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if::IsComplex && Conjugate> cj; + Index packet_cols = (cols/nr) * nr; + Index count = 0; + for(Index j2=0; j2 +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H +#define EIGEN_GENERAL_MATRIX_MATRIX_H + +namespace Eigen { + +namespace internal { + +template class level3_blocking; + +/* Specialization for a row-major destination matrix => simple transposition of the product */ +template< + typename Index, + typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs> +struct general_matrix_matrix_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + static EIGEN_STRONG_INLINE void run( + Index rows, Index cols, Index depth, + const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsStride, + ResScalar* res, Index resStride, + ResScalar alpha, + level3_blocking& blocking, + GemmParallelInfo* info = 0) + { + // transpose the product such that the result is column major + general_matrix_matrix_product + ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info); + } +}; + +/* Specialization for a col-major destination matrix + * => Blocking algorithm following Goto's paper */ +template< + typename Index, + typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs> +struct general_matrix_matrix_product +{ + +typedef typename scalar_product_traits::ReturnType ResScalar; +static void run(Index rows, Index cols, Index depth, + const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsStride, + ResScalar* res, Index resStride, + ResScalar alpha, + level3_blocking& blocking, + GemmParallelInfo* info = 0) +{ + const_blas_data_mapper lhs(_lhs,lhsStride); + const_blas_data_mapper rhs(_rhs,rhsStride); + + typedef gebp_traits Traits; + + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + //Index nc = blocking.nc(); // cache block size along the N direction + + gemm_pack_lhs pack_lhs; + gemm_pack_rhs pack_rhs; + gebp_kernel gebp; + +#ifdef EIGEN_HAS_OPENMP + if(info) + { + // this is the parallel version! + Index tid = omp_get_thread_num(); + Index threads = omp_get_num_threads(); + + std::size_t sizeA = kc*mc; + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0); + ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0); + + RhsScalar* blockB = blocking.blockB(); + eigen_internal_assert(blockB!=0); + + // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs... + for(Index k=0; k rows of B', and cols of the A' + + // In order to reduce the chance that a thread has to wait for the other, + // let's start by packing A'. + pack_lhs(blockA, &lhs(0,k), lhsStride, actual_kc, mc); + + // Pack B_k to B' in a parallel fashion: + // each thread packs the sub block B_k,j to B'_j where j is the thread id. + + // However, before copying to B'_j, we have to make sure that no other thread is still using it, + // i.e., we test that info[tid].users equals 0. + // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it. + while(info[tid].users!=0) {} + info[tid].users += threads; + + pack_rhs(blockB+info[tid].rhs_start*actual_kc, &rhs(k,info[tid].rhs_start), rhsStride, actual_kc, info[tid].rhs_length); + + // Notify the other threads that the part B'_j is ready to go. + info[tid].sync = k; + + // Computes C_i += A' * B' per B'_j + for(Index shift=0; shift0) + while(info[j].sync!=k) {} + + gebp(res+info[j].rhs_start*resStride, resStride, blockA, blockB+info[j].rhs_start*actual_kc, mc, actual_kc, info[j].rhs_length, alpha, -1,-1,0,0, w); + } + + // Then keep going as usual with the remaining A' + for(Index i=mc; i Pack rhs's panel into a sequential chunk of memory (L2 caching) + // Note that this panel will be read as many times as the number of blocks in the lhs's + // vertical panel which is, in practice, a very low number. + pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols); + + // For each mc x kc block of the lhs's vertical panel... + // (==GEPP_VAR1) + for(Index i2=0; i2 for "large" GEMM, i.e., +* implementation of the high level wrapper to general_matrix_matrix_product +**********************************************************************************/ + +template +struct traits > + : traits, Lhs, Rhs> > +{}; + +template +struct gemm_functor +{ + gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, + BlockingType& blocking) + : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking) + {} + + void initParallelSession() const + { + m_blocking.allocateB(); + } + + void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo* info=0) const + { + if(cols==-1) + cols = m_rhs.cols(); + + Gemm::run(rows, cols, m_lhs.cols(), + /*(const Scalar*)*/&m_lhs.coeffRef(row,0), m_lhs.outerStride(), + /*(const Scalar*)*/&m_rhs.coeffRef(0,col), m_rhs.outerStride(), + (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(), + m_actualAlpha, m_blocking, info); + } + + protected: + const Lhs& m_lhs; + const Rhs& m_rhs; + Dest& m_dest; + Scalar m_actualAlpha; + BlockingType& m_blocking; +}; + +template class gemm_blocking_space; + +template +class level3_blocking +{ + typedef _LhsScalar LhsScalar; + typedef _RhsScalar RhsScalar; + + protected: + LhsScalar* m_blockA; + RhsScalar* m_blockB; + RhsScalar* m_blockW; + + DenseIndex m_mc; + DenseIndex m_nc; + DenseIndex m_kc; + + public: + + level3_blocking() + : m_blockA(0), m_blockB(0), m_blockW(0), m_mc(0), m_nc(0), m_kc(0) + {} + + inline DenseIndex mc() const { return m_mc; } + inline DenseIndex nc() const { return m_nc; } + inline DenseIndex kc() const { return m_kc; } + + inline LhsScalar* blockA() { return m_blockA; } + inline RhsScalar* blockB() { return m_blockB; } + inline RhsScalar* blockW() { return m_blockW; } +}; + +template +class gemm_blocking_space + : public level3_blocking< + typename conditional::type, + typename conditional::type> +{ + enum { + Transpose = StorageOrder==RowMajor, + ActualRows = Transpose ? MaxCols : MaxRows, + ActualCols = Transpose ? MaxRows : MaxCols + }; + typedef typename conditional::type LhsScalar; + typedef typename conditional::type RhsScalar; + typedef gebp_traits Traits; + enum { + SizeA = ActualRows * MaxDepth, + SizeB = ActualCols * MaxDepth, + SizeW = MaxDepth * Traits::WorkSpaceFactor + }; + + EIGEN_ALIGN16 LhsScalar m_staticA[SizeA]; + EIGEN_ALIGN16 RhsScalar m_staticB[SizeB]; + EIGEN_ALIGN16 RhsScalar m_staticW[SizeW]; + + public: + + gemm_blocking_space(DenseIndex /*rows*/, DenseIndex /*cols*/, DenseIndex /*depth*/) + { + this->m_mc = ActualRows; + this->m_nc = ActualCols; + this->m_kc = MaxDepth; + this->m_blockA = m_staticA; + this->m_blockB = m_staticB; + this->m_blockW = m_staticW; + } + + inline void allocateA() {} + inline void allocateB() {} + inline void allocateW() {} + inline void allocateAll() {} +}; + +template +class gemm_blocking_space + : public level3_blocking< + typename conditional::type, + typename conditional::type> +{ + enum { + Transpose = StorageOrder==RowMajor + }; + typedef typename conditional::type LhsScalar; + typedef typename conditional::type RhsScalar; + typedef gebp_traits Traits; + + DenseIndex m_sizeA; + DenseIndex m_sizeB; + DenseIndex m_sizeW; + + public: + + gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth) + { + this->m_mc = Transpose ? cols : rows; + this->m_nc = Transpose ? rows : cols; + this->m_kc = depth; + + computeProductBlockingSizes(this->m_kc, this->m_mc, this->m_nc); + m_sizeA = this->m_mc * this->m_kc; + m_sizeB = this->m_kc * this->m_nc; + m_sizeW = this->m_kc*Traits::WorkSpaceFactor; + } + + void allocateA() + { + if(this->m_blockA==0) + this->m_blockA = aligned_new(m_sizeA); + } + + void allocateB() + { + if(this->m_blockB==0) + this->m_blockB = aligned_new(m_sizeB); + } + + void allocateW() + { + if(this->m_blockW==0) + this->m_blockW = aligned_new(m_sizeW); + } + + void allocateAll() + { + allocateA(); + allocateB(); + allocateW(); + } + + ~gemm_blocking_space() + { + aligned_delete(this->m_blockA, m_sizeA); + aligned_delete(this->m_blockB, m_sizeB); + aligned_delete(this->m_blockW, m_sizeW); + } +}; + +} // end namespace internal + +template +class GeneralProduct + : public ProductBase, Lhs, Rhs> +{ + enum { + MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime) + }; + public: + EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) + + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef Scalar ResScalar; + + GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) + { +#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG)) + typedef internal::scalar_product_op BinOp; + EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar); +#endif + } + + template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + { + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + if(m_lhs.cols()==0 || m_lhs.rows()==0 || m_rhs.cols()==0) + return; + + typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(m_rhs); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) + * RhsBlasTraits::extractScalarFactor(m_rhs); + + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar, + Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType; + + typedef internal::gemm_functor< + Scalar, Index, + internal::general_matrix_matrix_product< + Index, + LhsScalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), + RhsScalar, (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), + (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, + _ActualLhsType, _ActualRhsType, Dest, BlockingType> GemmFunctor; + + BlockingType blocking(dst.rows(), dst.cols(), lhs.cols()); + + internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), this->rows(), this->cols(), Dest::Flags&RowMajorBit); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_H diff --git a/tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h new file mode 100644 index 0000000..5c37639 --- /dev/null +++ b/tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -0,0 +1,278 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H + +namespace Eigen { + +template +struct selfadjoint_rank1_update; + +namespace internal { + +/********************************************************************** +* This file implements a general A * B product while +* evaluating only one triangular part of the product. +* This is more general version of self adjoint product (C += A A^T) +* as the level 3 SYRK Blas routine. +**********************************************************************/ + +// forward declarations (defined at the end of this file) +template +struct tribb_kernel; + +/* Optimized matrix-matrix product evaluating only one triangular half */ +template +struct general_matrix_matrix_triangular_product; + +// as usual if the result is row major => we transpose the product +template +struct general_matrix_matrix_triangular_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) + { + general_matrix_matrix_triangular_product + ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); + } +}; + +template +struct general_matrix_matrix_triangular_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) + { + const_blas_data_mapper lhs(_lhs,lhsStride); + const_blas_data_mapper rhs(_rhs,rhsStride); + + typedef gebp_traits Traits; + + Index kc = depth; // cache block size along the K direction + Index mc = size; // cache block size along the M direction + Index nc = size; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + // !!! mc must be a multiple of nr: + if(mc > Traits::nr) + mc = (mc/Traits::nr)*Traits::nr; + + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + std::size_t sizeB = sizeW + kc*size; + ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); + ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0); + RhsScalar* blockB = allocatedBlockB + sizeW; + + gemm_pack_lhs pack_lhs; + gemm_pack_rhs pack_rhs; + gebp_kernel gebp; + tribb_kernel sybb; + + for(Index k2=0; k2 processed with gebp or skipped + // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel + // 3 - after the diagonal => processed with gebp or skipped + if (UpLo==Lower) + gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha, + -1, -1, 0, 0, allocatedBlockB); + + sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); + + if (UpLo==Upper) + { + Index j2 = i2+actual_mc; + gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha, + -1, -1, 0, 0, allocatedBlockB); + } + } + } + } +}; + +// Optimized packed Block * packed Block product kernel evaluating only one given triangular part +// This kernel is built on top of the gebp kernel: +// - the current destination block is processed per panel of actual_mc x BlockSize +// where BlockSize is set to the minimal value allowing gebp to be as fast as possible +// - then, as usual, each panel is split into three parts along the diagonal, +// the sub blocks above and below the diagonal are processed as usual, +// while the triangular block overlapping the diagonal is evaluated into a +// small temporary buffer which is then accumulated into the result using a +// triangular traversal. +template +struct tribb_kernel +{ + typedef gebp_traits Traits; + typedef typename Traits::ResScalar ResScalar; + + enum { + BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) + }; + void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace) + { + gebp_kernel gebp_kernel; + Matrix buffer; + + // let's process the block per panel of actual_mc x BlockSize, + // again, each is split into three parts, etc. + for (Index j=0; j(BlockSize,size - j); + const RhsScalar* actual_b = blockB+j*depth; + + if(UpLo==Upper) + gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, + -1, -1, 0, 0, workspace); + + // selfadjoint micro block + { + Index i = j; + buffer.setZero(); + // 1 - apply the kernel on the temporary buffer + gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, + -1, -1, 0, 0, workspace); + // 2 - triangular accumulation + for(Index j1=0; j1 +struct general_product_to_triangular_selector; + + +template +struct general_product_to_triangular_selector +{ + static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) + { + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + + typedef typename internal::remove_all::type Lhs; + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; + typedef typename internal::remove_all::type _ActualLhs; + typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + + typedef typename internal::remove_all::type Rhs; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; + typedef typename internal::remove_all::type _ActualRhs; + typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + + enum { + StorageOrder = (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, + UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, + UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 + }; + + internal::gemv_static_vector_if static_lhs; + ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), + (UseLhsDirectly ? const_cast(actualLhs.data()) : static_lhs.data())); + if(!UseLhsDirectly) Map(actualLhsPtr, actualLhs.size()) = actualLhs; + + internal::gemv_static_vector_if static_rhs; + ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), + (UseRhsDirectly ? const_cast(actualRhs.data()) : static_rhs.data())); + if(!UseRhsDirectly) Map(actualRhsPtr, actualRhs.size()) = actualRhs; + + + selfadjoint_rank1_update::IsComplex, + RhsBlasTraits::NeedToConjugate && NumTraits::IsComplex> + ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha); + } +}; + +template +struct general_product_to_triangular_selector +{ + static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) + { + typedef typename MatrixType::Index Index; + + typedef typename internal::remove_all::type Lhs; + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; + typedef typename internal::remove_all::type _ActualLhs; + typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + + typedef typename internal::remove_all::type Rhs; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; + typedef typename internal::remove_all::type _ActualRhs; + typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + + typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); + + internal::general_matrix_matrix_triangular_product + ::run(mat.cols(), actualLhs.cols(), + &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), + mat.data(), mat.outerStride(), actualAlpha); + } +}; + +template +template +TriangularView& TriangularView::assignProduct(const ProductBase& prod, const Scalar& alpha) +{ + general_product_to_triangular_selector::run(m_matrix.const_cast_derived(), prod.derived(), alpha); + + return *this; +} + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H diff --git a/tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h new file mode 100644 index 0000000..3deed06 --- /dev/null +++ b/tools/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h @@ -0,0 +1,146 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Level 3 BLAS SYRK/HERK implementation. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H + +namespace Eigen { + +namespace internal { + +template +struct general_matrix_matrix_rankupdate : + general_matrix_matrix_triangular_product< + Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ +template \ +struct general_matrix_matrix_triangular_product { \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ + { \ + if (lhs==rhs) { \ + general_matrix_matrix_rankupdate \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } else { \ + general_matrix_matrix_triangular_product \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } \ + } \ +}; + +EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) +EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) + +// SYRK for float/double +#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ +template \ +struct general_matrix_matrix_rankupdate { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + /* typedef Matrix MatrixRhs;*/ \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \ + MKLTYPE alpha_, beta_; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \ + } \ +}; + +// HERK for complex data +#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ +template \ +struct general_matrix_matrix_rankupdate { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + typedef Matrix MatrixType; \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \ + RTYPE alpha_, beta_; \ + const EIGTYPE* a_ptr; \ +\ +/* Set alpha_ & beta_ */ \ +/* assign_scalar_eig2mkl(alpha_, alpha); */\ +/* assign_scalar_eig2mkl(beta_, EIGTYPE(1));*/ \ + alpha_ = alpha.real(); \ + beta_ = 1.0; \ +/* Copy with conjugation in some cases*/ \ + MatrixType a; \ + if (conjA) { \ + Map > mapA(lhs,n,k,OuterStride<>(lhsStride)); \ + a = mapA.conjugate(); \ + lda = a.outerStride(); \ + a_ptr = a.data(); \ + } else a_ptr=lhs; \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + + +EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) +EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) + +//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) +//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H diff --git a/tools/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/tools/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h new file mode 100644 index 0000000..060af32 --- /dev/null +++ b/tools/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h @@ -0,0 +1,118 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * General matrix-matrix product functionality based on ?GEMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + +/********************************************************************** +* This file implements general matrix-matrix multiplication using BLAS +* gemm function via partial specialization of +* general_matrix_matrix_product::run(..) method for float, double, +* std::complex and std::complex types +**********************************************************************/ + +// gemm specialization + +#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ +template< \ + typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct general_matrix_matrix_product \ +{ \ +static void run(Index rows, Index cols, Index depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha, \ + level3_blocking& /*blocking*/, \ + GemmParallelInfo* /*info = 0*/) \ +{ \ + using std::conj; \ +\ + char transa, transb; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX a_tmp, b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ + transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ + transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ + k = (MKL_INT)depth; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ + Map > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ +\ + if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ + Map > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +}}; + +GEMM_SPECIALIZATION(double, d, double, d) +GEMM_SPECIALIZATION(float, f, float, s) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) + +} // end namespase internal + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H diff --git a/tools/Eigen/src/Core/products/GeneralMatrixVector.h b/tools/Eigen/src/Core/products/GeneralMatrixVector.h new file mode 100644 index 0000000..0938770 --- /dev/null +++ b/tools/Eigen/src/Core/products/GeneralMatrixVector.h @@ -0,0 +1,566 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H +#define EIGEN_GENERAL_MATRIX_VECTOR_H + +namespace Eigen { + +namespace internal { + +/* Optimized col-major matrix * vector product: + * This algorithm processes 4 columns at onces that allows to both reduce + * the number of load/stores of the result by a factor 4 and to reduce + * the instruction dependency. Moreover, we know that all bands have the + * same alignment pattern. + * + * Mixing type logic: C += alpha * A * B + * | A | B |alpha| comments + * |real |cplx |cplx | no vectorization + * |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization + * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp + * |cplx |real |real | optimal case, vectorization possible via real-cplx mul + */ +template +struct general_matrix_vector_product +{ +typedef typename scalar_product_traits::ReturnType ResScalar; + +enum { + Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable + && int(packet_traits::size)==int(packet_traits::size), + LhsPacketSize = Vectorizable ? packet_traits::size : 1, + RhsPacketSize = Vectorizable ? packet_traits::size : 1, + ResPacketSize = Vectorizable ? packet_traits::size : 1 +}; + +typedef typename packet_traits::type _LhsPacket; +typedef typename packet_traits::type _RhsPacket; +typedef typename packet_traits::type _ResPacket; + +typedef typename conditional::type LhsPacket; +typedef typename conditional::type RhsPacket; +typedef typename conditional::type ResPacket; + +EIGEN_DONT_INLINE static void run( + Index rows, Index cols, + const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsIncr, + ResScalar* res, Index resIncr, RhsScalar alpha); +}; + +template +EIGEN_DONT_INLINE void general_matrix_vector_product::run( + Index rows, Index cols, + const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsIncr, + ResScalar* res, Index resIncr, RhsScalar alpha) +{ + EIGEN_UNUSED_VARIABLE(resIncr) + eigen_internal_assert(resIncr==1); + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) \ + pstore(&res[j], \ + padd(pload(&res[j]), \ + padd( \ + padd(pcj.pmul(EIGEN_CAT(ploa , A0)(&lhs0[j]), ptmp0), \ + pcj.pmul(EIGEN_CAT(ploa , A13)(&lhs1[j]), ptmp1)), \ + padd(pcj.pmul(EIGEN_CAT(ploa , A2)(&lhs2[j]), ptmp2), \ + pcj.pmul(EIGEN_CAT(ploa , A13)(&lhs3[j]), ptmp3)) ))) + + conj_helper cj; + conj_helper pcj; + if(ConjugateRhs) + alpha = numext::conj(alpha); + + enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; + const Index columnsAtOnce = 4; + const Index peels = 2; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index ResPacketAlignedMask = ResPacketSize-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; + const Index size = rows; + + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type. + Index alignedStart = internal::first_aligned(res,size); + Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; + + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = internal::first_aligned(lhs,size); + + // find how many columns do we have to skip to be aligned with the result (if possible) + Index skipColumns = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (size_t(lhs)%sizeof(LhsScalar)) || (size_t(res)%sizeof(ResScalar)) ) + { + alignedSize = 0; + alignedStart = 0; + } + else if (LhsPacketSize>1) + { + eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size= cols) + || LhsPacketSize > size + || (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0); + } + else if(Vectorizable) + { + alignedStart = 0; + alignedSize = size; + alignmentPattern = AllAligned; + } + + Index offset1 = (FirstAligned && alignmentStep==1?3:1); + Index offset3 = (FirstAligned && alignmentStep==1?1:3); + + Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; + for (Index i=skipColumns; i(alpha*rhs[i*rhsIncr]), + ptmp1 = pset1(alpha*rhs[(i+offset1)*rhsIncr]), + ptmp2 = pset1(alpha*rhs[(i+2)*rhsIncr]), + ptmp3 = pset1(alpha*rhs[(i+offset3)*rhsIncr]); + + // this helps a lot generating better binary code + const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride, + *lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride; + + if (Vectorizable) + { + /* explicit vectorization */ + // process initial unaligned coeffs + for (Index j=0; jalignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j1) + { + LhsPacket A00, A01, A02, A03, A10, A11, A12, A13; + ResPacket T0, T1; + + A01 = pload(&lhs1[alignedStart-1]); + A02 = pload(&lhs2[alignedStart-2]); + A03 = pload(&lhs3[alignedStart-3]); + + for (; j(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11); + A12 = pload(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12); + A13 = pload(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13); + + A00 = pload(&lhs0[j]); + A10 = pload(&lhs0[j+LhsPacketSize]); + T0 = pcj.pmadd(A00, ptmp0, pload(&res[j])); + T1 = pcj.pmadd(A10, ptmp0, pload(&res[j+ResPacketSize])); + + T0 = pcj.pmadd(A01, ptmp1, T0); + A01 = pload(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01); + T0 = pcj.pmadd(A02, ptmp2, T0); + A02 = pload(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02); + T0 = pcj.pmadd(A03, ptmp3, T0); + pstore(&res[j],T0); + A03 = pload(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03); + T1 = pcj.pmadd(A11, ptmp1, T1); + T1 = pcj.pmadd(A12, ptmp2, T1); + T1 = pcj.pmadd(A13, ptmp3, T1); + pstore(&res[j+ResPacketSize],T1); + } + } + for (; j(alpha*rhs[k*rhsIncr]); + const LhsScalar* lhs0 = lhs + k*lhsStride; + + if (Vectorizable) + { + /* explicit vectorization */ + // process first unaligned result's coeffs + for (Index j=0; j(&lhs0[i]), ptmp0, pload(&res[i]))); + else + for (Index i = alignedStart;i(&lhs0[i]), ptmp0, pload(&res[i]))); + } + + // process remaining scalars (or all if no explicit vectorization) + for (Index i=alignedSize; i +struct general_matrix_vector_product +{ +typedef typename scalar_product_traits::ReturnType ResScalar; + +enum { + Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable + && int(packet_traits::size)==int(packet_traits::size), + LhsPacketSize = Vectorizable ? packet_traits::size : 1, + RhsPacketSize = Vectorizable ? packet_traits::size : 1, + ResPacketSize = Vectorizable ? packet_traits::size : 1 +}; + +typedef typename packet_traits::type _LhsPacket; +typedef typename packet_traits::type _RhsPacket; +typedef typename packet_traits::type _ResPacket; + +typedef typename conditional::type LhsPacket; +typedef typename conditional::type RhsPacket; +typedef typename conditional::type ResPacket; + +EIGEN_DONT_INLINE static void run( + Index rows, Index cols, + const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsIncr, + ResScalar* res, Index resIncr, + ResScalar alpha); +}; + +template +EIGEN_DONT_INLINE void general_matrix_vector_product::run( + Index rows, Index cols, + const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsIncr, + ResScalar* res, Index resIncr, + ResScalar alpha) +{ + EIGEN_UNUSED_VARIABLE(rhsIncr); + eigen_internal_assert(rhsIncr==1); + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + + #define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2) {\ + RhsPacket b = pload(&rhs[j]); \ + ptmp0 = pcj.pmadd(EIGEN_CAT(ploa,A0) (&lhs0[j]), b, ptmp0); \ + ptmp1 = pcj.pmadd(EIGEN_CAT(ploa,A13)(&lhs1[j]), b, ptmp1); \ + ptmp2 = pcj.pmadd(EIGEN_CAT(ploa,A2) (&lhs2[j]), b, ptmp2); \ + ptmp3 = pcj.pmadd(EIGEN_CAT(ploa,A13)(&lhs3[j]), b, ptmp3); } + + conj_helper cj; + conj_helper pcj; + + enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; + const Index rowsAtOnce = 4; + const Index peels = 2; + const Index RhsPacketAlignedMask = RhsPacketSize-1; + const Index LhsPacketAlignedMask = LhsPacketSize-1; +// const Index PeelAlignedMask = RhsPacketSize*peels-1; + const Index depth = cols; + + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type + // if that's not the case then vectorization is discarded, see below. + Index alignedStart = internal::first_aligned(rhs, depth); + Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; + + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth); + + // find how many rows do we have to skip to be aligned with rhs (if possible) + Index skipRows = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || (size_t(lhs)%sizeof(LhsScalar)) || (size_t(rhs)%sizeof(RhsScalar)) ) + { + alignedSize = 0; + alignedStart = 0; + } + else if (LhsPacketSize>1) + { + eigen_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth= rows) + || LhsPacketSize > depth + || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0); + } + else if(Vectorizable) + { + alignedStart = 0; + alignedSize = depth; + alignmentPattern = AllAligned; + } + + Index offset1 = (FirstAligned && alignmentStep==1?3:1); + Index offset3 = (FirstAligned && alignmentStep==1?1:3); + + Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; + for (Index i=skipRows; i(ResScalar(0)), ptmp1 = pset1(ResScalar(0)), + ptmp2 = pset1(ResScalar(0)), ptmp3 = pset1(ResScalar(0)); + + // process initial unaligned coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; jalignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j1) + { + /* Here we proccess 4 rows with with two peeled iterations to hide + * the overhead of unaligned loads. Moreover unaligned loads are handled + * using special shift/move operations between the two aligned packets + * overlaping the desired unaligned packet. This is *much* more efficient + * than basic unaligned loads. + */ + LhsPacket A01, A02, A03, A11, A12, A13; + A01 = pload(&lhs1[alignedStart-1]); + A02 = pload(&lhs2[alignedStart-2]); + A03 = pload(&lhs3[alignedStart-3]); + + for (; j(&rhs[j]); + A11 = pload(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11); + A12 = pload(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12); + A13 = pload(&lhs3[j-3+LhsPacketSize]); palign<3>(A03,A13); + + ptmp0 = pcj.pmadd(pload(&lhs0[j]), b, ptmp0); + ptmp1 = pcj.pmadd(A01, b, ptmp1); + A01 = pload(&lhs1[j-1+2*LhsPacketSize]); palign<1>(A11,A01); + ptmp2 = pcj.pmadd(A02, b, ptmp2); + A02 = pload(&lhs2[j-2+2*LhsPacketSize]); palign<2>(A12,A02); + ptmp3 = pcj.pmadd(A03, b, ptmp3); + A03 = pload(&lhs3[j-3+2*LhsPacketSize]); palign<3>(A13,A03); + + b = pload(&rhs[j+RhsPacketSize]); + ptmp0 = pcj.pmadd(pload(&lhs0[j+LhsPacketSize]), b, ptmp0); + ptmp1 = pcj.pmadd(A11, b, ptmp1); + ptmp2 = pcj.pmadd(A12, b, ptmp2); + ptmp3 = pcj.pmadd(A13, b, ptmp3); + } + } + for (; j(tmp0); + const LhsScalar* lhs0 = lhs + i*lhsStride; + // process first unaligned result's coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; jalignedStart) + { + // process aligned rhs coeffs + if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0) + for (Index j = alignedStart;j(&lhs0[j]), pload(&rhs[j]), ptmp0); + else + for (Index j = alignedStart;j(&lhs0[j]), pload(&rhs[j]), ptmp0); + tmp0 += predux(ptmp0); + } + + // process remaining scalars + // FIXME this loop get vectorized by the compiler ! + for (Index j=alignedSize; j and std::complex types +**********************************************************************/ + +// gemv specialization + +template +struct general_matrix_vector_product_gemv : + general_matrix_vector_product {}; + +#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ +template \ +struct general_matrix_vector_product { \ +static void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + if (ConjugateLhs) { \ + general_matrix_vector_product::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } else { \ + general_matrix_vector_product_gemv::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } \ +} \ +}; \ +template \ +struct general_matrix_vector_product { \ +static void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + general_matrix_vector_product_gemv::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ +} \ +}; \ + +EIGEN_MKL_GEMV_SPECIALIZE(double) +EIGEN_MKL_GEMV_SPECIALIZE(float) +EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) +EIGEN_MKL_GEMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ +template \ +struct general_matrix_vector_product_gemv \ +{ \ +typedef Matrix GEMVVector;\ +\ +static void run( \ + Index rows, Index cols, \ + const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsIncr, \ + EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ +{ \ + MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \ + if (LhsStorageOrder==RowMajor) { \ + m=cols; \ + n=rows; \ + }\ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + GEMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map > map_x(rhs,cols,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=rhs; \ + MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) +EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) +EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) +EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) + +} // end namespase internal + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H diff --git a/tools/Eigen/src/Core/products/Parallelizer.h b/tools/Eigen/src/Core/products/Parallelizer.h new file mode 100644 index 0000000..6937ee3 --- /dev/null +++ b/tools/Eigen/src/Core/products/Parallelizer.h @@ -0,0 +1,162 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PARALLELIZER_H +#define EIGEN_PARALLELIZER_H + +namespace Eigen { + +namespace internal { + +/** \internal */ +inline void manage_multi_threading(Action action, int* v) +{ + static EIGEN_UNUSED int m_maxThreads = -1; + + if(action==SetAction) + { + eigen_internal_assert(v!=0); + m_maxThreads = *v; + } + else if(action==GetAction) + { + eigen_internal_assert(v!=0); + #ifdef EIGEN_HAS_OPENMP + if(m_maxThreads>0) + *v = m_maxThreads; + else + *v = omp_get_max_threads(); + #else + *v = 1; + #endif + } + else + { + eigen_internal_assert(false); + } +} + +} + +/** Must be call first when calling Eigen from multiple threads */ +inline void initParallel() +{ + int nbt; + internal::manage_multi_threading(GetAction, &nbt); + std::ptrdiff_t l1, l2; + internal::manage_caching_sizes(GetAction, &l1, &l2); +} + +/** \returns the max number of threads reserved for Eigen + * \sa setNbThreads */ +inline int nbThreads() +{ + int ret; + internal::manage_multi_threading(GetAction, &ret); + return ret; +} + +/** Sets the max number of threads reserved for Eigen + * \sa nbThreads */ +inline void setNbThreads(int v) +{ + internal::manage_multi_threading(SetAction, &v); +} + +namespace internal { + +template struct GemmParallelInfo +{ + GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {} + + int volatile sync; + int volatile users; + + Index rhs_start; + Index rhs_length; +}; + +template +void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) +{ + // TODO when EIGEN_USE_BLAS is defined, + // we should still enable OMP for other scalar types +#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS) + // FIXME the transpose variable is only needed to properly split + // the matrix product when multithreading is enabled. This is a temporary + // fix to support row-major destination matrices. This whole + // parallelizer mechanism has to be redisigned anyway. + EIGEN_UNUSED_VARIABLE(transpose); + func(0,rows, 0,cols); +#else + + // Dynamically check whether we should enable or disable OpenMP. + // The conditions are: + // - the max number of threads we can create is greater than 1 + // - we are not already in a parallel code + // - the sizes are large enough + + // 1- are we already in a parallel session? + // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp? + if((!Condition) || (omp_get_num_threads()>1)) + return func(0,rows, 0,cols); + + Index size = transpose ? cols : rows; + + // 2- compute the maximal number of threads from the size of the product: + // FIXME this has to be fine tuned + Index max_threads = std::max(1,size / 32); + + // 3 - compute the number of threads we are going to use + Index threads = std::min(nbThreads(), max_threads); + + if(threads==1) + return func(0,rows, 0,cols); + + Eigen::initParallel(); + func.initParallelSession(); + + if(transpose) + std::swap(rows,cols); + + GemmParallelInfo* info = new GemmParallelInfo[threads]; + + #pragma omp parallel num_threads(threads) + { + Index i = omp_get_thread_num(); + // Note that the actual number of threads might be lower than the number of request ones. + Index actual_threads = omp_get_num_threads(); + + Index blockCols = (cols / actual_threads) & ~Index(0x3); + Index blockRows = (rows / actual_threads) & ~Index(0x7); + + Index r0 = i*blockRows; + Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows; + + Index c0 = i*blockCols; + Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols; + + info[i].rhs_start = c0; + info[i].rhs_length = actualBlockCols; + + if(transpose) + func(0, cols, r0, actualBlockRows, info); + else + func(r0, actualBlockRows, 0,cols, info); + } + + delete[] info; +#endif +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PARALLELIZER_H diff --git a/tools/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/tools/Eigen/src/Core/products/SelfadjointMatrixMatrix.h new file mode 100644 index 0000000..99cf9e0 --- /dev/null +++ b/tools/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -0,0 +1,436 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H +#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H + +namespace Eigen { + +namespace internal { + +// pack a selfadjoint block diagonal for use with the gebp_kernel +template +struct symm_pack_lhs +{ + template inline + void pack(Scalar* blockA, const const_blas_data_mapper& lhs, Index cols, Index i, Index& count) + { + // normal copy + for(Index k=0; k lhs(_lhs,lhsStride); + Index count = 0; + Index peeled_mc = (rows/Pack1)*Pack1; + for(Index i=0; i(blockA, lhs, cols, i, count); + } + + if(rows-peeled_mc>=Pack2) + { + pack(blockA, lhs, cols, peeled_mc, count); + peeled_mc += Pack2; + } + + // do the same with mr==1 + for(Index i=peeled_mc; i +struct symm_pack_rhs +{ + enum { PacketSize = packet_traits::size }; + void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2) + { + Index end_k = k2 + rows; + Index count = 0; + const_blas_data_mapper rhs(_rhs,rhsStride); + Index packet_cols = (cols/nr)*nr; + + // first part: normal case + for(Index j2=0; j2 the same with nr==1) + for(Index j2=packet_cols; j2 +struct product_selfadjoint_matrix; + +template +struct product_selfadjoint_matrix +{ + + static EIGEN_STRONG_INLINE void run( + Index rows, Index cols, + const Scalar* lhs, Index lhsStride, + const Scalar* rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha) + { + product_selfadjoint_matrix::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs), + EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, + LhsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), + ColMajor> + ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); + } +}; + +template +struct product_selfadjoint_matrix +{ + + static EIGEN_DONT_INLINE void run( + Index rows, Index cols, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha); +}; + +template +EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( + Index rows, Index cols, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha) + { + Index size = rows; + + const_blas_data_mapper lhs(_lhs,lhsStride); + const_blas_data_mapper rhs(_rhs,rhsStride); + + typedef gebp_traits Traits; + + Index kc = size; // cache block size along the K direction + Index mc = rows; // cache block size along the M direction + Index nc = cols; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + // kc must smaller than mc + kc = (std::min)(kc,mc); + + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + std::size_t sizeB = sizeW + kc*cols; + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); + ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); + Scalar* blockB = allocatedBlockB + sizeW; + + gebp_kernel gebp_kernel; + symm_pack_lhs pack_lhs; + gemm_pack_rhs pack_rhs; + gemm_pack_lhs pack_lhs_transposed; + + for(Index k2=0; k2 transposed packed copy + // 2 - the diagonal block => special packed copy + // 3 - the panel below the diagonal block => generic packed copy + for(Index i2=0; i2() + (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); + + gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); + } + } + } + +// matrix * selfadjoint product +template +struct product_selfadjoint_matrix +{ + + static EIGEN_DONT_INLINE void run( + Index rows, Index cols, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha); +}; + +template +EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( + Index rows, Index cols, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha) + { + Index size = cols; + + const_blas_data_mapper lhs(_lhs,lhsStride); + + typedef gebp_traits Traits; + + Index kc = size; // cache block size along the K direction + Index mc = rows; // cache block size along the M direction + Index nc = cols; // cache block size along the N direction + computeProductBlockingSizes(kc, mc, nc); + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + std::size_t sizeB = sizeW + kc*cols; + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); + ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); + Scalar* blockB = allocatedBlockB + sizeW; + + gebp_kernel gebp_kernel; + gemm_pack_lhs pack_lhs; + symm_pack_rhs pack_rhs; + + for(Index k2=0; k2 GEPP + for(Index i2=0; i2 +struct traits > + : traits, Lhs, Rhs> > +{}; +} + +template +struct SelfadjointProductMatrix + : public ProductBase, Lhs, Rhs > +{ + EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) + + SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} + + enum { + LhsIsUpper = (LhsMode&(Upper|Lower))==Upper, + LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint, + RhsIsUpper = (RhsMode&(Upper|Lower))==Upper, + RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint + }; + + template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + { + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + + typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(m_rhs); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) + * RhsBlasTraits::extractScalarFactor(m_rhs); + + internal::product_selfadjoint_matrix::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, + NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), + EIGEN_LOGICAL_XOR(RhsIsUpper, + internal::traits::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint, + NumTraits::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)), + internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor> + ::run( + lhs.rows(), rhs.cols(), // sizes + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &dst.coeffRef(0,0), dst.outerStride(), // result info + actualAlpha // alpha + ); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H diff --git a/tools/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/tools/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h new file mode 100644 index 0000000..dfa687f --- /dev/null +++ b/tools/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h @@ -0,0 +1,295 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + + +/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ +\ + static void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ + a = _lhs; \ +\ + if (RhsStorageOrder==RowMajor) { \ + Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ + static void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ + Map, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \ + b = _rhs; } \ + else { \ + if (RhsStorageOrder==ColMajor && ConjugateRhs) { \ + Map > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + } else \ + if (ConjugateRhs) { \ + Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + } else { \ + Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + +EIGEN_MKL_SYMM_L(double, double, d, d) +EIGEN_MKL_SYMM_L(float, float, f, s) +EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) + + +/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ +\ + static void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ + a = _rhs; \ +\ + if (LhsStorageOrder==RowMajor) { \ + Map > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = lhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _lhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_selfadjoint_matrix \ +{\ + static void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ + Map, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ + a_tmp = rhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _rhs; \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \ + b = _lhs; } \ + else { \ + if (LhsStorageOrder==ColMajor && ConjugateLhs) { \ + Map > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \ + b_tmp = lhs.conjugate(); \ + } else \ + if (ConjugateLhs) { \ + Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.adjoint(); \ + } else { \ + Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + +EIGEN_MKL_SYMM_R(double, double, d, d) +EIGEN_MKL_SYMM_R(float, float, f, s) +EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H diff --git a/tools/Eigen/src/Core/products/SelfadjointMatrixVector.h b/tools/Eigen/src/Core/products/SelfadjointMatrixVector.h new file mode 100644 index 0000000..f698f67 --- /dev/null +++ b/tools/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -0,0 +1,281 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H +#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H + +namespace Eigen { + +namespace internal { + +/* Optimized selfadjoint matrix * vector product: + * This algorithm processes 2 columns at onces that allows to both reduce + * the number of load/stores of the result by a factor 2 and to reduce + * the instruction dependency. + */ + +template +struct selfadjoint_matrix_vector_product; + +template +struct selfadjoint_matrix_vector_product + +{ +static EIGEN_DONT_INLINE void run( + Index size, + const Scalar* lhs, Index lhsStride, + const Scalar* _rhs, Index rhsIncr, + Scalar* res, + Scalar alpha); +}; + +template +EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product::run( + Index size, + const Scalar* lhs, Index lhsStride, + const Scalar* _rhs, Index rhsIncr, + Scalar* res, + Scalar alpha) +{ + typedef typename packet_traits::type Packet; + const Index PacketSize = sizeof(Packet)/sizeof(Scalar); + + enum { + IsRowMajor = StorageOrder==RowMajor ? 1 : 0, + IsLower = UpLo == Lower ? 1 : 0, + FirstTriangular = IsRowMajor == IsLower + }; + + conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; + conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; + conj_helper::IsComplex, ConjugateRhs> cjd; + + conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; + conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; + + Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha; + + // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed. + // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, + // this is because we need to extract packets + ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast(_rhs) : 0); + if (rhsIncr!=1) + { + const Scalar* it = _rhs; + for (Index i=0; i(t0); + Scalar t1 = cjAlpha * rhs[j+1]; + Packet ptmp1 = pset1(t1); + + Scalar t2(0); + Packet ptmp2 = pset1(t2); + Scalar t3(0); + Packet ptmp3 = pset1(t3); + + size_t starti = FirstTriangular ? 0 : j+2; + size_t endi = FirstTriangular ? j : size; + size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti); + size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); + + // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed + res[j] += cjd.pmul(numext::real(A0[j]), t0); + res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1); + if(FirstTriangular) + { + res[j] += cj0.pmul(A1[j], t1); + t3 += cj1.pmul(A1[j], rhs[j]); + } + else + { + res[j+1] += cj0.pmul(A0[j+1],t0); + t2 += cj1.pmul(A0[j+1], rhs[j+1]); + } + + for (size_t i=starti; i huge speed up) + // gcc 4.2 does this optimization automatically. + const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart; + const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart; + const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart; + Scalar* EIGEN_RESTRICT resIt = res + alignedStart; + for (size_t i=alignedStart; i(a0It); a0It += PacketSize; + Packet A1i = ploadu(a1It); a1It += PacketSize; + Packet Bi = ploadu(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases + Packet Xi = pload (resIt); + + Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi)); + ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2); + ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3); + pstore(resIt,Xi); resIt += PacketSize; + } + for (size_t i=alignedEnd; i +struct traits > + : traits, Lhs, Rhs> > +{}; +} + +template +struct SelfadjointProductMatrix + : public ProductBase, Lhs, Rhs > +{ + EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) + + enum { + LhsUpLo = LhsMode&(Upper|Lower) + }; + + SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} + + template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + { + typedef typename Dest::Scalar ResScalar; + typedef typename Base::RhsScalar RhsScalar; + typedef Map, Aligned> MappedDest; + + eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); + + typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(m_rhs); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) + * RhsBlasTraits::extractScalarFactor(m_rhs); + + enum { + EvalToDest = (Dest::InnerStrideAtCompileTime==1), + UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1) + }; + + internal::gemv_static_vector_if static_dest; + internal::gemv_static_vector_if static_rhs; + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + EvalToDest ? dest.data() : static_dest.data()); + + ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(), + UseRhs ? const_cast(rhs.data()) : static_rhs.data()); + + if(!EvalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + MappedDest(actualDestPtr, dest.size()) = dest; + } + + if(!UseRhs) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = rhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + Map(actualRhsPtr, rhs.size()) = rhs; + } + + + internal::selfadjoint_matrix_vector_product::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run + ( + lhs.rows(), // size + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + actualRhsPtr, 1, // rhs info + actualDestPtr, // result info + actualAlpha // scale factor + ); + + if(!EvalToDest) + dest = MappedDest(actualDestPtr, dest.size()); + } +}; + +namespace internal { +template +struct traits > + : traits, Lhs, Rhs> > +{}; +} + +template +struct SelfadjointProductMatrix + : public ProductBase, Lhs, Rhs > +{ + EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) + + enum { + RhsUpLo = RhsMode&(Upper|Lower) + }; + + SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} + + template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const + { + // let's simply transpose the product + Transpose destT(dest); + SelfadjointProductMatrix, int(RhsUpLo)==Upper ? Lower : Upper, false, + Transpose, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H diff --git a/tools/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/tools/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h new file mode 100644 index 0000000..86684b6 --- /dev/null +++ b/tools/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -0,0 +1,114 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H + +namespace Eigen { + +namespace internal { + +/********************************************************************** +* This file implements selfadjoint matrix-vector multiplication using BLAS +**********************************************************************/ + +// symv/hemv specialization + +template +struct selfadjoint_matrix_vector_product_symv : + selfadjoint_matrix_vector_product {}; + +#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ +template \ +struct selfadjoint_matrix_vector_product { \ +static void run( \ + Index size, const Scalar* lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ + enum {\ + IsColMajor = StorageOrder==ColMajor \ + }; \ + if (IsColMajor == ConjugateLhs) {\ + selfadjoint_matrix_vector_product::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + } else {\ + selfadjoint_matrix_vector_product_symv::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + }\ + } \ +}; \ + +EIGEN_MKL_SYMV_SPECIALIZE(double) +EIGEN_MKL_SYMV_SPECIALIZE(float) +EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) +EIGEN_MKL_SYMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ +template \ +struct selfadjoint_matrix_vector_product_symv \ +{ \ +typedef Matrix SYMVVector;\ +\ +static void run( \ +Index size, const EIGTYPE* lhs, Index lhsStride, \ +const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ +{ \ + enum {\ + IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ + IsLower = UpLo == Lower ? 1 : 0 \ + }; \ + MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + SYMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map > map_x(_rhs,size,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=_rhs; \ + MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H diff --git a/tools/Eigen/src/Core/products/SelfadjointProduct.h b/tools/Eigen/src/Core/products/SelfadjointProduct.h new file mode 100644 index 0000000..6ca4ae6 --- /dev/null +++ b/tools/Eigen/src/Core/products/SelfadjointProduct.h @@ -0,0 +1,123 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINT_PRODUCT_H +#define EIGEN_SELFADJOINT_PRODUCT_H + +/********************************************************************** +* This file implements a self adjoint product: C += A A^T updating only +* half of the selfadjoint matrix C. +* It corresponds to the level 3 SYRK and level 2 SYR Blas routines. +**********************************************************************/ + +namespace Eigen { + + +template +struct selfadjoint_rank1_update +{ + static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha) + { + internal::conj_if cj; + typedef Map > OtherMap; + typedef typename internal::conditional::type ConjLhsType; + for (Index i=0; i >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1))) + += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1))); + } + } +}; + +template +struct selfadjoint_rank1_update +{ + static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha) + { + selfadjoint_rank1_update::run(size,mat,stride,vecY,vecX,alpha); + } +}; + +template +struct selfadjoint_product_selector; + +template +struct selfadjoint_product_selector +{ + static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha) + { + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + typedef internal::blas_traits OtherBlasTraits; + typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType; + typedef typename internal::remove_all::type _ActualOtherType; + typename internal::add_const_on_value_type::type actualOther = OtherBlasTraits::extract(other.derived()); + + Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); + + enum { + StorageOrder = (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, + UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1 + }; + internal::gemv_static_vector_if static_other; + + ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(), + (UseOtherDirectly ? const_cast(actualOther.data()) : static_other.data())); + + if(!UseOtherDirectly) + Map(actualOtherPtr, actualOther.size()) = actualOther; + + selfadjoint_rank1_update::IsComplex, + (!OtherBlasTraits::NeedToConjugate) && NumTraits::IsComplex> + ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha); + } +}; + +template +struct selfadjoint_product_selector +{ + static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha) + { + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + typedef internal::blas_traits OtherBlasTraits; + typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType; + typedef typename internal::remove_all::type _ActualOtherType; + typename internal::add_const_on_value_type::type actualOther = OtherBlasTraits::extract(other.derived()); + + Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); + + enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0 }; + + internal::general_matrix_matrix_triangular_product::IsComplex, + Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits::IsComplex, + MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> + ::run(mat.cols(), actualOther.cols(), + &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), + mat.data(), mat.outerStride(), actualAlpha); + } +}; + +// high level API + +template +template +SelfAdjointView& SelfAdjointView +::rankUpdate(const MatrixBase& u, const Scalar& alpha) +{ + selfadjoint_product_selector::run(_expression().const_cast_derived(), u.derived(), alpha); + + return *this; +} + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINT_PRODUCT_H diff --git a/tools/Eigen/src/Core/products/SelfadjointRank2Update.h b/tools/Eigen/src/Core/products/SelfadjointRank2Update.h new file mode 100644 index 0000000..8594a97 --- /dev/null +++ b/tools/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -0,0 +1,93 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELFADJOINTRANK2UPTADE_H +#define EIGEN_SELFADJOINTRANK2UPTADE_H + +namespace Eigen { + +namespace internal { + +/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu' + * It corresponds to the Level2 syr2 BLAS routine + */ + +template +struct selfadjoint_rank2_update_selector; + +template +struct selfadjoint_rank2_update_selector +{ + static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha) + { + const Index size = u.size(); + for (Index i=0; i >(mat+stride*i+i, size-i) += + (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.tail(size-i) + + (alpha * numext::conj(v.coeff(i))) * u.tail(size-i); + } + } +}; + +template +struct selfadjoint_rank2_update_selector +{ + static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha) + { + const Index size = u.size(); + for (Index i=0; i >(mat+stride*i, i+1) += + (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.head(i+1) + + (alpha * numext::conj(v.coeff(i))) * u.head(i+1); + } +}; + +template struct conj_expr_if + : conditional::Scalar>,T> > {}; + +} // end namespace internal + +template +template +SelfAdjointView& SelfAdjointView +::rankUpdate(const MatrixBase& u, const MatrixBase& v, const Scalar& alpha) +{ + typedef internal::blas_traits UBlasTraits; + typedef typename UBlasTraits::DirectLinearAccessType ActualUType; + typedef typename internal::remove_all::type _ActualUType; + typename internal::add_const_on_value_type::type actualU = UBlasTraits::extract(u.derived()); + + typedef internal::blas_traits VBlasTraits; + typedef typename VBlasTraits::DirectLinearAccessType ActualVType; + typedef typename internal::remove_all::type _ActualVType; + typename internal::add_const_on_value_type::type actualV = VBlasTraits::extract(v.derived()); + + // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and + // vice versa, and take the complex conjugate of all coefficients and vector entries. + + enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0 }; + Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived()) + * numext::conj(VBlasTraits::extractScalarFactor(v.derived())); + if (IsRowMajor) + actualAlpha = numext::conj(actualAlpha); + + internal::selfadjoint_rank2_update_selector::type>::type, + typename internal::remove_all::type>::type, + (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)> + ::run(_expression().const_cast_derived().data(),_expression().outerStride(),actualU,actualV,actualAlpha); + + return *this; +} + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINTRANK2UPTADE_H diff --git a/tools/Eigen/src/Core/products/TriangularMatrixMatrix.h b/tools/Eigen/src/Core/products/TriangularMatrixMatrix.h new file mode 100644 index 0000000..8110507 --- /dev/null +++ b/tools/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -0,0 +1,427 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H +#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H + +namespace Eigen { + +namespace internal { + +// template +// struct gemm_pack_lhs_triangular +// { +// Matrix::IsComplex && Conjugate> cj; +// const_blas_data_mapper lhs(_lhs,lhsStride); +// int count = 0; +// const int peeled_mc = (rows/mr)*mr; +// for(int i=0; i +struct product_triangular_matrix_matrix; + +template +struct product_triangular_matrix_matrix +{ + static EIGEN_STRONG_INLINE void run( + Index rows, Index cols, Index depth, + const Scalar* lhs, Index lhsStride, + const Scalar* rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha, level3_blocking& blocking) + { + product_triangular_matrix_matrix + ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); + } +}; + +// implements col-major += alpha * op(triangular) * op(general) +template +struct product_triangular_matrix_matrix +{ + + typedef gebp_traits Traits; + enum { + SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + IsLower = (Mode&Lower) == Lower, + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 + }; + + static EIGEN_DONT_INLINE void run( + Index _rows, Index _cols, Index _depth, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha, level3_blocking& blocking); +}; + +template +EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( + Index _rows, Index _cols, Index _depth, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha, level3_blocking& blocking) + { + // strip zeros + Index diagSize = (std::min)(_rows,_depth); + Index rows = IsLower ? _rows : diagSize; + Index depth = IsLower ? diagSize : _depth; + Index cols = _cols; + + const_blas_data_mapper lhs(_lhs,lhsStride); + const_blas_data_mapper rhs(_rhs,rhsStride); + + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*cols; + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); + + Matrix triangularBuffer; + triangularBuffer.setZero(); + if((Mode&ZeroDiag)==ZeroDiag) + triangularBuffer.diagonal().setZero(); + else + triangularBuffer.diagonal().setOnes(); + + gebp_kernel gebp_kernel; + gemm_pack_lhs pack_lhs; + gemm_pack_rhs pack_rhs; + + for(Index k2=IsLower ? depth : 0; + IsLower ? k2>0 : k2rows)) + { + actual_kc = rows-k2; + k2 = k2+actual_kc-kc; + } + + pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols); + + // the selected lhs's panel has to be split in three different parts: + // 1 - the part which is zero => skip it + // 2 - the diagonal block => special kernel + // 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP + + // the block diagonal, if any: + if(IsLower || actual_k2(actual_kc-k1, SmallPanelWidth); + Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; + Index startBlock = actual_k2+k1; + Index blockBOffset = k1; + + // => GEBP with the micro triangular block + // The trick is to pack this micro block while filling the opposite triangular part with zeros. + // To this end we do an extra triangular copy to a small temporary buffer + for (Index k=0;k0) + { + Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2; + + pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget); + + gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha, + actualPanelWidth, actual_kc, 0, blockBOffset, blockW); + } + } + } + // the part below (lower case) or above (upper case) the diagonal => GEPP + { + Index start = IsLower ? k2 : 0; + Index end = IsLower ? rows : (std::min)(actual_k2,rows); + for(Index i2=start; i2() + (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); + + gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW); + } + } + } + } + +// implements col-major += alpha * op(general) * op(triangular) +template +struct product_triangular_matrix_matrix +{ + typedef gebp_traits Traits; + enum { + SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + IsLower = (Mode&Lower) == Lower, + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 + }; + + static EIGEN_DONT_INLINE void run( + Index _rows, Index _cols, Index _depth, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha, level3_blocking& blocking); +}; + +template +EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( + Index _rows, Index _cols, Index _depth, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, + const Scalar& alpha, level3_blocking& blocking) + { + // strip zeros + Index diagSize = (std::min)(_cols,_depth); + Index rows = _rows; + Index depth = IsLower ? _depth : diagSize; + Index cols = IsLower ? diagSize : _cols; + + const_blas_data_mapper lhs(_lhs,lhsStride); + const_blas_data_mapper rhs(_rhs,rhsStride); + + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*cols; + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); + + Matrix triangularBuffer; + triangularBuffer.setZero(); + if((Mode&ZeroDiag)==ZeroDiag) + triangularBuffer.diagonal().setZero(); + else + triangularBuffer.diagonal().setOnes(); + + gebp_kernel gebp_kernel; + gemm_pack_lhs pack_lhs; + gemm_pack_rhs pack_rhs; + gemm_pack_rhs pack_rhs_panel; + + for(Index k2=IsLower ? 0 : depth; + IsLower ? k20; + IsLower ? k2+=kc : k2-=kc) + { + Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc); + Index actual_k2 = IsLower ? k2 : k2-actual_kc; + + // align blocks with the end of the triangular part for trapezoidal rhs + if(IsLower && (k2cols)) + { + actual_kc = cols-k2; + k2 = actual_k2 + actual_kc - kc; + } + + // remaining size + Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2; + // size of the triangular part + Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc; + + Scalar* geb = blockB+ts*ts; + + pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs); + + // pack the triangular part of the rhs padding the unrolled blocks with zeros + if(ts>0) + { + for (Index j2=0; j2(actual_kc-j2, SmallPanelWidth); + Index actual_j2 = actual_k2 + j2; + Index panelOffset = IsLower ? j2+actualPanelWidth : 0; + Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; + // general part + pack_rhs_panel(blockB+j2*actual_kc, + &rhs(actual_k2+panelOffset, actual_j2), rhsStride, + panelLength, actualPanelWidth, + actual_kc, panelOffset); + + // append the triangular part via a temporary buffer + for (Index j=0;j0) + { + for (Index j2=0; j2(actual_kc-j2, SmallPanelWidth); + Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth; + Index blockOffset = IsLower ? j2 : 0; + + gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride, + blockA, blockB+j2*actual_kc, + actual_mc, panelLength, actualPanelWidth, + alpha, + actual_kc, actual_kc, // strides + blockOffset, blockOffset,// offsets + blockW); // workspace + } + } + gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride, + blockA, geb, actual_mc, actual_kc, rs, + alpha, + -1, -1, 0, 0, blockW); + } + } + } + +/*************************************************************************** +* Wrapper to product_triangular_matrix_matrix +***************************************************************************/ + +template +struct traits > + : traits, Lhs, Rhs> > +{}; + +} // end namespace internal + +template +struct TriangularProduct + : public ProductBase, Lhs, Rhs > +{ + EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) + + TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} + + template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + { + typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(m_rhs); + + Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) + * RhsBlasTraits::extractScalarFactor(m_rhs); + + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, + Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; + + enum { IsLower = (Mode&Lower) == Lower }; + Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols()); + Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows()); + Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows())) + : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols())); + + BlockingType blocking(stripedRows, stripedCols, stripedDepth); + + internal::product_triangular_matrix_matrix::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, + (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, + (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor> + ::run( + stripedRows, stripedCols, stripedDepth, // sizes + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info + &dst.coeffRef(0,0), dst.outerStride(), // result info + actualAlpha, blocking + ); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H diff --git a/tools/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/tools/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h new file mode 100644 index 0000000..4cc56a4 --- /dev/null +++ b/tools/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -0,0 +1,309 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix * matrix product functionality based on ?TRMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H +#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + + +template +struct product_triangular_matrix_matrix_trmm : + product_triangular_matrix_matrix {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ +template \ +struct product_triangular_matrix_matrix { \ + static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ + const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking& blocking) { \ + product_triangular_matrix_matrix_trmm::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + } \ +}; + +EIGEN_MKL_TRMM_SPECIALIZE(double, true) +EIGEN_MKL_TRMM_SPECIALIZE(double, false) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) +EIGEN_MKL_TRMM_SPECIALIZE(float, true) +EIGEN_MKL_TRMM_SPECIALIZE(float, false) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) + +// implements col-major += alpha * op(triangular) * op(general) +#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_triangular_matrix_matrix_trmm \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \ + }; \ +\ + static void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha, level3_blocking& blocking) \ + { \ + Index diagSize = (std::min)(_rows,_depth); \ + Index rows = IsLower ? _rows : diagSize; \ + Index depth = IsLower ? diagSize : _depth; \ + Index cols = _cols; \ +\ + typedef Matrix MatrixLhs; \ + typedef Matrix MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (rows != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ +\ + if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs aa_tmp=lhsMap.template triangularView(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product::run( \ + rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ +\ + /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'L', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, lda, ldb; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)diagSize; \ + n = (MKL_INT)cols; \ +\ +/* Set trans */ \ + transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _lhs; \ + lda = lhsStride; \ + } \ + /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_L(double, double, d, d) +EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_L(float, float, f, s) +EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) + +// implements col-major += alpha * op(general) * op(triangular) +#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct product_triangular_matrix_matrix_trmm \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \ + }; \ +\ + static void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha, level3_blocking& blocking) \ + { \ + Index diagSize = (std::min)(_cols,_depth); \ + Index rows = _rows; \ + Index depth = IsLower ? _depth : diagSize; \ + Index cols = IsLower ? diagSize : _cols; \ +\ + typedef Matrix MatrixLhs; \ + typedef Matrix MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (cols != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ +\ + if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ + /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs aa_tmp=rhsMap.template triangularView(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product::run( \ + rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ +\ + /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'R', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, lda, ldb; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)diagSize; \ +\ +/* Set trans */ \ + transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _rhs; \ + lda = rhsStride; \ + } \ + /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_R(double, double, d, d) +EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_R(float, float, f, s) +EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H diff --git a/tools/Eigen/src/Core/products/TriangularMatrixVector.h b/tools/Eigen/src/Core/products/TriangularMatrixVector.h new file mode 100644 index 0000000..6117d5a --- /dev/null +++ b/tools/Eigen/src/Core/products/TriangularMatrixVector.h @@ -0,0 +1,348 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H +#define EIGEN_TRIANGULARMATRIXVECTOR_H + +namespace Eigen { + +namespace internal { + +template +struct triangular_matrix_vector_product; + +template +struct triangular_matrix_vector_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + enum { + IsLower = ((Mode&Lower)==Lower), + HasUnitDiag = (Mode & UnitDiag)==UnitDiag, + HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag + }; + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha); +}; + +template +EIGEN_DONT_INLINE void triangular_matrix_vector_product + ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha) + { + static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; + Index size = (std::min)(_rows,_cols); + Index rows = IsLower ? _rows : (std::min)(_rows,_cols); + Index cols = IsLower ? (std::min)(_rows,_cols) : _cols; + + typedef Map, 0, OuterStride<> > LhsMap; + const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride)); + typename conj_expr_if::type cjLhs(lhs); + + typedef Map, 0, InnerStride<> > RhsMap; + const RhsMap rhs(_rhs,cols,InnerStride<>(rhsIncr)); + typename conj_expr_if::type cjRhs(rhs); + + typedef Map > ResMap; + ResMap res(_res,rows); + + for (Index pi=0; pi0) + res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r); + if (HasUnitDiag) + res.coeffRef(i) += alpha * cjRhs.coeff(i); + } + Index r = IsLower ? rows - pi - actualPanelWidth : pi; + if (r>0) + { + Index s = IsLower ? pi+actualPanelWidth : 0; + general_matrix_vector_product::run( + r, actualPanelWidth, + &lhs.coeffRef(s,pi), lhsStride, + &rhs.coeffRef(pi), rhsIncr, + &res.coeffRef(s), resIncr, alpha); + } + } + if((!IsLower) && cols>size) + { + general_matrix_vector_product::run( + rows, cols-size, + &lhs.coeffRef(0,size), lhsStride, + &rhs.coeffRef(size), rhsIncr, + _res, resIncr, alpha); + } + } + +template +struct triangular_matrix_vector_product +{ + typedef typename scalar_product_traits::ReturnType ResScalar; + enum { + IsLower = ((Mode&Lower)==Lower), + HasUnitDiag = (Mode & UnitDiag)==UnitDiag, + HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag + }; + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha); +}; + +template +EIGEN_DONT_INLINE void triangular_matrix_vector_product + ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha) + { + static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; + Index diagSize = (std::min)(_rows,_cols); + Index rows = IsLower ? _rows : diagSize; + Index cols = IsLower ? diagSize : _cols; + + typedef Map, 0, OuterStride<> > LhsMap; + const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride)); + typename conj_expr_if::type cjLhs(lhs); + + typedef Map > RhsMap; + const RhsMap rhs(_rhs,cols); + typename conj_expr_if::type cjRhs(rhs); + + typedef Map, 0, InnerStride<> > ResMap; + ResMap res(_res,rows,InnerStride<>(resIncr)); + + for (Index pi=0; pi0) + res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum(); + if (HasUnitDiag) + res.coeffRef(i) += alpha * cjRhs.coeff(i); + } + Index r = IsLower ? pi : cols - pi - actualPanelWidth; + if (r>0) + { + Index s = IsLower ? 0 : pi + actualPanelWidth; + general_matrix_vector_product::run( + actualPanelWidth, r, + &lhs.coeffRef(pi,s), lhsStride, + &rhs.coeffRef(s), rhsIncr, + &res.coeffRef(pi), resIncr, alpha); + } + } + if(IsLower && rows>diagSize) + { + general_matrix_vector_product::run( + rows-diagSize, cols, + &lhs.coeffRef(diagSize,0), lhsStride, + &rhs.coeffRef(0), rhsIncr, + &res.coeffRef(diagSize), resIncr, alpha); + } + } + +/*************************************************************************** +* Wrapper to product_triangular_vector +***************************************************************************/ + +template +struct traits > + : traits, Lhs, Rhs> > +{}; + +template +struct traits > + : traits, Lhs, Rhs> > +{}; + + +template +struct trmv_selector; + +} // end namespace internal + +template +struct TriangularProduct + : public ProductBase, Lhs, Rhs > +{ + EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) + + TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} + + template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + { + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + + internal::trmv_selector<(int(internal::traits::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha); + } +}; + +template +struct TriangularProduct + : public ProductBase, Lhs, Rhs > +{ + EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct) + + TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} + + template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const + { + eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + + typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose,false,Transpose,true> TriangularProductTranspose; + Transpose dstT(dst); + internal::trmv_selector<(int(internal::traits::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run( + TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha); + } +}; + +namespace internal { + +// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same. + +template<> struct trmv_selector +{ + template + static void run(const TriangularProduct& prod, Dest& dest, const typename TriangularProduct::Scalar& alpha) + { + typedef TriangularProduct ProductType; + typedef typename ProductType::Index Index; + typedef typename ProductType::LhsScalar LhsScalar; + typedef typename ProductType::RhsScalar RhsScalar; + typedef typename ProductType::Scalar ResScalar; + typedef typename ProductType::RealScalar RealScalar; + typedef typename ProductType::ActualLhsType ActualLhsType; + typedef typename ProductType::ActualRhsType ActualRhsType; + typedef typename ProductType::LhsBlasTraits LhsBlasTraits; + typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef Map, Aligned> MappedDest; + + typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) + * RhsBlasTraits::extractScalarFactor(prod.rhs()); + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1, + ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), + MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal + }; + + gemv_static_vector_if static_dest; + + bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); + + if(!evalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; + } + + internal::triangular_matrix_vector_product + + ::run(actualLhs.rows(),actualLhs.cols(), + actualLhs.data(),actualLhs.outerStride(), + actualRhs.data(),actualRhs.innerStride(), + actualDestPtr,1,compatibleAlpha); + + if (!evalToDest) + { + if(!alphaIsCompatible) + dest += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } + } +}; + +template<> struct trmv_selector +{ + template + static void run(const TriangularProduct& prod, Dest& dest, const typename TriangularProduct::Scalar& alpha) + { + typedef TriangularProduct ProductType; + typedef typename ProductType::LhsScalar LhsScalar; + typedef typename ProductType::RhsScalar RhsScalar; + typedef typename ProductType::Scalar ResScalar; + typedef typename ProductType::Index Index; + typedef typename ProductType::ActualLhsType ActualLhsType; + typedef typename ProductType::ActualRhsType ActualRhsType; + typedef typename ProductType::_ActualRhsType _ActualRhsType; + typedef typename ProductType::LhsBlasTraits LhsBlasTraits; + typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + + typename add_const::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + typename add_const::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) + * RhsBlasTraits::extractScalarFactor(prod.rhs()); + + enum { + DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + }; + + gemv_static_vector_if static_rhs; + + ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), + DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); + + if(!DirectlyUseRhs) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = actualRhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + Map(actualRhsPtr, actualRhs.size()) = actualRhs; + } + + internal::triangular_matrix_vector_product + + ::run(actualLhs.rows(),actualLhs.cols(), + actualLhs.data(),actualLhs.outerStride(), + actualRhsPtr,1, + dest.data(),dest.innerStride(), + actualAlpha); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULARMATRIXVECTOR_H diff --git a/tools/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/tools/Eigen/src/Core/products/TriangularMatrixVector_MKL.h new file mode 100644 index 0000000..09f110d --- /dev/null +++ b/tools/Eigen/src/Core/products/TriangularMatrixVector_MKL.h @@ -0,0 +1,247 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix-vector product functionality based on ?TRMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H +#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H + +namespace Eigen { + +namespace internal { + +/********************************************************************** +* This file implements triangular matrix-vector multiplication using BLAS +**********************************************************************/ + +// trmv/hemv specialization + +template +struct triangular_matrix_vector_product_trmv : + triangular_matrix_vector_product {}; + +#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ +template \ +struct triangular_matrix_vector_product { \ + static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; \ +template \ +struct triangular_matrix_vector_product { \ + static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; + +EIGEN_MKL_TRMV_SPECIALIZE(double) +EIGEN_MKL_TRMV_SPECIALIZE(float) +EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) +EIGEN_MKL_TRMV_SPECIALIZE(scomplex) + +// implements col-major: res += alpha * op(triangular) * vector +#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template \ +struct triangular_matrix_vector_product_trmv { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ + { \ + if (ConjLhs || IsZeroDiag) { \ + triangular_matrix_vector_product::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = 'N'; \ + uplo = IsLower ? 'L' : 'U'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size \ +struct triangular_matrix_vector_product_trmv { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ + { \ + if (IsZeroDiag) { \ + triangular_matrix_vector_product::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = ConjLhs ? 'C' : 'T'; \ + uplo = IsLower ? 'U' : 'L'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H +#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H + +namespace Eigen { + +namespace internal { + +// if the rhs is row major, let's transpose the product +template +struct triangular_solve_matrix +{ + static void run( + Index size, Index cols, + const Scalar* tri, Index triStride, + Scalar* _other, Index otherStride, + level3_blocking& blocking) + { + triangular_solve_matrix< + Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft, + (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper), + NumTraits::IsComplex && Conjugate, + TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor> + ::run(size, cols, tri, triStride, _other, otherStride, blocking); + } +}; + +/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left + */ +template +struct triangular_solve_matrix +{ + static EIGEN_DONT_INLINE void run( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherStride, + level3_blocking& blocking); +}; +template +EIGEN_DONT_INLINE void triangular_solve_matrix::run( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherStride, + level3_blocking& blocking) + { + Index cols = otherSize; + const_blas_data_mapper tri(_tri,triStride); + blas_data_mapper other(_other,otherStride); + + typedef gebp_traits Traits; + enum { + SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + IsLower = (Mode&Lower) == Lower + }; + + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction + + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*cols; + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); + + conj_if conj; + gebp_kernel gebp_kernel; + gemm_pack_lhs pack_lhs; + gemm_pack_rhs pack_rhs; + + // the goal here is to subdivise the Rhs panels such that we keep some cache + // coherence when accessing the rhs elements + std::ptrdiff_t l1, l2; + manage_caching_sizes(GetAction, &l1, &l2); + Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0; + subcols = std::max((subcols/Traits::nr)*Traits::nr, Traits::nr); + + for(Index k2=IsLower ? 0 : size; + IsLower ? k20; + IsLower ? k2+=kc : k2-=kc) + { + const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc); + + // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel, + // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into + // A11 (the triangular part) and A21 the remaining rectangular part. + // Then the high level algorithm is: + // - B = R1 => general block copy (done during the next step) + // - R1 = A11^-1 B => tricky part + // - update B from the new R1 => actually this has to be performed continuously during the above step + // - R2 -= A21 * B => GEPP + + // The tricky part: compute R1 = A11^-1 B while updating B from R1 + // The idea is to split A11 into multiple small vertical panels. + // Each panel can be split into a small triangular part T1k which is processed without optimization, + // and the remaining small part T2k which is processed using gebp with appropriate block strides + for(Index j2=0; j2(actual_kc-k1, SmallPanelWidth); + // tr solve + for (Index k=0; k0) + { + Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc; + + pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget); + + gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1), + actualPanelWidth, actual_kc, 0, blockBOffset, blockW); + } + } + } + + // R2 -= A21 * B => GEPP + { + Index start = IsLower ? k2+kc : 0; + Index end = IsLower ? size : k2-kc; + for(Index i2=start; i20) + { + pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc); + + gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW); + } + } + } + } + } + +/* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right + */ +template +struct triangular_solve_matrix +{ + static EIGEN_DONT_INLINE void run( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherStride, + level3_blocking& blocking); +}; +template +EIGEN_DONT_INLINE void triangular_solve_matrix::run( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherStride, + level3_blocking& blocking) + { + Index rows = otherSize; + const_blas_data_mapper rhs(_tri,triStride); + blas_data_mapper lhs(_other,otherStride); + + typedef gebp_traits Traits; + enum { + RhsStorageOrder = TriStorageOrder, + SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + IsLower = (Mode&Lower) == Lower + }; + + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*size; + std::size_t sizeW = kc*Traits::WorkSpaceFactor; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); + + conj_if conj; + gebp_kernel gebp_kernel; + gemm_pack_rhs pack_rhs; + gemm_pack_rhs pack_rhs_panel; + gemm_pack_lhs pack_lhs_panel; + + for(Index k2=IsLower ? size : 0; + IsLower ? k2>0 : k20) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, actual_kc, rs); + + // triangular packing (we only pack the panels off the diagonal, + // neglecting the blocks overlapping the diagonal + { + for (Index j2=0; j2(actual_kc-j2, SmallPanelWidth); + Index actual_j2 = actual_k2 + j2; + Index panelOffset = IsLower ? j2+actualPanelWidth : 0; + Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; + + if (panelLength>0) + pack_rhs_panel(blockB+j2*actual_kc, + &rhs(actual_k2+panelOffset, actual_j2), triStride, + panelLength, actualPanelWidth, + actual_kc, panelOffset); + } + } + + for(Index i2=0; i2 vertical panels of rhs) + for (Index j2 = IsLower + ? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth) + : Index(SmallPanelWidth))) + : 0; + IsLower ? j2>=0 : j2(actual_kc-j2, SmallPanelWidth); + Index absolute_j2 = actual_k2 + j2; + Index panelOffset = IsLower ? j2+actualPanelWidth : 0; + Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2; + + // GEBP + if(panelLength>0) + { + gebp_kernel(&lhs(i2,absolute_j2), otherStride, + blockA, blockB+j2*actual_kc, + actual_mc, panelLength, actualPanelWidth, + Scalar(-1), + actual_kc, actual_kc, // strides + panelOffset, panelOffset, // offsets + blockW); // workspace + } + + // unblocked triangular solve + for (Index k=0; k0) + gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb, + actual_mc, actual_kc, rs, Scalar(-1), + -1, -1, 0, 0, blockW); + } + } + } + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H diff --git a/tools/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/tools/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h new file mode 100644 index 0000000..6a0bb83 --- /dev/null +++ b/tools/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h @@ -0,0 +1,155 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix * matrix product functionality based on ?TRMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H +#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + +// implements LeftSide op(triangular)^-1 * general +#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template \ +struct triangular_solve_matrix \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride, level3_blocking& /*blocking*/) \ + { \ + MKL_INT m = size, n = otherSize, lda, ldb; \ + char side = 'L', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix MatrixTri; \ + Map > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + } \ +}; + +EIGEN_MKL_TRSM_L(double, double, d) +EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_L(float, float, s) +EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) + + +// implements RightSide general * op(triangular)^-1 +#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template \ +struct triangular_solve_matrix \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride, level3_blocking& /*blocking*/) \ + { \ + MKL_INT m = otherSize, n = size, lda, ldb; \ + char side = 'R', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix MatrixTri; \ + Map > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + /*std::cout << "TRMS_L specialization!\n";*/ \ + } \ +}; + +EIGEN_MKL_TRSM_R(double, double, d) +EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_R(float, float, s) +EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H diff --git a/tools/Eigen/src/Core/products/TriangularSolverVector.h b/tools/Eigen/src/Core/products/TriangularSolverVector.h new file mode 100644 index 0000000..ce4d100 --- /dev/null +++ b/tools/Eigen/src/Core/products/TriangularSolverVector.h @@ -0,0 +1,139 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H +#define EIGEN_TRIANGULAR_SOLVER_VECTOR_H + +namespace Eigen { + +namespace internal { + +template +struct triangular_solve_vector +{ + static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs) + { + triangular_solve_vector::run(size, _lhs, lhsStride, rhs); + } +}; + +// forward and backward substitution, row-major, rhs is a vector +template +struct triangular_solve_vector +{ + enum { + IsLower = ((Mode&Lower)==Lower) + }; + static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs) + { + typedef Map, 0, OuterStride<> > LhsMap; + const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride)); + typename internal::conditional< + Conjugate, + const CwiseUnaryOp,LhsMap>, + const LhsMap&> + ::type cjLhs(lhs); + static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; + for(Index pi=IsLower ? 0 : size; + IsLower ? pi0; + IsLower ? pi+=PanelWidth : pi-=PanelWidth) + { + Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth); + + Index r = IsLower ? pi : size - pi; // remaining size + if (r > 0) + { + // let's directly call the low level product function because: + // 1 - it is faster to compile + // 2 - it is slighlty faster at runtime + Index startRow = IsLower ? pi : pi-actualPanelWidth; + Index startCol = IsLower ? 0 : pi; + + general_matrix_vector_product::run( + actualPanelWidth, r, + &lhs.coeffRef(startRow,startCol), lhsStride, + rhs + startCol, 1, + rhs + startRow, 1, + RhsScalar(-1)); + } + + for(Index k=0; k0) + rhs[i] -= (cjLhs.row(i).segment(s,k).transpose().cwiseProduct(Map >(rhs+s,k))).sum(); + + if(!(Mode & UnitDiag)) + rhs[i] /= cjLhs(i,i); + } + } + } +}; + +// forward and backward substitution, column-major, rhs is a vector +template +struct triangular_solve_vector +{ + enum { + IsLower = ((Mode&Lower)==Lower) + }; + static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs) + { + typedef Map, 0, OuterStride<> > LhsMap; + const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride)); + typename internal::conditional,LhsMap>, + const LhsMap& + >::type cjLhs(lhs); + static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; + + for(Index pi=IsLower ? 0 : size; + IsLower ? pi0; + IsLower ? pi+=PanelWidth : pi-=PanelWidth) + { + Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth); + Index startBlock = IsLower ? pi : pi-actualPanelWidth; + Index endBlock = IsLower ? pi + actualPanelWidth : 0; + + for(Index k=0; k0) + Map >(rhs+s,r) -= rhs[i] * cjLhs.col(i).segment(s,r); + } + Index r = IsLower ? size - endBlock : startBlock; // remaining size + if (r > 0) + { + // let's directly call the low level product function because: + // 1 - it is faster to compile + // 2 - it is slighlty faster at runtime + general_matrix_vector_product::run( + r, actualPanelWidth, + &lhs.coeffRef(endBlock,startBlock), lhsStride, + rhs+startBlock, 1, + rhs+endBlock, 1, RhsScalar(-1)); + } + } + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_SOLVER_VECTOR_H From d04b9a18a2541c95318707264a5de18d85f50109 Mon Sep 17 00:00:00 2001 From: albe89 Date: Thu, 8 Sep 2016 14:13:25 +0200 Subject: [PATCH 17/25] add more eigen files --- tools/Eigen/src/Core/ReturnByValue.h | 99 ++ tools/Eigen/src/Core/Reverse.h | 224 ++++ tools/Eigen/src/Core/Select.h | 162 +++ tools/Eigen/src/Core/SelfAdjointView.h | 314 ++++++ tools/Eigen/src/Core/SelfCwiseBinaryOp.h | 191 ++++ tools/Eigen/src/Core/SolveTriangular.h | 260 +++++ tools/Eigen/src/Core/StableNorm.h | 203 ++++ tools/Eigen/src/Core/Stride.h | 108 ++ tools/Eigen/src/Core/Swap.h | 126 +++ tools/Eigen/src/Core/Transpose.h | 419 ++++++++ tools/Eigen/src/Core/Transpositions.h | 436 ++++++++ tools/Eigen/src/Core/TriangularMatrix.h | 839 +++++++++++++++ tools/Eigen/src/Core/VectorwiseOp.h | 642 ++++++++++++ tools/Eigen/src/Core/Visitor.h | 240 +++++ tools/Eigen/src/Core/util/BlasUtil.h | 264 +++++ tools/Eigen/src/Core/util/CMakeLists.txt | 6 + tools/Eigen/src/Core/util/Constants.h | 451 ++++++++ .../src/Core/util/DisableStupidWarnings.h | 40 + .../Eigen/src/Core/util/ForwardDeclarations.h | 302 ++++++ tools/Eigen/src/Core/util/MKL_support.h | 158 +++ tools/Eigen/src/Core/util/Memory.h | 979 ++++++++++++++++++ tools/Eigen/src/Core/util/NonMPL2.h | 3 + .../src/Core/util/ReenableStupidWarnings.h | 14 + tools/Eigen/src/Core/util/StaticAssert.h | 208 ++++ tools/Eigen/src/Core/util/XprHelper.h | 469 +++++++++ tools/Eigen/src/Eigen2Support/Block.h | 126 +++ tools/Eigen/src/Eigen2Support/CMakeLists.txt | 8 + tools/Eigen/src/Eigen2Support/Cwise.h | 192 ++++ 28 files changed, 7483 insertions(+) create mode 100644 tools/Eigen/src/Core/ReturnByValue.h create mode 100644 tools/Eigen/src/Core/Reverse.h create mode 100644 tools/Eigen/src/Core/Select.h create mode 100644 tools/Eigen/src/Core/SelfAdjointView.h create mode 100644 tools/Eigen/src/Core/SelfCwiseBinaryOp.h create mode 100644 tools/Eigen/src/Core/SolveTriangular.h create mode 100644 tools/Eigen/src/Core/StableNorm.h create mode 100644 tools/Eigen/src/Core/Stride.h create mode 100644 tools/Eigen/src/Core/Swap.h create mode 100644 tools/Eigen/src/Core/Transpose.h create mode 100644 tools/Eigen/src/Core/Transpositions.h create mode 100644 tools/Eigen/src/Core/TriangularMatrix.h create mode 100644 tools/Eigen/src/Core/VectorwiseOp.h create mode 100644 tools/Eigen/src/Core/Visitor.h create mode 100644 tools/Eigen/src/Core/util/BlasUtil.h create mode 100644 tools/Eigen/src/Core/util/CMakeLists.txt create mode 100644 tools/Eigen/src/Core/util/Constants.h create mode 100644 tools/Eigen/src/Core/util/DisableStupidWarnings.h create mode 100644 tools/Eigen/src/Core/util/ForwardDeclarations.h create mode 100644 tools/Eigen/src/Core/util/MKL_support.h create mode 100644 tools/Eigen/src/Core/util/Memory.h create mode 100644 tools/Eigen/src/Core/util/NonMPL2.h create mode 100644 tools/Eigen/src/Core/util/ReenableStupidWarnings.h create mode 100644 tools/Eigen/src/Core/util/StaticAssert.h create mode 100644 tools/Eigen/src/Core/util/XprHelper.h create mode 100644 tools/Eigen/src/Eigen2Support/Block.h create mode 100644 tools/Eigen/src/Eigen2Support/CMakeLists.txt create mode 100644 tools/Eigen/src/Eigen2Support/Cwise.h diff --git a/tools/Eigen/src/Core/ReturnByValue.h b/tools/Eigen/src/Core/ReturnByValue.h new file mode 100644 index 0000000..f635598 --- /dev/null +++ b/tools/Eigen/src/Core/ReturnByValue.h @@ -0,0 +1,99 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// Copyright (C) 2009-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RETURNBYVALUE_H +#define EIGEN_RETURNBYVALUE_H + +namespace Eigen { + +/** \class ReturnByValue + * \ingroup Core_Module + * + */ + +namespace internal { + +template +struct traits > + : public traits::ReturnType> +{ + enum { + // We're disabling the DirectAccess because e.g. the constructor of + // the Block-with-DirectAccess expression requires to have a coeffRef method. + // Also, we don't want to have to implement the stride stuff. + Flags = (traits::ReturnType>::Flags + | EvalBeforeNestingBit) & ~DirectAccessBit + }; +}; + +/* The ReturnByValue object doesn't even have a coeff() method. + * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix. + * So internal::nested always gives the plain return matrix type. + * + * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + */ +template +struct nested, n, PlainObject> +{ + typedef typename traits::ReturnType type; +}; + +} // end namespace internal + +template class ReturnByValue + : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue >::type +{ + public: + typedef typename internal::traits::ReturnType ReturnType; + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) + + template + inline void evalTo(Dest& dst) const + { static_cast(this)->evalTo(dst); } + inline Index rows() const { return static_cast(this)->rows(); } + inline Index cols() const { return static_cast(this)->cols(); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT + class Unusable{ + Unusable(const Unusable&) {} + Unusable& operator=(const Unusable&) {return *this;} + }; + const Unusable& coeff(Index) const { return *reinterpret_cast(this); } + const Unusable& coeff(Index,Index) const { return *reinterpret_cast(this); } + Unusable& coeffRef(Index) { return *reinterpret_cast(this); } + Unusable& coeffRef(Index,Index) { return *reinterpret_cast(this); } + template Unusable& packet(Index) const; + template Unusable& packet(Index, Index) const; +#endif +}; + +template +template +Derived& DenseBase::operator=(const ReturnByValue& other) +{ + other.evalTo(derived()); + return derived(); +} + +template +template +Derived& DenseBase::lazyAssign(const ReturnByValue& other) +{ + other.evalTo(derived()); + return derived(); +} + + +} // end namespace Eigen + +#endif // EIGEN_RETURNBYVALUE_H diff --git a/tools/Eigen/src/Core/Reverse.h b/tools/Eigen/src/Core/Reverse.h new file mode 100644 index 0000000..e30ae3d --- /dev/null +++ b/tools/Eigen/src/Core/Reverse.h @@ -0,0 +1,224 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2009 Ricard Marxer +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REVERSE_H +#define EIGEN_REVERSE_H + +namespace Eigen { + +/** \class Reverse + * \ingroup Core_Module + * + * \brief Expression of the reverse of a vector or matrix + * + * \param MatrixType the type of the object of which we are taking the reverse + * + * This class represents an expression of the reverse of a vector. + * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::reverse(), VectorwiseOp::reverse() + */ + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename nested::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + + // let's enable LinearAccess only with vectorization because of the product overhead + LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) + ? LinearAccessBit : 0, + + Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), + + CoeffReadCost = _MatrixTypeNested::CoeffReadCost + }; +}; + +template struct reverse_packet_cond +{ + static inline PacketScalar run(const PacketScalar& x) { return preverse(x); } +}; + +template struct reverse_packet_cond +{ + static inline PacketScalar run(const PacketScalar& x) { return x; } +}; + +} // end namespace internal + +template class Reverse + : public internal::dense_xpr_base< Reverse >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + using Base::IsRowMajor; + + // next line is necessary because otherwise const version of operator() + // is hidden by non-const version defined in this file + using Base::operator(); + + protected: + enum { + PacketSize = internal::packet_traits::size, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor) + }; + typedef internal::reverse_packet_cond reverse_packet; + public: + + inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + inline Index innerStride() const + { + return -m_matrix.innerStride(); + } + + inline Scalar& operator()(Index row, Index col) + { + eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return coeffRef(row, col); + } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, + ReverseCol ? m_matrix.cols() - col - 1 : col); + } + + inline CoeffReturnType coeff(Index row, Index col) const + { + return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, + ReverseCol ? m_matrix.cols() - col - 1 : col); + } + + inline CoeffReturnType coeff(Index index) const + { + return m_matrix.coeff(m_matrix.size() - index - 1); + } + + inline Scalar& coeffRef(Index index) + { + return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); + } + + inline Scalar& operator()(Index index) + { + eigen_assert(index >= 0 && index < m_matrix.size()); + return coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return reverse_packet::run(m_matrix.template packet( + ReverseRow ? m_matrix.rows() - row - OffsetRow : row, + ReverseCol ? m_matrix.cols() - col - OffsetCol : col)); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacket( + ReverseRow ? m_matrix.rows() - row - OffsetRow : row, + ReverseCol ? m_matrix.cols() - col - OffsetCol : col, + reverse_packet::run(x)); + } + + template + inline const PacketScalar packet(Index index) const + { + return internal::preverse(m_matrix.template packet( m_matrix.size() - index - PacketSize )); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacket(m_matrix.size() - index - PacketSize, internal::preverse(x)); + } + + const typename internal::remove_all::type& + nestedExpression() const + { + return m_matrix; + } + + protected: + typename MatrixType::Nested m_matrix; +}; + +/** \returns an expression of the reverse of *this. + * + * Example: \include MatrixBase_reverse.cpp + * Output: \verbinclude MatrixBase_reverse.out + * + */ +template +inline typename DenseBase::ReverseReturnType +DenseBase::reverse() +{ + return derived(); +} + +/** This is the const version of reverse(). */ +template +inline const typename DenseBase::ConstReverseReturnType +DenseBase::reverse() const +{ + return derived(); +} + +/** This is the "in place" version of reverse: it reverses \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional features: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap) + * - it allows future optimizations (cache friendliness, etc.) + * + * \sa reverse() */ +template +inline void DenseBase::reverseInPlace() +{ + derived() = derived().reverse().eval(); +} + +} // end namespace Eigen + +#endif // EIGEN_REVERSE_H diff --git a/tools/Eigen/src/Core/Select.h b/tools/Eigen/src/Core/Select.h new file mode 100644 index 0000000..87993bb --- /dev/null +++ b/tools/Eigen/src/Core/Select.h @@ -0,0 +1,162 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELECT_H +#define EIGEN_SELECT_H + +namespace Eigen { + +/** \class Select + * \ingroup Core_Module + * + * \brief Expression of a coefficient wise version of the C++ ternary operator ?: + * + * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix + * \param ThenMatrixType the type of the \em then expression + * \param ElseMatrixType the type of the \em else expression + * + * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:. + * It is the return type of DenseBase::select() and most of the time this is the only way it is used. + * + * \sa DenseBase::select(const DenseBase&, const DenseBase&) const + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename traits::Scalar Scalar; + typedef Dense StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ConditionMatrixType::Nested ConditionMatrixNested; + typedef typename ThenMatrixType::Nested ThenMatrixNested; + typedef typename ElseMatrixType::Nested ElseMatrixNested; + enum { + RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime, + ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, + CoeffReadCost = traits::type>::CoeffReadCost + + EIGEN_SIZE_MAX(traits::type>::CoeffReadCost, + traits::type>::CoeffReadCost) + }; +}; +} + +template +class Select : internal::no_assignment_operator, + public internal::dense_xpr_base< Select >::type +{ + public: + + typedef typename internal::dense_xpr_base