• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

Commit MetaInfo

Revision7d4f2033a6d316198bf50db297b352c22c29a86b (tree)
Time2013-03-13 17:27:59
AuthorMikiya Fujii <mikiya.fujii@gmai...>
CommiterMikiya Fujii

Log Message

Second algorithm using BLAS for Mndo::GetAuxiliaryKNRKRElement is added, which is commneted out yet. #30829

git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1325 1136aad2-a195-0410-b898-f5ea1d11b9d8

Change Summary

Incremental Difference

--- a/src/mndo/Mndo.cpp
+++ b/src/mndo/Mndo.cpp
@@ -1437,19 +1437,16 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{
14371437 }
14381438 }
14391439 MolDS_wrappers::Blas::GetInstance()->Dsymv(this->molecule->GetNumberAtoms()*dxy*dxy,
1440- //this->molecule->GetNumberAtoms()*dxy*dxy,
14411440 twoElec,
14421441 twiceMoKL,
14431442 tmpVector);
14441443 value = 4.0*MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIJ, tmpVector);
14451444 MolDS_wrappers::Blas::GetInstance()->Dsymv(this->molecule->GetNumberAtoms()*dxy*dxy,
1446- //this->molecule->GetNumberAtoms()*dxy*dxy,
14471445 twoElec,
14481446 twiceMoJL,
14491447 tmpVector);
14501448 value -= MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIK, tmpVector);
14511449 MolDS_wrappers::Blas::GetInstance()->Dsymv(this->molecule->GetNumberAtoms()*dxy*dxy,
1452- //this->molecule->GetNumberAtoms()*dxy*dxy,
14531450 twoElec,
14541451 twiceMoJK,
14551452 tmpVector);
@@ -1466,6 +1463,123 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{
14661463 */
14671464
14681465 /*
1466+ // Second algorithm using blas
1467+ double** twoElec = NULL;
1468+ double* twiceMoIJ = NULL;
1469+ double* twiceMoIK = NULL;
1470+ double* twiceMoIL = NULL;
1471+ double** twiceMoB = NULL;
1472+ double** tmpMatrix = NULL;
1473+ int numAOs = this->molecule->GetTotalNumberAOs();
1474+ MallocerFreer::GetInstance()->Malloc<double>(&twoElec, this->molecule->GetNumberAtoms()*dxy*dxy, this->molecule->GetNumberAtoms()*dxy*dxy);
1475+ MallocerFreer::GetInstance()->Malloc<double>(&twiceMoIJ, this->molecule->GetNumberAtoms()*dxy*dxy);
1476+ MallocerFreer::GetInstance()->Malloc<double>(&twiceMoIK, this->molecule->GetNumberAtoms()*dxy*dxy);
1477+ MallocerFreer::GetInstance()->Malloc<double>(&twiceMoIL, this->molecule->GetNumberAtoms()*dxy*dxy);
1478+ MallocerFreer::GetInstance()->Malloc<double>(&twiceMoB, 3, this->molecule->GetNumberAtoms()*dxy*dxy);
1479+ MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix,3, this->molecule->GetNumberAtoms()*dxy*dxy);
1480+ for(int A=0; A<this->molecule->GetNumberAtoms(); A++){
1481+ const Atom& atomA = *this->molecule->GetAtom(A);
1482+ int firstAOIndexA = atomA.GetFirstAOIndex();
1483+ int lastAOIndexA = atomA.GetLastAOIndex();
1484+ for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){
1485+ for(int nu=firstAOIndexA; nu<=lastAOIndexA; nu++){
1486+ twiceMoIJ[A*dxy*dxy+(mu -firstAOIndexA)*dxy+(nu -firstAOIndexA)]=fockMatrix[moI][mu ]*fockMatrix[moJ][nu ];
1487+ twiceMoIK[A*dxy*dxy+(mu -firstAOIndexA)*dxy+(nu -firstAOIndexA)]=fockMatrix[moI][mu ]*fockMatrix[moK][nu ];
1488+ twiceMoIL[A*dxy*dxy+(mu -firstAOIndexA)*dxy+(nu -firstAOIndexA)]=fockMatrix[moI][mu ]*fockMatrix[moL][nu ];
1489+ }
1490+ }
1491+ }
1492+
1493+ for(int B=0; B<this->molecule->GetNumberAtoms(); B++){
1494+ const Atom& atomB = *this->molecule->GetAtom(B);
1495+ int firstAOIndexB = atomB.GetFirstAOIndex();
1496+ int lastAOIndexB = atomB.GetLastAOIndex();
1497+ for(int lambda=firstAOIndexB; lambda<=lastAOIndexB; lambda++){
1498+ for(int sigma=firstAOIndexB; sigma<=lastAOIndexB; sigma++){
1499+ twiceMoB[0][B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)]=fockMatrix[moK][lambda]*fockMatrix[moL][sigma];
1500+ twiceMoB[1][B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)]=fockMatrix[moJ][lambda]*fockMatrix[moL][sigma];
1501+ twiceMoB[2][B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)]=fockMatrix[moJ][lambda]*fockMatrix[moK][sigma];
1502+ }
1503+ }
1504+ }
1505+
1506+ for(int A=0; A<this->molecule->GetNumberAtoms(); A++){
1507+ const Atom& atomA = *this->molecule->GetAtom(A);
1508+ int firstAOIndexA = atomA.GetFirstAOIndex();
1509+ int lastAOIndexA = atomA.GetLastAOIndex();
1510+ for(int B=0; B<this->molecule->GetNumberAtoms(); B++){
1511+ const Atom& atomB = *this->molecule->GetAtom(B);
1512+ int firstAOIndexB = atomB.GetFirstAOIndex();
1513+ int lastAOIndexB = atomB.GetLastAOIndex();
1514+ double gamma = 0.0;
1515+ if(A!=B){
1516+ for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){
1517+ for(int nu=firstAOIndexA; nu<=lastAOIndexA; nu++){
1518+ for(int lambda=firstAOIndexB; lambda<=lastAOIndexB; lambda++){
1519+ for(int sigma=firstAOIndexB; sigma<=lastAOIndexB; sigma++){
1520+ twoElec[A*dxy*dxy+(mu-firstAOIndexA)*dxy+(nu-firstAOIndexA)]
1521+ [B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)] =
1522+ this->twoElecTwoCore[A]
1523+ [B]
1524+ [mu-firstAOIndexA]
1525+ [nu-firstAOIndexA]
1526+ [lambda-firstAOIndexB]
1527+ [sigma-firstAOIndexB];
1528+ }
1529+ }
1530+ }
1531+ }
1532+ }
1533+ else{
1534+ for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){
1535+ for(int nu=firstAOIndexA; nu<=lastAOIndexA; nu++){
1536+ for(int lambda=firstAOIndexB; lambda<=lastAOIndexB; lambda++){
1537+ for(int sigma=firstAOIndexB; sigma<=lastAOIndexB; sigma++){
1538+ if(mu==nu && lambda==sigma){
1539+ OrbitalType orbitalMu = atomA.GetValence(mu-firstAOIndexA);
1540+ OrbitalType orbitalLambda = atomB.GetValence(lambda-firstAOIndexB);
1541+ gamma = this->GetCoulombInt(orbitalMu, orbitalLambda, atomA);
1542+ }
1543+ else if((mu==lambda && nu==sigma) || (nu==lambda && mu==sigma) ){
1544+ OrbitalType orbitalMu = atomA.GetValence(mu-firstAOIndexA);
1545+ OrbitalType orbitalNu = atomA.GetValence(nu-firstAOIndexA);
1546+ gamma = this->GetExchangeInt(orbitalMu, orbitalNu, atomA);
1547+ }
1548+ else{
1549+ gamma = 0.0;
1550+ }
1551+ twoElec[A*dxy*dxy+(mu-firstAOIndexA)*dxy+(nu-firstAOIndexA)]
1552+ [B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)] = gamma;
1553+ }
1554+ }
1555+ }
1556+ }
1557+ }
1558+ }
1559+ }
1560+
1561+ MolDS_wrappers::Blas::GetInstance()->Dgemm(false, true, true,
1562+ this->molecule->GetNumberAtoms()*dxy*dxy,
1563+ 3,
1564+ this->molecule->GetNumberAtoms()*dxy*dxy,
1565+ 1.0,
1566+ twoElec,
1567+ twiceMoB,
1568+ 0.0,
1569+ tmpMatrix);
1570+ value = 4.0*MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIJ, &tmpMatrix[0][0]);
1571+ value -= MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIK, &tmpMatrix[1][0]);
1572+ value -= MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIL, &tmpMatrix[2][0]);
1573+ MallocerFreer::GetInstance()->Free<double>(&twoElec, this->molecule->GetNumberAtoms()*dxy*dxy, this->molecule->GetNumberAtoms()*dxy*dxy);
1574+ MallocerFreer::GetInstance()->Free<double>(&twiceMoIJ, this->molecule->GetNumberAtoms()*dxy*dxy);
1575+ MallocerFreer::GetInstance()->Free<double>(&twiceMoIK, this->molecule->GetNumberAtoms()*dxy*dxy);
1576+ MallocerFreer::GetInstance()->Free<double>(&twiceMoIL, this->molecule->GetNumberAtoms()*dxy*dxy);
1577+ MallocerFreer::GetInstance()->Free<double>(&twiceMoB, 3, this->molecule->GetNumberAtoms()*dxy*dxy);
1578+ MallocerFreer::GetInstance()->Free<double>(&tmpMatrix,3, this->molecule->GetNumberAtoms()*dxy*dxy);
1579+ // End of second algorithm using blas
1580+ */
1581+
1582+ /*
14691583 // slow algorithm
14701584 value = 4.0*this->GetMolecularIntegralElement(moI, moJ, moK, moL,
14711585 *this->molecule,
--- a/src/wrappers/Blas.cpp
+++ b/src/wrappers/Blas.cpp
@@ -253,6 +253,23 @@ void Blas::Dgemm(bool isColumnMajorMatrixA,
253253 double const* const* matrixB,
254254 double beta,
255255 double** matrixC) const{
256+ bool isColumnMajorMatrixC = false;
257+ this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixB, isColumnMajorMatrixC,m, n, k, alpha, matrixA, matrixB, beta, matrixC);
258+}
259+
260+// matrixC = alpha*matrixA*matrixB + beta*matrixC
261+// matrixA: m*k-matrix
262+// matrixB: k*n-matrix
263+// matrixC: m*n-matrix
264+void Blas::Dgemm(bool isColumnMajorMatrixA,
265+ bool isColumnMajorMatrixB,
266+ bool isColumnMajorMatrixC,
267+ molds_blas_int m, molds_blas_int n, molds_blas_int k,
268+ double alpha,
269+ double const* const* matrixA,
270+ double const* const* matrixB,
271+ double beta,
272+ double** matrixC) const{
256273 double* a = const_cast<double*>(&matrixA[0][0]);
257274 double* b = const_cast<double*>(&matrixB[0][0]);
258275 double* c = &matrixC[0][0];
@@ -285,17 +302,31 @@ void Blas::Dgemm(bool isColumnMajorMatrixA,
285302 #else
286303 tmpC = (double*)malloc( sizeof(double)*m*n);
287304 #endif
288- for(molds_blas_int i=0; i<m; i++){
289- for(molds_blas_int j=0; j<n; j++){
290- tmpC[i+j*m] = matrixC[i][j];
305+ molds_blas_int ldc;
306+ if(isColumnMajorMatrixC){
307+ this->Dcopy(m*n, &matrixC[0][0], tmpC);
308+ ldc = m;
309+ }
310+ else{
311+ for(molds_blas_int i=0; i<m; i++){
312+ for(molds_blas_int j=0; j<n; j++){
313+ tmpC[i+j*m] = matrixC[i][j];
314+ }
291315 }
316+ ldc = n;
292317 }
293- molds_blas_int ldc = m;
318+
294319 //call blas
295320 cblas_dgemm(CblasColMajor, transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, tmpC, ldc);
296- for(molds_blas_int i=0; i<m; i++){
297- for(molds_blas_int j=0; j<n; j++){
298- matrixC[i][j] = tmpC[i+j*m];
321+
322+ if(isColumnMajorMatrixC){
323+ this->Dcopy(m*n, tmpC, &matrixC[0][0]);
324+ }
325+ else{
326+ for(molds_blas_int i=0; i<m; i++){
327+ for(molds_blas_int j=0; j<n; j++){
328+ matrixC[i][j] = tmpC[i+j*m];
329+ }
299330 }
300331 }
301332 #ifdef __INTEL_COMPILER
--- a/src/wrappers/Blas.h
+++ b/src/wrappers/Blas.h
@@ -85,6 +85,15 @@ public:
8585 double const* const* matrixB,
8686 double beta,
8787 double** matrixC) const;
88+ void Dgemm(bool isColumnMajorMatrixA,
89+ bool isColumnMajorMatrixB,
90+ bool isColumnMajorMatrixC,
91+ molds_blas_int m, molds_blas_int n, molds_blas_int k,
92+ double alpha,
93+ double const* const* matrixA,
94+ double const* const* matrixB,
95+ double beta,
96+ double** matrixC) const;
8897 void Dgemmm(molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l,
8998 double const* const* matrixA,
9099 double const* const* matrixB,