Revision | 7d4f2033a6d316198bf50db297b352c22c29a86b (tree) |
---|---|
Time | 2013-03-13 17:27:59 |
Author | Mikiya Fujii <mikiya.fujii@gmai...> |
Commiter | Mikiya Fujii |
Second algorithm using BLAS for Mndo::GetAuxiliaryKNRKRElement is added, which is commneted out yet. #30829
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1325 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -1437,19 +1437,16 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{ | ||
1437 | 1437 | } |
1438 | 1438 | } |
1439 | 1439 | MolDS_wrappers::Blas::GetInstance()->Dsymv(this->molecule->GetNumberAtoms()*dxy*dxy, |
1440 | - //this->molecule->GetNumberAtoms()*dxy*dxy, | |
1441 | 1440 | twoElec, |
1442 | 1441 | twiceMoKL, |
1443 | 1442 | tmpVector); |
1444 | 1443 | value = 4.0*MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIJ, tmpVector); |
1445 | 1444 | MolDS_wrappers::Blas::GetInstance()->Dsymv(this->molecule->GetNumberAtoms()*dxy*dxy, |
1446 | - //this->molecule->GetNumberAtoms()*dxy*dxy, | |
1447 | 1445 | twoElec, |
1448 | 1446 | twiceMoJL, |
1449 | 1447 | tmpVector); |
1450 | 1448 | value -= MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIK, tmpVector); |
1451 | 1449 | MolDS_wrappers::Blas::GetInstance()->Dsymv(this->molecule->GetNumberAtoms()*dxy*dxy, |
1452 | - //this->molecule->GetNumberAtoms()*dxy*dxy, | |
1453 | 1450 | twoElec, |
1454 | 1451 | twiceMoJK, |
1455 | 1452 | tmpVector); |
@@ -1466,6 +1463,123 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{ | ||
1466 | 1463 | */ |
1467 | 1464 | |
1468 | 1465 | /* |
1466 | + // Second algorithm using blas | |
1467 | + double** twoElec = NULL; | |
1468 | + double* twiceMoIJ = NULL; | |
1469 | + double* twiceMoIK = NULL; | |
1470 | + double* twiceMoIL = NULL; | |
1471 | + double** twiceMoB = NULL; | |
1472 | + double** tmpMatrix = NULL; | |
1473 | + int numAOs = this->molecule->GetTotalNumberAOs(); | |
1474 | + MallocerFreer::GetInstance()->Malloc<double>(&twoElec, this->molecule->GetNumberAtoms()*dxy*dxy, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1475 | + MallocerFreer::GetInstance()->Malloc<double>(&twiceMoIJ, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1476 | + MallocerFreer::GetInstance()->Malloc<double>(&twiceMoIK, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1477 | + MallocerFreer::GetInstance()->Malloc<double>(&twiceMoIL, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1478 | + MallocerFreer::GetInstance()->Malloc<double>(&twiceMoB, 3, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1479 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpMatrix,3, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1480 | + for(int A=0; A<this->molecule->GetNumberAtoms(); A++){ | |
1481 | + const Atom& atomA = *this->molecule->GetAtom(A); | |
1482 | + int firstAOIndexA = atomA.GetFirstAOIndex(); | |
1483 | + int lastAOIndexA = atomA.GetLastAOIndex(); | |
1484 | + for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){ | |
1485 | + for(int nu=firstAOIndexA; nu<=lastAOIndexA; nu++){ | |
1486 | + twiceMoIJ[A*dxy*dxy+(mu -firstAOIndexA)*dxy+(nu -firstAOIndexA)]=fockMatrix[moI][mu ]*fockMatrix[moJ][nu ]; | |
1487 | + twiceMoIK[A*dxy*dxy+(mu -firstAOIndexA)*dxy+(nu -firstAOIndexA)]=fockMatrix[moI][mu ]*fockMatrix[moK][nu ]; | |
1488 | + twiceMoIL[A*dxy*dxy+(mu -firstAOIndexA)*dxy+(nu -firstAOIndexA)]=fockMatrix[moI][mu ]*fockMatrix[moL][nu ]; | |
1489 | + } | |
1490 | + } | |
1491 | + } | |
1492 | + | |
1493 | + for(int B=0; B<this->molecule->GetNumberAtoms(); B++){ | |
1494 | + const Atom& atomB = *this->molecule->GetAtom(B); | |
1495 | + int firstAOIndexB = atomB.GetFirstAOIndex(); | |
1496 | + int lastAOIndexB = atomB.GetLastAOIndex(); | |
1497 | + for(int lambda=firstAOIndexB; lambda<=lastAOIndexB; lambda++){ | |
1498 | + for(int sigma=firstAOIndexB; sigma<=lastAOIndexB; sigma++){ | |
1499 | + twiceMoB[0][B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)]=fockMatrix[moK][lambda]*fockMatrix[moL][sigma]; | |
1500 | + twiceMoB[1][B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)]=fockMatrix[moJ][lambda]*fockMatrix[moL][sigma]; | |
1501 | + twiceMoB[2][B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)]=fockMatrix[moJ][lambda]*fockMatrix[moK][sigma]; | |
1502 | + } | |
1503 | + } | |
1504 | + } | |
1505 | + | |
1506 | + for(int A=0; A<this->molecule->GetNumberAtoms(); A++){ | |
1507 | + const Atom& atomA = *this->molecule->GetAtom(A); | |
1508 | + int firstAOIndexA = atomA.GetFirstAOIndex(); | |
1509 | + int lastAOIndexA = atomA.GetLastAOIndex(); | |
1510 | + for(int B=0; B<this->molecule->GetNumberAtoms(); B++){ | |
1511 | + const Atom& atomB = *this->molecule->GetAtom(B); | |
1512 | + int firstAOIndexB = atomB.GetFirstAOIndex(); | |
1513 | + int lastAOIndexB = atomB.GetLastAOIndex(); | |
1514 | + double gamma = 0.0; | |
1515 | + if(A!=B){ | |
1516 | + for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){ | |
1517 | + for(int nu=firstAOIndexA; nu<=lastAOIndexA; nu++){ | |
1518 | + for(int lambda=firstAOIndexB; lambda<=lastAOIndexB; lambda++){ | |
1519 | + for(int sigma=firstAOIndexB; sigma<=lastAOIndexB; sigma++){ | |
1520 | + twoElec[A*dxy*dxy+(mu-firstAOIndexA)*dxy+(nu-firstAOIndexA)] | |
1521 | + [B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)] = | |
1522 | + this->twoElecTwoCore[A] | |
1523 | + [B] | |
1524 | + [mu-firstAOIndexA] | |
1525 | + [nu-firstAOIndexA] | |
1526 | + [lambda-firstAOIndexB] | |
1527 | + [sigma-firstAOIndexB]; | |
1528 | + } | |
1529 | + } | |
1530 | + } | |
1531 | + } | |
1532 | + } | |
1533 | + else{ | |
1534 | + for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){ | |
1535 | + for(int nu=firstAOIndexA; nu<=lastAOIndexA; nu++){ | |
1536 | + for(int lambda=firstAOIndexB; lambda<=lastAOIndexB; lambda++){ | |
1537 | + for(int sigma=firstAOIndexB; sigma<=lastAOIndexB; sigma++){ | |
1538 | + if(mu==nu && lambda==sigma){ | |
1539 | + OrbitalType orbitalMu = atomA.GetValence(mu-firstAOIndexA); | |
1540 | + OrbitalType orbitalLambda = atomB.GetValence(lambda-firstAOIndexB); | |
1541 | + gamma = this->GetCoulombInt(orbitalMu, orbitalLambda, atomA); | |
1542 | + } | |
1543 | + else if((mu==lambda && nu==sigma) || (nu==lambda && mu==sigma) ){ | |
1544 | + OrbitalType orbitalMu = atomA.GetValence(mu-firstAOIndexA); | |
1545 | + OrbitalType orbitalNu = atomA.GetValence(nu-firstAOIndexA); | |
1546 | + gamma = this->GetExchangeInt(orbitalMu, orbitalNu, atomA); | |
1547 | + } | |
1548 | + else{ | |
1549 | + gamma = 0.0; | |
1550 | + } | |
1551 | + twoElec[A*dxy*dxy+(mu-firstAOIndexA)*dxy+(nu-firstAOIndexA)] | |
1552 | + [B*dxy*dxy+(lambda-firstAOIndexB)*dxy+(sigma-firstAOIndexB)] = gamma; | |
1553 | + } | |
1554 | + } | |
1555 | + } | |
1556 | + } | |
1557 | + } | |
1558 | + } | |
1559 | + } | |
1560 | + | |
1561 | + MolDS_wrappers::Blas::GetInstance()->Dgemm(false, true, true, | |
1562 | + this->molecule->GetNumberAtoms()*dxy*dxy, | |
1563 | + 3, | |
1564 | + this->molecule->GetNumberAtoms()*dxy*dxy, | |
1565 | + 1.0, | |
1566 | + twoElec, | |
1567 | + twiceMoB, | |
1568 | + 0.0, | |
1569 | + tmpMatrix); | |
1570 | + value = 4.0*MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIJ, &tmpMatrix[0][0]); | |
1571 | + value -= MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIK, &tmpMatrix[1][0]); | |
1572 | + value -= MolDS_wrappers::Blas::GetInstance()->Ddot(this->molecule->GetNumberAtoms()*dxy*dxy,twiceMoIL, &tmpMatrix[2][0]); | |
1573 | + MallocerFreer::GetInstance()->Free<double>(&twoElec, this->molecule->GetNumberAtoms()*dxy*dxy, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1574 | + MallocerFreer::GetInstance()->Free<double>(&twiceMoIJ, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1575 | + MallocerFreer::GetInstance()->Free<double>(&twiceMoIK, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1576 | + MallocerFreer::GetInstance()->Free<double>(&twiceMoIL, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1577 | + MallocerFreer::GetInstance()->Free<double>(&twiceMoB, 3, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1578 | + MallocerFreer::GetInstance()->Free<double>(&tmpMatrix,3, this->molecule->GetNumberAtoms()*dxy*dxy); | |
1579 | + // End of second algorithm using blas | |
1580 | + */ | |
1581 | + | |
1582 | + /* | |
1469 | 1583 | // slow algorithm |
1470 | 1584 | value = 4.0*this->GetMolecularIntegralElement(moI, moJ, moK, moL, |
1471 | 1585 | *this->molecule, |
@@ -253,6 +253,23 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
253 | 253 | double const* const* matrixB, |
254 | 254 | double beta, |
255 | 255 | double** matrixC) const{ |
256 | + bool isColumnMajorMatrixC = false; | |
257 | + this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixB, isColumnMajorMatrixC,m, n, k, alpha, matrixA, matrixB, beta, matrixC); | |
258 | +} | |
259 | + | |
260 | +// matrixC = alpha*matrixA*matrixB + beta*matrixC | |
261 | +// matrixA: m*k-matrix | |
262 | +// matrixB: k*n-matrix | |
263 | +// matrixC: m*n-matrix | |
264 | +void Blas::Dgemm(bool isColumnMajorMatrixA, | |
265 | + bool isColumnMajorMatrixB, | |
266 | + bool isColumnMajorMatrixC, | |
267 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
268 | + double alpha, | |
269 | + double const* const* matrixA, | |
270 | + double const* const* matrixB, | |
271 | + double beta, | |
272 | + double** matrixC) const{ | |
256 | 273 | double* a = const_cast<double*>(&matrixA[0][0]); |
257 | 274 | double* b = const_cast<double*>(&matrixB[0][0]); |
258 | 275 | double* c = &matrixC[0][0]; |
@@ -285,17 +302,31 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
285 | 302 | #else |
286 | 303 | tmpC = (double*)malloc( sizeof(double)*m*n); |
287 | 304 | #endif |
288 | - for(molds_blas_int i=0; i<m; i++){ | |
289 | - for(molds_blas_int j=0; j<n; j++){ | |
290 | - tmpC[i+j*m] = matrixC[i][j]; | |
305 | + molds_blas_int ldc; | |
306 | + if(isColumnMajorMatrixC){ | |
307 | + this->Dcopy(m*n, &matrixC[0][0], tmpC); | |
308 | + ldc = m; | |
309 | + } | |
310 | + else{ | |
311 | + for(molds_blas_int i=0; i<m; i++){ | |
312 | + for(molds_blas_int j=0; j<n; j++){ | |
313 | + tmpC[i+j*m] = matrixC[i][j]; | |
314 | + } | |
291 | 315 | } |
316 | + ldc = n; | |
292 | 317 | } |
293 | - molds_blas_int ldc = m; | |
318 | + | |
294 | 319 | //call blas |
295 | 320 | cblas_dgemm(CblasColMajor, transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, tmpC, ldc); |
296 | - for(molds_blas_int i=0; i<m; i++){ | |
297 | - for(molds_blas_int j=0; j<n; j++){ | |
298 | - matrixC[i][j] = tmpC[i+j*m]; | |
321 | + | |
322 | + if(isColumnMajorMatrixC){ | |
323 | + this->Dcopy(m*n, tmpC, &matrixC[0][0]); | |
324 | + } | |
325 | + else{ | |
326 | + for(molds_blas_int i=0; i<m; i++){ | |
327 | + for(molds_blas_int j=0; j<n; j++){ | |
328 | + matrixC[i][j] = tmpC[i+j*m]; | |
329 | + } | |
299 | 330 | } |
300 | 331 | } |
301 | 332 | #ifdef __INTEL_COMPILER |
@@ -85,6 +85,15 @@ public: | ||
85 | 85 | double const* const* matrixB, |
86 | 86 | double beta, |
87 | 87 | double** matrixC) const; |
88 | + void Dgemm(bool isColumnMajorMatrixA, | |
89 | + bool isColumnMajorMatrixB, | |
90 | + bool isColumnMajorMatrixC, | |
91 | + molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
92 | + double alpha, | |
93 | + double const* const* matrixA, | |
94 | + double const* const* matrixB, | |
95 | + double beta, | |
96 | + double** matrixC) const; | |
88 | 97 | void Dgemmm(molds_blas_int m, molds_blas_int n, molds_blas_int k, molds_blas_int l, |
89 | 98 | double const* const* matrixA, |
90 | 99 | double const* const* matrixB, |