p = -p;
}
if (p == 0) {
return DoubleFactory2D.dense.identity(A.rows());
}
DoubleMatrix2D T = A.like(); // temporary
if (p == 1) {
return T.assign(A);
} // safes one auxiliary matrix allocation
if (p == 2) {
blas.dgemm(false, false, 1, A, A, 0, T); // mult(A,A); // safes one auxiliary matrix allocation
return T;
}
int k = QuickBitVector.mostSignificantBit(p); // index of highest bit in state "true"
/*
this is the naive version:
DoubleMatrix2D B = A.copy();
for (int i=0; i<p-1; i++) {
B = mult(B,A);
}
return B;
*/
// here comes the optimized version:
//org.apache.mahout.math.Timer timer = new Timer().start();
int i = 0;
while (i <= k && (p & (1 << i)) == 0) { // while (bit i of p == false)
// A = mult(A,A); would allocate a lot of temporary memory
blas.dgemm(false, false, 1, A, A, 0, T); // A.zMult(A,T);
DoubleMatrix2D swap = A;
A = T;
T = swap; // swap A with T
i++;
}
DoubleMatrix2D B = A.copy();
i++;
for (; i <= k; i++) {
// A = mult(A,A); would allocate a lot of temporary memory
blas.dgemm(false, false, 1, A, A, 0, T); // A.zMult(A,T);
DoubleMatrix2D swap = A;
A = T;
T = swap; // swap A with T
if ((p & (1 << i)) != 0) { // if (bit i of p == true)
// B = mult(B,A); would allocate a lot of temporary memory