maxTileDim[k] = Math.min(dim[k], processingTileDim[k]);
// so it is guaranteed that maxTileDim contains allowed matrix dimensions
tileCount *= tileCounts[k]; // overflow impossible, because tileCounts[k] <= dim[k]
}
final int nt = (int)Math.min(numberOfTasks, tileCount);
final IRectangularArea maxAperture = maxDependenceAperture(srcCopy.keySet());
DependenceApertureBuilder.extendDimensions(dim, maxAperture); // overflow check before any calculations
long maxExtTileSize = Arrays.longMul(DependenceApertureBuilder.extendDimensions(maxTileDim, maxAperture));
double estimatedMemory = estimateWorkMemory(maxExtTileSize, destCopy.values(), srcCopy.values(), nt);
MemoryModel betterModel = estimatedMemory < maxTempJavaMemory ? Arrays.SMM : memoryModel();
final List<Map<K, UpdatableArray>> srcTileMem = allocateTile(betterModel, maxExtTileSize, srcCopy, nt);
final List<Map<K, UpdatableArray>> destTileMem = allocateTile(betterModel, maxExtTileSize, destCopy, nt);
final Matrix<?> enumerator = Matrices.matrix(Arrays.nIntCopies(tileCount, 157), tileCounts);
// - this trivial virtual matrix is the simplest way to enumerate all tiles
ArrayContext context = this.context(); // maybe, already not a context of TilingProcessorFactory!
if (nt > 1) {
context = context == null ? ArrayContext.DEFAULT_SINGLE_THREAD : context.singleThreadVersion();
} else if (context == null) {
context = ArrayContext.DEFAULT;
}
Runnable[] tasks = new Runnable[nt];
Runnable[] postprocessing = new Runnable[nt]; // non-parallel
long readyElementsCount = 0;
int taskIndex = 0;
// System.out.println("Number of tasks/tiles: " + nt + "/" + tileCount + ", " + maxAperture + "; "
// + src.size() + " arguments and " + dest.size() + " results " + JArrays.toString(dim, "x", 1000));
for (long tileIndex = 0; tileIndex < tileCount; tileIndex++) {
long[] tileIndexes = enumerator.coordinates(tileIndex, null);
final long[] tilePos = new long[dimCount];
final long[] tileDim = new long[dimCount];
final long[] tileMax = new long[dimCount];
final long[] extTilePos = new long[dimCount];
final long[] extTileDim = new long[dimCount];
final long[] extTileMax = new long[dimCount];
long tileSize = 1;
for (int k = 0; k < dimCount; k++) {
tilePos[k] = tileIndexes[k] * processingTileDim[k];
assert tilePos[k] < dim[k];
tileDim[k] = Math.min(processingTileDim[k], dim[k] - tilePos[k]); // exclusive
assert tileDim[k] > 0; // because processingTileDim[k] > 0: checked in the constructor
tileMax[k] = tilePos[k] + tileDim[k] - 1;
extTileDim[k] = DependenceApertureBuilder.safelyAdd(tileDim[k], maxAperture.width(k));
extTilePos[k] = tilePos[k] + maxAperture.min(k);
extTileMax[k] = tileMax[k] + maxAperture.max(k);
tileSize *= tileDim[k];
}
final ArrayContext ac =
nt == 1 ?
context.part(readyElementsCount, readyElementsCount + tileSize, matrixSize) :