// sets for all the nodes. So we allocate an array of state sets,
// one for each leaf node (i.e. each DFA position.)
//
fFollowList = new CMStateSet[fLeafCount];
for (int index = 0; index < fLeafCount; index++)
fFollowList[index] = new CMStateSet(fLeafCount);
calcFollowList(fHeadNode);
//
// And finally the big push... Now we build the DFA using all the
// states and the tree we've built up. First we set up the various
// data structures we are going to use while we do this.
//
// First of all we need an array of unique element names in our
// content model. For each transition table entry, we need a set of
// contiguous indices to represent the transitions for a particular
// input element. So we need a zero-based range of indexes that
// map to element types. This element map provides that mapping.
//
fElemMap = new Object[fLeafCount];
fElemMapType = new int[fLeafCount];
fElemMapId = new int[fLeafCount];
fElemMapSize = 0;
Occurence [] elemOccurenceMap = null;
for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
// optimization from Henry Zongaro:
//fElemMap[outIndex] = new Object ();
fElemMap[outIndex] = null;
int inIndex = 0;
final int id = fLeafList[outIndex].getParticleId();
for (; inIndex < fElemMapSize; inIndex++) {
if (id == fElemMapId[inIndex])
break;
}
// If it was not in the list, then add it, if not the EOC node
if (inIndex == fElemMapSize) {
XSCMLeaf leaf = fLeafList[outIndex];
fElemMap[fElemMapSize] = leaf.getLeaf();
if (leaf instanceof XSCMRepeatingLeaf) {
if (elemOccurenceMap == null) {
elemOccurenceMap = new Occurence[fLeafCount];
}
elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize);
}
fElemMapType[fElemMapSize] = fLeafListType[outIndex];
fElemMapId[fElemMapSize] = id;
fElemMapSize++;
}
}
// the last entry in the element map must be the EOC element.
// remove it from the map.
if (DEBUG) {
if (fElemMapId[fElemMapSize-1] != -1)
System.err.println("interal error in DFA: last element is not EOC.");
}
fElemMapSize--;
/***
* Optimization(Jan, 2001); We sort fLeafList according to
* elemIndex which is *uniquely* associated to each leaf.
* We are *assuming* that each element appears in at least one leaf.
**/
int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
int fSortCount = 0;
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
final int id = fElemMapId[elemIndex];
for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
if (id == fLeafList[leafIndex].getParticleId())
fLeafSorter[fSortCount++] = leafIndex;
}
fLeafSorter[fSortCount++] = -1;
}
/* Optimization(Jan, 2001) */
//
// Next lets create some arrays, some that hold transient
// information during the DFA build and some that are permanent.
// These are kind of sticky since we cannot know how big they will
// get, but we don't want to use any Java collections because of
// performance.
//
// Basically they will probably be about fLeafCount*2 on average,
// but can be as large as 2^(fLeafCount*2), worst case. So we start
// with fLeafCount*4 as a middle ground. This will be very unlikely
// to ever have to expand, though if it does, the overhead will be
// somewhat ugly.
//
int curArraySize = fLeafCount * 4;
CMStateSet[] statesToDo = new CMStateSet[curArraySize];
fFinalStateFlags = new boolean[curArraySize];
fTransTable = new int[curArraySize][];
//
// Ok we start with the initial set as the first pos set of the
// head node (which is the seq node that holds the content model
// and the EOC node.)
//
CMStateSet setT = fHeadNode.firstPos();
//
// Init our two state flags. Basically the unmarked state counter
// is always chasing the current state counter. When it catches up,
// that means we made a pass through that did not add any new states
// to the lists, at which time we are done. We could have used a
// expanding array of flags which we used to mark off states as we
// complete them, but this is easier though less readable maybe.
//
int unmarkedState = 0;
int curState = 0;
//
// Init the first transition table entry, and put the initial state
// into the states to do list, then bump the current state.
//
fTransTable[curState] = makeDefStateList();
statesToDo[curState] = setT;
curState++;
/* Optimization(Jan, 2001); This is faster for
* a large content model such as, "(t001+|t002+|.... |t500+)".
*/
HashMap stateTable = new HashMap();
/* Optimization(Jan, 2001) */
//
// Ok, almost done with the algorithm... We now enter the
// loop where we go until the states done counter catches up with
// the states to do counter.
//
while (unmarkedState < curState) {
//
// Get the first unmarked state out of the list of states to do.
// And get the associated transition table entry.
//
setT = statesToDo[unmarkedState];
int[] transEntry = fTransTable[unmarkedState];
// Mark this one final if it contains the EOC state
fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);
// Bump up the unmarked state count, marking this state done
unmarkedState++;
// Loop through each possible input symbol in the element map
CMStateSet newSet = null;
/* Optimization(Jan, 2001) */
int sorterIndex = 0;
/* Optimization(Jan, 2001) */
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
//
// Build up a set of states which is the union of all of
// the follow sets of DFA positions that are in the current
// state. If we gave away the new set last time through then
// create a new one. Otherwise, zero out the existing one.
//
if (newSet == null)
newSet = new CMStateSet(fLeafCount);
else
newSet.zeroBits();
/* Optimization(Jan, 2001) */
int leafIndex = fLeafSorter[sorterIndex++];
while (leafIndex != -1) {
// If this leaf index (DFA position) is in the current set...
if (setT.getBit(leafIndex)) {
//
// If this leaf is the current input symbol, then we
// want to add its follow list to the set of states to
// transition to from the current state.
//
newSet.union(fFollowList[leafIndex]);
}
leafIndex = fLeafSorter[sorterIndex++];
}
/* Optimization(Jan, 2001) */
//
// If this new set is not empty, then see if it's in the list
// of states to do. If not, then add it.
//
if (!newSet.isEmpty()) {
//
// Search the 'states to do' list to see if this new
// state set is already in there.
//