// sets for all the nodes. So we allocate an array of state sets,
// one for each leaf node (i.e. each DFA position.)
//
fFollowList = new CMStateSet[fLeafCount];
for (int index = 0; index < fLeafCount; index++)
fFollowList[index] = new CMStateSet(fLeafCount);
calcFollowList(fHeadNode);
//
// And finally the big push... Now we build the DFA using all the
// states and the tree we've built up. First we set up the various
// data structures we are going to use while we do this.
//
// First of all we need an array of unique element names in our
// content model. For each transition table entry, we need a set of
// contiguous indices to represent the transitions for a particular
// input element. So we need a zero-based range of indexes that
// map to element types. This element map provides that mapping.
//
fElemMap = new Object [fLeafCount];
fElemMapType = new int[fLeafCount];
fElemMapSize = 0;
for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
// optimization from Henry Zongaro:
//fElemMap[outIndex] = new Object ();
fElemMap[outIndex] = null;
int inIndex = 0;
final Object decl = fLeafList[outIndex].getDecl();
// REVISIT: shouldn't we always compare the decls by reference?
// if we ever combine two different element decls with
// the same name and namespace, then this content model
// violates UPA.
// Comparing by name/namespace was inherited from Xerces1,
// where we only store name and uri, and couldn't compare
// whether two decls are the same.
// After we support UPA, change the following big "if"
// to the following 4 lines.
//for (; inIndex < fElemMapSize; inIndex++) {
// if (decl == fElemMap[inIndex])
// break;
//}
if (fLeafListType[outIndex] == XSParticleDecl.PARTICLE_WILDCARD) {
for (; inIndex < fElemMapSize; inIndex++) {
if (decl == fElemMap[inIndex])
break;
}
} else {
// Get the current leaf's element
final XSElementDecl element = (XSElementDecl)decl;
// See if the current leaf node's element index is in the list
for (; inIndex < fElemMapSize; inIndex++) {
if (fElemMapType[inIndex] == fLeafListType[outIndex] &&
((XSElementDecl)fElemMap[inIndex]).fTargetNamespace == element.fTargetNamespace &&
((XSElementDecl)fElemMap[inIndex]).fName == element.fName)
break;
}
}
// If it was not in the list, then add it, if not the EOC node
if (inIndex == fElemMapSize) {
fElemMap[fElemMapSize] = decl;
fElemMapType[fElemMapSize] = fLeafListType[outIndex];
fElemMapSize++;
}
}
// the last entry in the element map must be the EOC element.
// remove it from the map.
if (DEBUG) {
if (((XSElementDecl)fElemMap[fElemMapSize-1]).fName != fEOCString)
System.err.println("interal error in DFA: last element is not EOC.");
}
fElemMapSize--;
// set up the fLeafNameTypeVector object if there is one.
/**** but apparently there never will be since this was commented out for some reason...
if (fLeafNameTypeVector != null) {
fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
}
******/
/***
* Optimization(Jan, 2001); We sort fLeafList according to
* elemIndex which is *uniquely* associated to each leaf.
* We are *assuming* that each element appears in at least one leaf.
**/
int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
int fSortCount = 0;
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
final Object decl = fElemMap[elemIndex];
for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
// REVISIT: shouldn't we always compare the decls by reference?
// if we ever combine two different element decls with
// the same name and namespace, then this content model
// violates UPA.
// Comparing by name/namespace was inherited from Xerces1,
// where we only store name and uri, and couldn't compare
// whether two decls are the same.
// After we support UPA, change the following 2 "if"s
// to the following 2 lines.
//if (decl == fLeafList[leafIndex].getDecl())
// fLeafSorter[fSortCount++] = leafIndex;
if (fElemMapType[elemIndex] != fLeafListType[leafIndex])
continue;
if (fLeafListType[leafIndex] == XSParticleDecl.PARTICLE_WILDCARD) {
if (decl == fLeafList[leafIndex].getDecl())
fLeafSorter[fSortCount++] = leafIndex;
} else {
final XSElementDecl leaf = (XSElementDecl)fLeafList[leafIndex].getDecl();
final XSElementDecl element = (XSElementDecl)decl;
if (leaf.fTargetNamespace == element.fTargetNamespace &&
leaf.fName == element.fName ) {
fLeafSorter[fSortCount++] = leafIndex;
}
}
}
fLeafSorter[fSortCount++] = -1;
}
/* Optimization(Jan, 2001) */
//
// Next lets create some arrays, some that hold transient
// information during the DFA build and some that are permanent.
// These are kind of sticky since we cannot know how big they will
// get, but we don't want to use any Java collections because of
// performance.
//
// Basically they will probably be about fLeafCount*2 on average,
// but can be as large as 2^(fLeafCount*2), worst case. So we start
// with fLeafCount*4 as a middle ground. This will be very unlikely
// to ever have to expand, though if it does, the overhead will be
// somewhat ugly.
//
int curArraySize = fLeafCount * 4;
CMStateSet[] statesToDo = new CMStateSet[curArraySize];
fFinalStateFlags = new boolean[curArraySize];
fTransTable = new int[curArraySize][];
//
// Ok we start with the initial set as the first pos set of the
// head node (which is the seq node that holds the content model
// and the EOC node.)
//
CMStateSet setT = fHeadNode.firstPos();
//
// Init our two state flags. Basically the unmarked state counter
// is always chasing the current state counter. When it catches up,
// that means we made a pass through that did not add any new states
// to the lists, at which time we are done. We could have used an
// expanding array of flags which we used to mark off states as we
// complete them, but this is easier though less readable maybe.
//
int unmarkedState = 0;
int curState = 0;
//
// Init the first transition table entry, and put the initial state
// into the states to do list, then bump the current state.
//
fTransTable[curState] = makeDefStateList();
statesToDo[curState] = setT;
curState++;
/* Optimization(Jan, 2001); This is faster for
* a large content model such as, "(t001+|t002+|.... |t500+)".
*/
java.util.Hashtable stateTable = new java.util.Hashtable();
/* Optimization(Jan, 2001) */
//
// Ok, almost done with the algorithm... We now enter the
// loop where we go until the states done counter catches up with
// the states to do counter.
//
while (unmarkedState < curState) {
//
// Get the first unmarked state out of the list of states to do.
// And get the associated transition table entry.
//
setT = statesToDo[unmarkedState];
int[] transEntry = fTransTable[unmarkedState];
// Mark this one final if it contains the EOC state
fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
// Bump up the unmarked state count, marking this state done
unmarkedState++;
// Loop through each possible input symbol in the element map
CMStateSet newSet = null;
/* Optimization(Jan, 2001) */
int sorterIndex = 0;
/* Optimization(Jan, 2001) */
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
//
// Build up a set of states which is the union of all of
// the follow sets of DFA positions that are in the current
// state. If we gave away the new set last time through then
// create a new one. Otherwise, zero out the existing one.
//
if (newSet == null)
newSet = new CMStateSet(fLeafCount);
else
newSet.zeroBits();
/* Optimization(Jan, 2001) */
int leafIndex = fLeafSorter[sorterIndex++];
while (leafIndex != -1) {
// If this leaf index (DFA position) is in the current set...
if (setT.getBit(leafIndex)) {
//
// If this leaf is the current input symbol, then we
// want to add its follow list to the set of states to
// transition to from the current state.
//
newSet.union(fFollowList[leafIndex]);
}
leafIndex = fLeafSorter[sorterIndex++];
}
/* Optimization(Jan, 2001) */
//
// If this new set is not empty, then see if it's in the list
// of states to do. If not, then add it.
//
if (!newSet.isEmpty()) {
//
// Search the 'states to do' list to see if this new
// state set is already in there.
//