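* @param domIterator iterator whose next element is the {@code Value} holding the
*                    serialized node to process; for an element node, one further
*                    recursive call per child reads from the same iterator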
*/
//TODO : unify functionalities with storeText -pb
private void collect(Set words, Iterator domIterator) {
TextToken token;
int readOffset;
final byte[] data = ((Value) domIterator.next()).getData();
final short type = Signatures.getType(data[OFFSET_NODE_TYPE]);
switch (type) {
case Node.ELEMENT_NODE :
final int childrenCount = ByteConversion.byteToInt(data, OFFSET_ELEMENT_CHILDREN_COUNT);
for (int i = 0; i < childrenCount; i++)
//recursive call on children
collect(words, domIterator);
break;
case Node.TEXT_NODE :
int dlnLen = ByteConversion.byteToShort(data, OFFSET_TEXT_DLN_LENGTH);
int nodeIdLen = broker.getBrokerPool().getNodeFactory().lengthInBytes(dlnLen, data, OFFSET_DLN);
readOffset = nodeIdLen + OFFSET_DLN;
final String s = new String(data, readOffset, data.length - readOffset, UTF_8);
tokenizer.setText(s);
while (null != (token = tokenizer.nextToken())) {
final String word = token.getText();
if (stoplist.contains(word))
{continue;}
words.add(word.toLowerCase());
}
break;
case Node.ATTRIBUTE_NODE :
final byte idSizeType = (byte) (data[OFFSET_NODE_TYPE] & 0x3);
final boolean hasNamespace = (data[OFFSET_NODE_TYPE] & 0x10) == 0x10;
dlnLen = ByteConversion.byteToShort(data, OFFSET_ATTRIBUTE_DLN_LENGTH);
nodeIdLen = broker.getBrokerPool().getNodeFactory().lengthInBytes(dlnLen, data, OFFSET_DLN);
readOffset = Signatures.getLength(idSizeType) + nodeIdLen + OFFSET_DLN;
if (hasNamespace) {
//TODO : check the order in which both info are read (and discarded)
readOffset += SymbolTable.LENGTH_LOCAL_NAME; // skip namespace id
final short prefixLen = ByteConversion.byteToShort(data, readOffset);
readOffset += prefixLen + SymbolTable.LENGTH_NS_URI; // skip prefix
}
final String val = new String(data, readOffset, data.length - readOffset, UTF_8);
tokenizer.setText(val);
while (null != (token = tokenizer.nextToken())) {
final String word = token.getText();
if (stoplist.contains(word))
{continue;}
words.add(word.toLowerCase());
}