// Replace the index lists with newly created ones before tokenizing. The add(...) calls
// in the loop below receive a document index and a field index for every token.
// NOTE(review): presumably add(...) appends to these lists -- it is not visible here, confirm.
documentIndices = new IntArrayList();
fieldIndices = new ByteArrayList();
final Iterator<Document> docIterator = documents.iterator();
// Index handed to add(...) and addFieldSeparator(...) for the document being processed.
int documentIndex = 0;
// One tokenizer instance for the whole pass; it is reset with a new StringReader
// for each non-empty field value.
final ITokenizer ts = context.language.getTokenizer();
// Single buffer object created once and passed to ts.setTermBuffer(...) for every token,
// then to context.intern(...), so no wrapper is allocated per token in this code.
// NOTE(review): presumably setTermBuffer makes the wrapper expose the current token's text -- confirm.
final MutableCharArray wrapper = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);
while (docIterator.hasNext())
{
final Document doc = docIterator.next();
boolean hadTokens = false;
for (int i = 0; i < fieldNames.length; i++)
{
final byte fieldIndex = (byte) i;
final String fieldName = fieldNames[i];
final String fieldValue = doc.getField(fieldName);
if (!StringUtils.isEmpty(fieldValue))
{
try
{
short tokenType;
ts.reset(new StringReader(fieldValue));
if ((tokenType = ts.nextToken()) != ITokenizer.TT_EOF)
{
if (hadTokens) addFieldSeparator(documentIndex);
do
{
ts.setTermBuffer(wrapper);
add(documentIndex, fieldIndex, context.intern(wrapper), tokenType);
} while ( (tokenType = ts.nextToken()) != ITokenizer.TT_EOF);
hadTokens = true;
}
}
catch (IOException e)
{