System.out.println("-----Tokens: " + tokCount[k] + "-----");
for (int i = 0; i < tokCount[k]; i++) {
buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
}
//make sure we produce the same tokens
TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))));
TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
teeStream.consumeAllTokens();
TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), 100);
CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class);
for (int i=0; stream.incrementToken(); i++) {
assertTrue(sink.incrementToken());
assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true);
}
//simulate two fields, each being analyzed once, for 20 documents
for (int j = 0; j < modCounts.length; j++) {
int tfPos = 0;
long start = System.currentTimeMillis();
for (int i = 0; i < 20; i++) {
stream = new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString())));
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
tfPos += posIncrAtt.getPositionIncrement();
}
stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), modCounts[j]);
posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
tfPos += posIncrAtt.getPositionIncrement();
}
}
long finish = System.currentTimeMillis();
System.out.println("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
int sinkPos = 0;
//simulate one field with one sink
start = System.currentTimeMillis();
for (int i = 0; i < 20; i++) {
teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))));
sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
while (teeStream.incrementToken()) {
sinkPos += posIncrAtt.getPositionIncrement();
}