throws IOException, InvalidTokenOffsetsException
{
ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
StringBuilder newText=new StringBuilder();
CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
tokenStream.reset();
TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
if (fragmentScorer instanceof QueryScorer) {
((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
}
TokenStream newStream = fragmentScorer.init(tokenStream);
if(newStream != null) {
tokenStream = newStream;
}
fragmentScorer.startFragment(currentFrag);
docFrags.add(currentFrag);
FragmentQueue fragQueue = new FragmentQueue(maxNumFragments);
try
{
String tokenText;
int startOffset;
int endOffset;
int lastEndOffset = 0;
textFragmenter.start(text, tokenStream);
TokenGroup tokenGroup=new TokenGroup(tokenStream);
for (boolean next = tokenStream.incrementToken(); next && (offsetAtt.startOffset()< maxDocCharsToAnalyze);
next = tokenStream.incrementToken())
{
if( (offsetAtt.endOffset()>text.length())
||
(offsetAtt.startOffset()>text.length())
)
{
throw new InvalidTokenOffsetsException("Token "+ termAtt.toString()
+" exceeds length of provided text sized "+text.length());
}
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct()))
{
//the current token is distinct from previous tokens -