*/
public final Token next() throws IOException
{
//*-- if a token was saved, return it
if (stoken != null) //*-- no. 1
{ Token tempToken = stoken; stoken = null;
return(tempToken); }
//*-- get the next token and check if it is a stop word
ctoken = input.next();
if (ctoken == null) return (null); //*-- nos. 7 and 8
String ctokenText = ctoken.termText();
//*-- set stopword status: True if in stopwords list, or does NOT contain an alphabetic character
cstop = ( stopWords.contains(ctokenText) ) ? true: (wordPattern.matcher(ctokenText).matches()) ? false: true;
//*-- if there is no previous token yet
if (ptoken == null)
{ //*-- continue if the current token is a stop word
if (cstop) //*-- no. 5
{ ptoken = ctoken; pstop = cstop;
ctoken = input.next(); cstop = (ctoken != null) ? stopWords.contains(ctoken.termText()): false;
} //*-- otherwise, return the current token
else //*-- no. 6
{ ptoken = ctoken; pstop = cstop; return(ctoken); }
}
//*-- if the current token is a stop word, return a
//*-- bigram from the previous and current tokens
if (cstop)
{ Token tempToken = createBigram(ptoken, ctoken);
if (!stopWords.contains(ptoken.termText()) ) tempToken.setPositionIncrement(0); //*-- no. 3
ptoken = ctoken; pstop = cstop; return(tempToken); } //*-- nos. 3 and 4
//*-- if the previous token was a stop word, first save
//*-- the current token and return a bigram from previous
//*-- and current tokens. In next call, return the saved token
if (pstop) //*-- no. 1
{ if (ctoken != null)
{ stoken = ctoken; stoken.setPositionIncrement(0);
Token tempToken = createBigram(ptoken, ctoken);
ptoken = ctoken; pstop = cstop; return(tempToken);
}
}
ptoken = ctoken; pstop = cstop;