Package org.carrot2.text.analysis

Examples of org.carrot2.text.analysis.ITokenizer.reset()


                {
                    try
                    {
                        short tokenType;

                        ts.reset(new StringReader(fieldValue));
                        if ((tokenType = ts.nextToken()) != ITokenizer.TT_EOF)
                        {
                            if (hadTokens) addFieldSeparator(documentIndex);
                            do
                            {
View Full Code Here


    protected void assertEqualTokens(String testString, TokenImage [] expectedTokens)
    {
        try
        {
            final ITokenizer tokenStream = createTokenStream();
            tokenStream.reset(new StringReader(testString));

            final ArrayList<TokenImage> tokens = new ArrayList<TokenImage>();
            short token;
            MutableCharArray buffer = new MutableCharArray();
            while ((token = tokenStream.nextToken()) >= 0)
View Full Code Here

        {
            final char [] word = wordImages[i];
            if (buffer.length < word.length) buffer = new char [word.length];

            CharArrayUtils.toLowerCase(word, buffer);
            mutableCharArray.reset(buffer, 0, word.length);
            if (lexData.isCommonWord(mutableCharArray))
            {
                types[i] |= ITokenizer.TF_COMMON_WORD;
            }
        }
View Full Code Here

            final char [] word = wordImages[i];
            if (buffer.length < word.length) buffer = new char [word.length];

            final boolean different = CharArrayUtils.toLowerCase(word, buffer);

            mutableCharArray.reset(buffer, 0, word.length);
            final CharSequence stemmed = stemmer.stem(mutableCharArray);
            if (stemmed != null)
            {
                mutableCharArray.reset(stemmed);
                stemImages[i] = context.intern(mutableCharArray);
View Full Code Here

            mutableCharArray.reset(buffer, 0, word.length);
            final CharSequence stemmed = stemmer.stem(mutableCharArray);
            if (stemmed != null)
            {
                mutableCharArray.reset(stemmed);
                stemImages[i] = context.intern(mutableCharArray);
            }
            else
            {
                // We need to put the original word here, otherwise, we wouldn't be able
View Full Code Here

                fieldIndices |= wordsFieldIndices[nextInOrderIndex];

                stemTfsByDocument.clear();
                stemTfsByDocument.add(wordTfByDocumentArray[nextInOrderIndex]);

                buffer.reset(wordStemImages[nextInOrderIndex]);
                inQuery = queryStems.contains(buffer);
            }
        }

        // Store tf for the last stem in the array
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.