Examples of ITokenizer


Examples of org.carrot2.text.analysis.ITokenizer

        documentIndices = new IntArrayList();
        fieldIndices = new ByteArrayList();

        final Iterator<Document> docIterator = documents.iterator();
        int documentIndex = 0;
        final ITokenizer ts = context.language.getTokenizer();
        final MutableCharArray wrapper = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);

        while (docIterator.hasNext())
        {
            final Document doc = docIterator.next();

            boolean hadTokens = false;
            for (int i = 0; i < fieldNames.length; i++)
            {
                final byte fieldIndex = (byte) i;
                final String fieldName = fieldNames[i];
                final String fieldValue = doc.getField(fieldName);

                if (!StringUtils.isEmpty(fieldValue))
                {
                    try
                    {
                        short tokenType;

                        ts.reset(new StringReader(fieldValue));
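                        // nextToken() returns the type code of the next token in the
                        // field text; ITokenizer.TT_EOF marks the end of the field.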
                        if ((tokenType = ts.nextToken()) != ITokenizer.TT_EOF)
                        {
                            if (hadTokens) addFieldSeparator(documentIndex);
                            do
                            {
                                ts.setTermBuffer(wrapper);
                                add(documentIndex, fieldIndex, context.intern(wrapper), tokenType);
                            } while ( (tokenType = ts.nextToken()) != ITokenizer.TT_EOF);
                            hadTokens = true;
                        }
                    }
                    catch (IOException e)
                    {
View Full Code Here
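
For reference, a minimal, self-contained sketch of the same reset()/nextToken()/setTermBuffer() loop outside the document-scanning code above. It assumes ExtendedWhitespaceTokenizer is available as a concrete ITokenizer implementation and that the import locations match Carrot2 3.x; class name and sample text are illustrative only.

import java.io.IOException;
import java.io.StringReader;

import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.util.MutableCharArray;

public class TokenizerLoopSketch
{
    public static void main(String [] args) throws IOException
    {
        // Assumption: ExtendedWhitespaceTokenizer as the concrete ITokenizer.
        final ITokenizer tokenizer = new ExtendedWhitespaceTokenizer();
        tokenizer.reset(new StringReader("Data mining, text clustering."));

        // The canonical ITokenizer loop: nextToken() returns a token-type code,
        // TT_EOF signals end of input, setTermBuffer() copies out the token text.
        final MutableCharArray buffer = new MutableCharArray();
        short tokenType;
        while ((tokenType = tokenizer.nextToken()) != ITokenizer.TT_EOF)
        {
            tokenizer.setTermBuffer(buffer);
            System.out.println(buffer.toString() + "  (type " + tokenType + ")");
        }
    }
}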

Examples of org.carrot2.text.analysis.ITokenizer

     */
    protected void assertEqualTokens(String testString, TokenImage [] expectedTokens)
    {
        try
        {
            final ITokenizer tokenStream = createTokenStream();
            tokenStream.reset(new StringReader(testString));

            final ArrayList<TokenImage> tokens = new ArrayList<TokenImage>();
            short token;
            MutableCharArray buffer = new MutableCharArray();
            while ((token = tokenStream.nextToken()) >= 0)
            {
                tokenStream.setTermBuffer(buffer);
                tokens.add(new TokenImage(buffer.toString(), token));
            }

            for (int i = 0; i < tokens.size(); i++) {
            }       
View Full Code Here
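
A hedged example of how the helper above might be invoked from a test. TokenImage is the snippet's own (text, type) value class; TT_TERM and TT_PUNCTUATION are ITokenizer token-type constants, and the expected tokenization shown is illustrative only.

        // Hypothetical invocation of assertEqualTokens(); token types are assumptions.
        assertEqualTokens("mining, clustering", new TokenImage []
        {
            new TokenImage("mining", ITokenizer.TT_TERM),
            new TokenImage(",", ITokenizer.TT_PUNCTUATION),
            new TokenImage("clustering", ITokenizer.TT_TERM)
        });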

Examples of org.spoofax.jsglr.client.imploder.ITokenizer

  /**
   * Report WATER + INSERT errors from parse tree
   */
  private static List<Error> gatherNonFatalErrors(IStrategoTerm top) {
    List<Error> errors = new ArrayList<Error>();
    ITokenizer tokenizer = getTokenizer(top);
    if (tokenizer == null)
      return errors;
    for (int i = 0, max = tokenizer.getTokenCount(); i < max; i++) {
      IToken token = tokenizer.getTokenAt(i);
      String error = token.getError();
      if (error != null) {
        if (error == ITokenizer.ERROR_SKIPPED_REGION) {
          i = findRightMostWithSameError(token, null);
          reportSkippedRegion(token, tokenizer.getTokenAt(i), errors);
        } else if (error.startsWith(ITokenizer.ERROR_WARNING_PREFIX)) {
          i = findRightMostWithSameError(token, null);
          reportWarningAtTokens(token, tokenizer.getTokenAt(i), error, errors);
        } else if (error.startsWith(ITokenizer.ERROR_WATER_PREFIX)) {
          i = findRightMostWithSameError(token, ITokenizer.ERROR_WATER_PREFIX);
          reportErrorAtTokens(token, tokenizer.getTokenAt(i), error, errors);
        } else {
          i = findRightMostWithSameError(token, null);
          // UNDONE: won't work for multi-token errors (as seen in SugarJ)
          reportErrorAtTokens(token, tokenizer.getTokenAt(i), error, errors);
        }
      }
    }
    gatherAmbiguities(top, errors);
   
View Full Code Here
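
Stripped of the error-recovery grouping, the scanning pattern used above is an index loop over the tokenizer. A minimal sketch using only the getTokenCount()/getTokenAt()/getError() accessors that appear in the snippet; the method name is an assumption:

  // Collects the raw error messages attached to individual tokens.
  // Sketch only: the original code additionally groups adjacent tokens
  // that share the same error before reporting them.
  private static List<String> collectTokenErrors(ITokenizer tokenizer) {
    List<String> messages = new ArrayList<String>();
    for (int i = 0, max = tokenizer.getTokenCount(); i < max; i++) {
      IToken token = tokenizer.getTokenAt(i);
      String error = token.getError();
      if (error != null)
        messages.add(error);
    }
    return messages;
  }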

Examples of org.spoofax.jsglr.client.imploder.ITokenizer

    return errors;
  }

  private static int findRightMostWithSameError(IToken token, String prefix) {
    String expectedError = token.getError();
    ITokenizer tokenizer = token.getTokenizer();
    int i = token.getIndex();
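    // Advance while the following token carries the same error instance, or,
    // when a prefix is given, an error message starting with that prefix.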
    for (int max = tokenizer.getTokenCount(); i + 1 < max; i++) {
      String error = tokenizer.getTokenAt(i + 1).getError();
      if (error != expectedError
          && (error == null || prefix == null || !error.startsWith(prefix)))
        break;
    }
    return i;
View Full Code Here

Examples of org.spoofax.jsglr.client.imploder.ITokenizer

    IToken errorToken = tokenizer.getErrorTokenOrAdjunct(offset);
    reportErrorAtTokens(errorToken, errorToken, message, errors);
  }
  
  private static IToken findNextNonEmptyToken(IToken token) {
    ITokenizer tokenizer = token.getTokenizer();
    IToken result = null;
    for (int i = token.getIndex(), max = tokenizer.getTokenCount(); i < max; i++) {
      result = tokenizer.getTokenAt(i);
      if (result.getLength() != 0 && !Token.isWhiteSpace(result)) break;
    }
    return result;
  }
View Full Code Here