Package org.apache.ctakes.typesystem.type.syntax

Examples of org.apache.ctakes.typesystem.type.syntax.BaseToken


        newGoldSentence.addToIndexes();
      }

      for (BaseToken oldSystemToken : JCasUtil.select(jCas, BaseToken.class))
      {
        BaseToken newGoldToken = null; //new BaseToken(goldView, oldSystemEventMention.getBegin(), oldSystemEventMention.getEnd());

        // TODO the following commented out block is an alternative to having the hard coded if..then..else-if..else block for constructing new BaseToken objects
//        Constructor<? extends BaseToken> constructor = null;
//        try
//        {
//          constructor = oldSystemToken.getClass().getConstructor(JCas.class, int.class, int.class);
//        } catch(NoSuchMethodException| SecurityException e)
//        {
//          logger.error("problem getting constructor for copying BaseToken instance (inside AssertionEvalBasedOnModifier.ReferenceSupportingAnnotationsSystemToGoldcopier.process())");
//          continue;
//        }
//        try
//        {
//          newGoldToken = constructor.newInstance(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
//        } catch (InstantiationException | IllegalAccessException
//            | IllegalArgumentException | InvocationTargetException e)
//        {
//          logger.error("problem invoking constructor to copy BaseToken instance (inside AssertionEvalBasedOnModifier.ReferenceSupportingAnnotationsSystemToGoldcopier.process())");
//          continue;
//        }
       
        String oldSystemTokenClass = oldSystemToken.getClass().getName();
        if (oldSystemTokenClass.equals(WordToken.class.getName()))
        {
          newGoldToken = new WordToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(ContractionToken.class.getName()))
        {
          newGoldToken = new ContractionToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(NewlineToken.class.getName()))
        {
          newGoldToken = new NewlineToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(NumToken.class.getName()))
        {
          newGoldToken = new NumToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(PunctuationToken.class.getName()))
        {
          newGoldToken = new PunctuationToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(SymbolToken.class.getName()))
        {
          newGoldToken = new SymbolToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else if (oldSystemTokenClass.equals(BaseToken.class.getName()))
        {
          newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        } else
        {
          newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
        }
       
        newGoldToken.setPartOfSpeech(oldSystemToken.getPartOfSpeech());
        newGoldToken.setTokenNumber(oldSystemToken.getTokenNumber());
       
        newGoldToken.addToIndexes();
      }

    } // end of method ReferenceSupportingAnnotationsSystemToGoldCopier.process()
View Full Code Here


    JFSIndexRepository indexes = jcas.getJFSIndexRepository();

    // Determine and set the normalized form for each <code>BaseToken</code>
    Iterator btaItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
    while (btaItr.hasNext()) {
      BaseToken bta = (BaseToken) btaItr.next();
      String normForm = null;
      if (!(bta instanceof WordToken)) {
        normForm = bta.getCoveredText();
      } else {
        WordToken wta = (WordToken) bta;
        String canonicalForm = wta.getCanonicalForm();


        // The norm form is the canonical form, if there is one
        // Otherwise the norm form is the token's text.
        if ((canonicalForm != null&& (canonicalForm.length() > 0)) {
          normForm = canonicalForm;
        } else {
          normForm = wta.getCoveredText();
        }
      }
      bta.setNormalizedForm(normForm);
    }
  }
View Full Code Here

      tokens.clear();
      words.clear();

      FSIterator tokenIterator = baseTokenIndex.subiterator(sentence);
      while (tokenIterator.hasNext()) {
        BaseToken token = (BaseToken) tokenIterator.next();
        tokens.add(token);
        words.add(token.getCoveredText());
      }

      List<?> wordTagList = null; // List of BaseToken's
      if (words.size() > 0) {
        wordTagList = tagger.tag(words);
      }
      // else {
      // logger.info("sentence has no words = '" +
      // sentence.getCoveredText()
      // + "' at (" +sentence.getBegin() + "," + sentence.getEnd() + ")");
      // }

      try {
        for (int i = 0; i < tokens.size(); i++) {
          BaseToken token = (BaseToken) tokens.get(i);
          String posTag = (String) wordTagList.get(i);
          token.setPartOfSpeech(posTag);
        }
      } catch (IndexOutOfBoundsException e) {
        throw new AnalysisEngineProcessException(
            "sentence being tagged is: '"
                + sentence.getCoveredText() + "'", null, e);
View Full Code Here

          }
          String word = token.substring(0, split);
          wordEnd = wordStart + word.length();
          // Consider creating a token similar to the way
          // TokenConverter.convert method creates BaseToken's
          BaseToken baseToken = new BaseToken(jCas, wordStart, wordEnd);
          if (!loadWordsOnly) {
            String tag = token.substring(split + 1);
            baseToken.setPartOfSpeech(tag);
          }
          baseToken.setTokenNumber(wordNumber++);
          baseToken.addToIndexes();

          documentText.append(word + " ");
          wordStart = wordEnd + 1;
        }
        Sentence sentence = new Sentence(jCas, 0, wordEnd);
View Full Code Here

      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
      DEPTree tree = new DEPTree();

      // Convert CAS data into structures usable by ClearNLP
      for (int i = 0; i < tokens.size(); i++) {
        BaseToken token = tokens.get(i);
        DEPNode node = new DEPNode(i+1, token.getCoveredText());
        tree.add(node);
      }

      // Run parser and convert output back to CAS friendly data types
      postagger.process(tree);
     
      for (int i = 0; i < tokens.size(); i++) {
        BaseToken token = tokens.get(i);
        DEPNode node = tree.get(i+1);
        token.setPartOfSpeech(node.pos);
      }
     
    }
   
   
View Full Code Here

   
    List list = getTokenData(jcas, locationTerm);
    FSArray fsArr = new FSArray(jcas, list.size());

    for(int i=0; i<list.size(); i++) {
      BaseToken wta = (BaseToken)list.get(i);
      if (wta.getCoveredText().compareToIgnoreCase("at")!=0 ||
          (wta.getCoveredText().compareTo("AT")==0 && wta.getPartOfSpeech().compareTo("IN")!= 0))
        fsArr.set(i, wta);
      else if (i ==0)
        skipTerm = true;
    }
    String segStatus = getSubSectionStatus(locationTerm, jcas, cutOffForRevision);
View Full Code Here

    // index the base tokens and NEs by their offsets
    hbs = new Hashtable<Integer, BaseToken>();
    hbe = new Hashtable<Integer, BaseToken>();
    FSIterator iter = jcas.getJFSIndexRepository().getAnnotationIndex(BaseToken.type).iterator();
    while (iter.hasNext()) {
      BaseToken t = (BaseToken) iter.next();
      hbs.put(t.getBegin(), t);
      hbe.put(t.getEnd(), t);
    }
  }
View Full Code Here

    stopwords = l;
  }

  static boolean isPronoun (Markable m) {
    if (m.getContent() instanceof BaseToken) {
      BaseToken t = (BaseToken) m.getContent();
      if (t.getPartOfSpeech().startsWith("PRP")) // TODO: since only 3rd person pronouns are added as markables, no need to check
        return true;
    }
    return false;
  }
View Full Code Here

    return "U";
  }

  public ArrayList<BaseToken> containedTokens (int a, int b) {
    ArrayList<BaseToken> ret = new ArrayList<BaseToken>();
    BaseToken t1 = hbs.get(a);
    BaseToken t2 = hbe.get(b);
    if (t1!=null && t2!=null) {
      int begin = t1.getTokenNumber();
      int end = t2.getTokenNumber();
      LinkedList<Annotation> l = FSIteratorToList.convert(jcas.getJFSIndexRepository().getAnnotationIndex(BaseToken.type).iterator());
      for (int i = 0; i < l.size(); i++) {
        BaseToken t = (BaseToken) l.get(i);
        if (t.getTokenNumber()>=begin && t.getTokenNumber()<=end)
          ret.add(t);
      }
    }
//    int e;
//    while (t!=null && (e=t.getEnd())<=b) {
View Full Code Here

        logger.info("Test PTB Tokenizer for string (shown here in quotes) " + DQUOTE + testInput + DQUOTE);

        int numTokensTested = 0;
        for (int i=0; i< expectedResults.length; i++) {
          SimpleToken expectedTok = expectedResults[i];
          BaseToken tokenFromPipeline = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, i);
          try {
            if (expectedTok.getTokenClass()!=null) {
              // allow for tokens where we don't care what kind of token it is. for example, if testing hyphens in a sentence,
              // might not care if what type of token the final punctuation is created as
              //TODO: Could we confirm this test case?
              //assertEquals(expectedTok.getTokenClass(), tokenFromPipeline.getClass());
            }
            //TODO: Could we confirm this test case?
            //assertEquals(expectedTok.getBegin(), tokenFromPipeline.getBegin());
            //assertEquals(expectedTok.getEnd(), tokenFromPipeline.getEnd());
            numTokensTested++;
          } catch (java.lang.AssertionError e) {
            if (throwAssertionErrors) {
              throw e;
            }
            if (!alreadyOutputDebugInfoForThisRunOfPipeline) {
              System.err.println("ERROR: Found a problem, so outputting the tokens");
              for (int x=0; x < expectedResults.length; x++) {
                SimpleToken xTok = expectedResults[x];
                System.err.println("Expected token #" + x + " " + xTok.toString());
              }
              for (int sysTokNum=0; sysTokNum< expectedResults.length; sysTokNum++) {
                BaseToken sysTok = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, sysTokNum);
                System.err.println("System token #" + sysTokNum + " " + sysTok.getClass() + " " + sysTok.getBegin() + " " + sysTok.getEnd());
              }

              alreadyOutputDebugInfoForThisRunOfPipeline = true;
            }
            System.err.println("Caught exception at i = " + i + " for expectedTok " + expectedTok.toString() + " for testInput " + testInput);
View Full Code Here

TOP

Related Classes of org.apache.ctakes.typesystem.type.syntax.BaseToken

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.