Package org.apache.ctakes.typesystem.type.syntax

Examples of org.apache.ctakes.typesystem.type.syntax.BaseToken


        }
      }

      // Initialize Token / Sentence info for the ClearParser Semantic Role Labeler
      for (int i = 0; i < tokens.size(); i++) {
        BaseToken token = tokens.get(i);

        // Determine HeadId
        DepNode node = new DepNode();
        ConllDependencyNode casDepNode = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, token).get(0);
        casDepNode.getDeprel();
        String headRelation = casDepNode.getDeprel();
        ConllDependencyNode head = casDepNode.getHead();
       
        // If there is no head, this is the head node, set node to 0
        int headId = (head == null) ? 0 : depNodeToID.get(head);

        // Populate Dependency Node / Tree information
        node.id = i + 1;
        node.form = token.getCoveredText();
        node.pos = token.getPartOfSpeech();
        node.lemma = useLemmatizer ? "" : token.getNormalizedForm();
        node.setHead(headId, headRelation, 0);
        tree.add(node);
      }
      tree.setPredicates(AbstractReader.LANG_EN);
View Full Code Here


    // Start at node 1, since node 0 is considered the head of the sentence
    for (int i = 1; i < tree.size(); i++) {
      // Every ClearParser parserNode will contain an srlInfo field.
      DepNode parserNode = tree.get(i);
      BaseToken token = tokens.get(i - 1);
      if (parserNode.srlInfo == null) {
        continue;
      }

      if (parserNode.srlInfo.isPredicate()) {
        int headId = i;
        if (!headIdToPredicate.containsKey(headId)) {
          // We have not encountered this predicate yet, so create it
          Predicate pred = this.createPredicate(jCas, parserNode.srlInfo.rolesetId, token);
          headIdToPredicate.put(headId, pred);
          pred.setRelations(new EmptyFSList(jCas));
        }
      } else {
        for (SRLHead head : parserNode.srlInfo.heads) {
          Predicate predicate;

          // Determine which predicate this argument belongs to
          if (!headIdToPredicate.containsKey(head.headId)) {
            // The predicate hasn't been encountered, so create it
            BaseToken headToken = tokens.get(head.headId - 1);
            predicate = this.createPredicate(jCas, parserNode.srlInfo.rolesetId, headToken);
            headIdToPredicate.put(head.headId, predicate);
          } else {
            predicate = headIdToPredicate.get(head.headId);
          }
View Full Code Here

           
           
            ListIterator<BaseToken>           itt = tokens.listIterator();
            ListIterator<ConllDependencyNode> itn = nodes.listIterator();
            BaseToken           bt = null;
            ConllDependencyNode dn = null;
            if (tokens.size()>0 && nodes.size()>0) {
                // iterate through the parallel sorted lists
                if (itt.hasNext()) bt                  = itt.next();
                if (itn.hasNext()) dn                  = itn.next();
                if (dn != null)
                    if (dn.getId()==0 && itn.hasNext())
                        dn = itn.next();
                while (itt.hasNext() || itn.hasNext()) {
                    if (bt.getBegin()==dn.getBegin() ) { // Allow ragged right edge //&& bt.getEnd()==dn.getEnd()) {
                        dn.setLemma( bt.getNormalizedForm() );
                        if (dn.getLemma()==null)
                            dn.setLemma( dn.getForm().toLowerCase() );
                        dn.addToIndexes();
                        if (itt.hasNext()) bt = itt.next();
                        if (itn.hasNext()) dn = itn.next();
                    } else if ( bt.getBegin()<dn.getBegin() ) {
                        if (itt.hasNext()) bt = itt.next(); else break;
                    } else if ( bt.getBegin()>dn.getBegin() ) {
                        // not every node will get a lemma b/c not all tokens are word tokens
                        if (itn.hasNext()) {
                            dn.setLemma( dn.getForm().toLowerCase() );
                            dn.addToIndexes();
                            dn = itn.next();
                        } else break;
                    }
                }
                if (bt.getBegin()==dn.getBegin() && bt.getEnd()==dn.getEnd()) {
                    dn.setLemma( bt.getNormalizedForm() );
                    dn.addToIndexes();
                }
            }
           
    }
View Full Code Here

      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
      DepTree tree = new DepTree();

      // Convert CAS data into structures usable by ClearParser
      for (int i = 0; i < tokens.size(); i++) {
        BaseToken token = tokens.get(i);
        DepNode node = new DepNode();
        node.id = i + 1;
        node.form = token.getCoveredText();
        node.pos = token.getPartOfSpeech();
        node.lemma = useLemmatizer ? lemmatizer.getLemma(node.form, node.pos) : token.getNormalizedForm();
        tree.add(node);
      }

      // Run parser and convert output back to CAS friendly data types
      parser.parse(tree);
View Full Code Here

    // index the base tokens and NEs by their offsets
    hbs = new Hashtable<Integer, BaseToken>();
    hbe = new Hashtable<Integer, BaseToken>();
    FSIterator iter = jcas.getJFSIndexRepository().getAnnotationIndex(BaseToken.type).iterator();
    while (iter.hasNext()) {
      BaseToken t = (BaseToken) iter.next();
      hbs.put(t.getBegin(), t);
      hbe.put(t.getEnd(), t);
    }
  }
View Full Code Here

    stopwords = l;
  }

  static boolean isPronoun (Markable m) {
    if (m.getContent() instanceof BaseToken) {
      BaseToken t = (BaseToken) m.getContent();
      if (t.getPartOfSpeech().startsWith("PRP")) // TODO: since only 3rd person pronouns are added as markables, no need to check
        return true;
    }
    return false;
  }
View Full Code Here

    return "U";
  }

  public ArrayList<BaseToken> containedTokens (int a, int b) {
    ArrayList<BaseToken> ret = new ArrayList<BaseToken>();
    BaseToken t1 = hbs.get(a);
    BaseToken t2 = hbe.get(b);
    if (t1!=null && t2!=null) {
      int begin = t1.getTokenNumber();
      int end = t2.getTokenNumber();
      LinkedList<Annotation> l = FSIteratorToList.convert(jcas.getJFSIndexRepository().getAnnotationIndex(BaseToken.type).iterator());
      for (int i = 0; i < l.size(); i++) {
        BaseToken t = (BaseToken) l.get(i);
        if (t.getTokenNumber()>=begin && t.getTokenNumber()<=end)
          ret.add(t);
      }
    }
//    int e;
//    while (t!=null && (e=t.getEnd())<=b) {
View Full Code Here

                }
            }

            ListIterator<BaseToken> itt           = tokens.listIterator();
            ListIterator<ConllDependencyNode> itn = nodes.listIterator();
            BaseToken           bt = null;
            ConllDependencyNode dn = null;
            if (tokens.size()>0 && nodes.size()>0) {
                // iterate through the parallel sorted lists
              if (itt.hasNext()) bt                  = itt.next();
              if (itn.hasNext()) dn                  = itn.next();
              if (dn != null)
                if (dn.getId()==0 && itn.hasNext())
                  dn = itn.next();
              while (itt.hasNext() || itn.hasNext()) {
                if (bt.getBegin()==dn.getBegin() && bt.getEnd()==dn.getEnd()) {
                  dn.setPostag( bt.getPartOfSpeech() );
                  dn.setCpostag( bt.getPartOfSpeech() )
                  dn.addToIndexes();
                  if (itt.hasNext()) bt = itt.next();
                  if (itn.hasNext()) dn = itn.next();
                } else if ( bt.getBegin()<dn.getBegin() ) {
                  if (itt.hasNext()) bt = itt.next(); else break;
                } else if ( bt.getBegin()>dn.getBegin() ) {
                  if (itn.hasNext()) dn = itn.next(); else break;
                }
              }
              if (bt.getBegin()==dn.getBegin() && bt.getEnd()==dn.getEnd()) {
                dn.setPostag( bt.getPartOfSpeech() );
                dn.setCpostag( bt.getPartOfSpeech() )
                dn.addToIndexes();
              }
            }
             
    }
View Full Code Here

    JFSIndexRepository indexes = jcas.getJFSIndexRepository();
    Iterator btaItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
    while (btaItr.hasNext())
    {
      BaseToken bta = (BaseToken) btaItr.next();
      if (!((bta instanceof NewlineToken)
          || (bta instanceof PunctuationToken)
          || (bta instanceof ContractionToken)
          || (bta instanceof SymbolToken)))
      {
        LookupToken lt = new LookupAnnotationToJCasAdapter(bta);

        // POS exclusion logic for first word lookup
        if (isTagExcluded(bta.getPartOfSpeech()))
        {
          lt.addStringAttribute(
              FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP,
              "false");
        }
View Full Code Here

    JFSIndexRepository indexes = jcas.getJFSIndexRepository();
    Iterator btaItr = indexes.getAnnotationIndex(BaseToken.type)
        .iterator();
    while (btaItr.hasNext())
    {
      BaseToken bta = (BaseToken) btaItr.next();
      if (!((bta instanceof NewlineToken)
          || (bta instanceof PunctuationToken)
          || (bta instanceof ContractionToken)
          || (bta instanceof SymbolToken)))
      {
View Full Code Here

TOP

Related Classes of org.apache.ctakes.typesystem.type.syntax.BaseToken

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.