Package org.languagetool.dev.conversion.cg

Examples of org.languagetool.dev.conversion.cg.CgSet


//    for (CgRule rule : rules) {
    ArrayList<Token> tokensList = new ArrayList<>();
    List<ArrayList<Token>> outerList = new ArrayList<>();   // in case we need to split the rule into several rules
    ArrayList<Token[]> processedLists = new ArrayList<>();
   
    CgSet targetSet = expandSetSets(grammar.getSet(rule.target));
    Token target = new Token(targetSet,false,0,false,false,new CgSet(),false,0,false);
    if (!isOrCompatible(target)) {
      System.err.println("Target for rule on line " + rule.line + " cannot be represented as one LT rule. Consider rewriting it.");
      return new ArrayList<>();
    }
    tokensList.add(target);
View Full Code Here


    CgTag[] surfaceforms = target.getSingleTagSurfaceforms();
    CgCompositeTag[] compositePostags = target.getCompositePostags();
   
    // actually checking and doing the splitting
    if (postags.length > 0 && baseforms.length > 0) {
      CgSet set1 = new CgSet(target);
      CgSet set2 = new CgSet(set1);
      set1.single_tags.removeAll(Arrays.asList(postags));
      set1.tags.removeAll(Arrays.asList(compositePostags));
      set2.single_tags.removeAll(Arrays.asList(baseforms));
      newSets.add(set1);
      newSets.add(set2);
      return newSets;
    }
    if (postags.length > 0 && surfaceforms.length > 0) {
      CgSet set1 = new CgSet(target);
      CgSet set2 = new CgSet(target);
      set1.single_tags.removeAll(Arrays.asList(postags));
      set1.tags.removeAll(Arrays.asList(compositePostags));
      set2.single_tags.removeAll(Arrays.asList(surfaceforms));
      newSets.add(set1);
      newSets.add(set2);
      return newSets;
    }
    if (surfaceforms.length > 0 && baseforms.length > 0) {
      CgSet set1 = new CgSet(target);
      CgSet set2 = new CgSet(target);
      set1.single_tags.removeAll(Arrays.asList(surfaceforms));
      set2.single_tags.removeAll(Arrays.asList(baseforms));
      newSets.add(set1);
      newSets.add(set2);
      return newSets;
View Full Code Here

      }
      dict.put(postags.toString(), postags);
    }
    List<CgSet> ret = new ArrayList<>();
    for (String postagSet : bf.keySet()) {
      CgSet newSet = new CgSet(target);
      newSet.tags = new HashSet<>();
      Iterable<CgCompositeTag> bfs = bf.get(postagSet);
      for (CgCompositeTag singleBf : bfs) {
        CgCompositeTag newTotalTag = new CgCompositeTag();
        for (CgTag tag : dict.get(postagSet).tags) {
          newTotalTag.addTag(tag);
        }
        for (CgTag tag : singleBf.tags) {
          newTotalTag.addTag(tag);
        }
        newSet.addCompositeTag(newTotalTag);
      }
      ret.add(newSet);
    }
    return ret;
  }
View Full Code Here

    List<Token> newTokenList2 = new ArrayList<>();
    int index=0;
    for (index = 0;index<tokens.length;index++) {
      if (tokens[index].scanbehind && tokens[index].negate && !tokens[index].barrier.isEmpty()) {
        Token newToken = new Token(tokens[index]);
        newToken.barrier = new CgSet();
        newTokenList1.add(tokens[index]);
        newTokenList2.add(newToken);
        break;
      } else {
        newTokenList1.add(tokens[index]);
View Full Code Here

          if (tokens.get(i).offset == tokens.get(i-1).offset || tokens.get(i).offset == (tokens.get(i-1).offset + 1)) {
            if (i == tokens.size() - 1) notdone = false;
            continue;
          }
          else {
            Token newToken = new Token(new CgSet(), false, tokens.get(i-1).offset + 1, false, false, new CgSet(), false, 0, false);
            newToken.relativeOffset = tokens.get(i-1).relativeOffset + 1;
            Token oldToken = tokens.get(i-1);
            oldToken.relativeOffset = -1;
            tokens.set(i-1, oldToken);
            tokens.add(i,newToken);
View Full Code Here

    ArrayList<Token> tokenList = new ArrayList<>(Arrays.asList(tokens));
    for (int i=0;i<tokenList.size();i++) {
      // forward scans (1* Verb)
      if (tokenList.get(i).scanahead) {
        if (i == 0) {
          Token newToken = new Token(new CgSet(), false, tokenList.get(i).offset - 1, false, false, new CgSet(), false, -1, false);
          if (!tokenList.get(i).barrier.isEmpty() || tokenList.get(i).negate) {
            newToken.exceptionString = getBarrierExceptionStringFromToken(tokenList.get(i));
          }
          Token oldToken = tokenList.get(i);
          // if it's a negative scan (NOT 1* Noun), then the target of the next token becomes the barrier + SENT_END
          if (oldToken.negate) { 
            CgSet newTarget = oldToken.barrier;
            CgTag sentEndTag = new CgTag();
            sentEndTag.tag = SENT_END;
            newTarget.single_tags.add(sentEndTag);
            oldToken.target = newTarget;
            oldToken.postags = oldToken.target.getPostagsString();
            oldToken.baseforms = oldToken.target.getSingleTagBaseformsString();
            oldToken.surfaceforms = oldToken.target.getSingleTagSurfaceformsString();
            oldToken.compositeTags = oldToken.target.getCompositeTags();
            tokenList.set(0, oldToken);
          }
          tokenList.add(0, newToken);
        } else {
          int index = i-1;
          String exceptionString = null;
          if (!tokenList.get(i).barrier.isEmpty() || tokenList.get(i).negate) {
            exceptionString = getBarrierExceptionStringFromToken(tokenList.get(i));
          }
         
          int prevOffset = tokenList.get(index).offset;
          while (index >= 0 && tokenList.get(index).offset == prevOffset) {
            Token prevToken = tokenList.get(index);
            prevToken.skip = -1;
            prevToken.exceptionString = exceptionString;
            tokenList.set(index, prevToken);
            index--;
          }
          Token oldToken = tokenList.get(i);
          if (oldToken.negate) {
            CgSet newTarget = oldToken.barrier;
            CgTag sentEndTag = new CgTag();
            sentEndTag.tag = SENT_END;
            newTarget.single_tags.add(sentEndTag);
            oldToken.target = newTarget;
            oldToken.postags = oldToken.target.getPostagsString();
            oldToken.baseforms = oldToken.target.getSingleTagBaseformsString();
            oldToken.surfaceforms = oldToken.target.getSingleTagSurfaceformsString();
            oldToken.compositeTags = oldToken.target.getCompositeTags();
            oldToken.negate = false;
            tokenList.set(i,oldToken);
          }
        }
      }
      // reverse scans (-1* Verb)
      else if (tokenList.get(i).scanbehind) {
        Token newToken = new Token(new CgSet(), false, tokenList.get(i).offset - 1, false, false, new CgSet(), false, -1, false);
        String exceptionString = null;
        if (!tokenList.get(i).barrier.isEmpty() || tokenList.get(i).negate) {
          exceptionString = getBarrierExceptionStringFromToken(tokenList.get(i));
        }
        CgSet newTarget = newToken.target;
        CgTag sentStartTag = new CgTag();
        sentStartTag.tag = SENT_START;
        newTarget.single_tags.add(sentStartTag);
        newToken.target = newTarget;
        newToken.postags = newToken.target.getPostagsString();
View Full Code Here

          if (tokenList.get(i).offset == tokenList.get(i-1).offset) {
            if (i == tokenList.size()-1) notdone = false;
            continue;
          }
          if (tokenList.get(i).offset != (tokenList.get(i-1).offset + 1) && tokenList.get(i).prevToken == null) {
            tokenList.add(i, new Token(new CgSet(), false, tokenList.get(i-1).offset + 1, false, false, new CgSet(), false, 0, false));
            break;
          }
        }
        if (i == tokenList.size() - 1) {
          notdone = false;
View Full Code Here

  /**
   * Helper that takes a normal contextual test (i.e. not a Parent or a Linked test, e.g. (1 Noun))
   * and returns the properly filled-out Token object
   */
  public Token getTokenFromNormalTest(CgContextualTest test) {
    CgSet testTarget = expandSetSets(grammar.getSet(test.target));
    boolean testCareful = test.pos.contains(POS.POS_CAREFUL.value);
    int testOffset = test.offset;
    boolean testScanAhead = test.pos.contains(POS.POS_SCANFIRST.value) && testOffset >= 0;
    boolean testScanBehind = test.pos.contains(POS.POS_SCANFIRST.value) && testOffset < 0;
    boolean testNot = test.pos.contains(POS.POS_NOT.value);
    CgSet testBarrier = grammar.getSet(test.barrier);
    CgSet testCBarrier = grammar.getSet(test.cbarrier);
    CgSet barrier = null;
    boolean cbarrier = false;
    if (testBarrier != null && testCBarrier != null) {
      System.err.println("Can't have both a barrier and a careful barrier");
      System.exit(1);
    }
    if (testBarrier != null) {
      barrier = testBarrier;
      cbarrier = false;
    } else if (testCBarrier != null) {
      barrier = testCBarrier;
      cbarrier = true;
    } else {
      barrier = new CgSet();
      cbarrier = false;
    }
    if (test.line == 548 && test.offset == 1) {
      System.out.println();
    }
View Full Code Here

  /**
   * takes a CgSet and, if it contains nested sets, expands them according to the proper
   * set operators (set_ops) and returns the new set.
   */
  public CgSet expandSetSets(CgSet set) {
    CgSet newSet = new CgSet();
    newSet.line = set.line;
    newSet.type = set.type;
    newSet.name  = set.name;
    if (set.sets.isEmpty()) {
      return set;
    }
    else if (set.sets.size() > 1 && set.set_ops.isEmpty()) {
      System.err.println("Error: something wonky with the set on line " + set.line);
      System.exit(1);
    }
    else if (set.set_ops.isEmpty()) {
      CgSet expandedSet = expandSetSets(grammar.getSet(set.sets.get(0)));
      for (CgCompositeTag ctag : expandedSet.tags) {
        newSet.tags.add(ctag);
      }
      for (CgTag tag : expandedSet.single_tags) {
        newSet.single_tags.add(tag);
      }
    }
    else {
      for (int op=0;op<set.set_ops.size();op++) {
        CgSet expandedSet1 = expandSetSets(grammar.getSet(set.sets.get(op)));
        CgSet expandedSet2 = expandSetSets(grammar.getSet(set.sets.get(op+1)));
        // Cartesian set product (+)
        if (set.set_ops.get(op) == 4) {
          for (CgTag tag : expandedSet1.single_tags) {
            for (CgTag tag2 : expandedSet2.single_tags) {
              if (tag.tag.equals(tag2.tag)) {
View Full Code Here

      // nothing
    }
   
    // copy constructor
    public Token(Token another) {
      this.target = new CgSet(another.target);
      this.postags = target.getPostagsString();
      this.surfaceforms = target.getSingleTagSurfaceformsString();
      this.baseforms = target.getSingleTagBaseformsString();
      this.compositeTags = target.getCompositeTags();
      this.careful = another.careful;
      this.offset = another.offset;
      this.scanahead = another.scanahead;
      this.scanbehind = another.scanbehind;
      this.barrier = new CgSet(another.barrier);
      this.cbarrier = another.cbarrier;
      this.skip = another.skip;
      this.negate = another.negate;
      this.nextToken = another.nextToken;
      this.prevToken = another.prevToken;
View Full Code Here

TOP

Related Classes of org.languagetool.dev.conversion.cg.CgSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and owned by ORACLE Inc. Contact coftware#gmail.com.