Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.IndexedWord


      }
      if (td1.reln() == KILL) {
        continue;
      }

      IndexedWord td1Dep = td1.dep();
      SortedSet<TypedDependency> possibles = map.get(td1Dep);
      if (possibles == null) {
        continue;
      }

      // look for the "second half"

      // unique: the head prep and whether it should be pobj
      Pair<TypedDependency, Boolean> prepDep = null;
      TypedDependency ccDep = null; // treat as unique
      // list of dep and prepOtherDep and pobj (or  pcomp)
      List<Triple<TypedDependency, TypedDependency, Boolean>> conjs = new ArrayList<Triple<TypedDependency, TypedDependency, Boolean>>();
      Set<TypedDependency> otherDtrs = new TreeSet<TypedDependency>();

      // first look for a conj(prep, prep) (there might be several conj relations!!!)
      boolean samePrepositionInEachConjunct = true;
      int conjIndex = -1;
      for (TypedDependency td2 : possibles) {
        if (td2.reln() == CONJUNCT) {
          IndexedWord td2Dep = td2.dep();
          String td2DepPOS = td2Dep.tag();
          if (td2DepPOS.equals("IN") || td2DepPOS.equals("TO")) {
            samePrepositionInEachConjunct = samePrepositionInEachConjunct && td2Dep.value().equals(td1Dep.value());
            Set<TypedDependency> possibles2 = map.get(td2Dep);
            boolean pobj = true;// default of collapsing preposition is prep_
            TypedDependency prepOtherDep = null;
            if (possibles2 != null) {
              for (TypedDependency td3 : possibles2) {
                IndexedWord td3Dep = td3.dep();
                String td3DepPOS = td3Dep.tag();
                // CDM Mar 2006: I put in disjunction here when I added in
                // PREPOSITIONAL_OBJECT. If it catches all cases, we should
                // be able to delete the DEPENDENT disjunct
                // maybe better to delete the DEPENDENT disjunct - it creates
                // problem with multiple prep (mcdm)
                if ((td3.reln() == PREPOSITIONAL_OBJECT || td3.reln() == PREPOSITIONAL_COMPLEMENT) && (!(td3DepPOS.equals("IN") || td3DepPOS.equals("TO"))) && prepOtherDep == null) {
                  prepOtherDep = td3;
                  if (td3.reln() == PREPOSITIONAL_COMPLEMENT) {
                    pobj = false;
                  }
                } else {
                  otherDtrs.add(td3);
                }
              }
            }
            if (conjIndex < td2Dep.index()) {
              conjIndex = td2Dep.index();
            }
            conjs.add(new Triple<TypedDependency, TypedDependency, Boolean>(td2, prepOtherDep, pobj));
          }
        }
      } // end td2:possibles

      if (conjs.isEmpty()) {
        continue;
      }

      // if we have a conj under a preposition dependency, we look for the other
      // parts

      String td1DepPOS = td1Dep.tag();
      for (TypedDependency td2 : possibles) {
        // we look for the cc linked to this conjDep
        // the cc dep must have an index smaller than the dep of conjDep
        if (td2.reln() == COORDINATION && td2.dep().index() < conjIndex) {
          ccDep = td2;
        } else {
          IndexedWord td2Dep = td2.dep();
          String td2DepPOS = td2Dep.tag();
          // System.err.println("prepDep find: td1.reln: " + td1.reln() +
          // "; td2.reln: " + td2.reln() + "; td1DepPos: " + td1DepPOS +
          // "; td2DepPos: " + td2DepPOS + "; index " + index +
          // "; td2.dep().index(): " + td2.dep().index());
          if ((td2.reln() == DEPENDENT || td2.reln() == PREPOSITIONAL_OBJECT || td2.reln() == PREPOSITIONAL_COMPLEMENT) && (td1DepPOS.equals("IN") || td1DepPOS.equals("TO") || td1DepPOS.equals("VBG")) && prepDep == null && (!(td2DepPOS.equals("RB") || td2DepPOS.equals("IN") || td2DepPOS.equals("TO")))) {
            // same index trick, in case we have multiple deps
            // I deleted this to see if it helped [cdm Jan 2010] &&
            // td2.dep().index() < index)
            prepDep = new Pair<TypedDependency, Boolean>(td2, td2.reln() != PREPOSITIONAL_COMPLEMENT);
          } else if (!inConjDeps(td2, conjs)) {// don't want to add the conjDep
            // again!
            otherDtrs.add(td2);
          }
        }
      }

      if (prepDep == null || ccDep == null) {
        continue; // we can't deal with it in the hairy prep/conj interaction
        // case!
      }

      if (DEBUG) {
        if (ccDep != null) {
          System.err.println("!! Conj and prep case:");
          System.err.println("  td1 (prep): " + td1);
          System.err.println("  Kids of td1 are: " + possibles);
          System.err.println("  prepDep: " + prepDep);
          System.err.println("  ccDep: " + ccDep);
          System.err.println("  conjs: " + conjs);
          System.err.println("  samePrepositionInEachConjunct: " + samePrepositionInEachConjunct);
          System.err.println("  otherDtrs: " + otherDtrs);
        }
      }

      // check if we have the same prepositions in the conjunction
      if (samePrepositionInEachConjunct) { // conjDep != null && prepOtherDep !=
        // null &&
        // OK, we have a conjunction over parallel PPs: Fred flew to Greece and
        // to Serbia.
        GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());

        TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
        newTypedDeps.add(tdNew);
        if (DEBUG) {
          System.err.println("PrepPoss Conj branch (two parallel PPs) adding: " + tdNew);
          System.err.println("  removing: " + td1 + "  " + prepDep + "  " + ccDep);
        }
        td1.setReln(KILL);// remember these are "used up"
        prepDep.first().setReln(KILL);
        ccDep.setReln(KILL);

        for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
          TypedDependency conjDep = trip.first();
          TypedDependency prepOtherDep = trip.second();
          if (prepOtherDep == null) {
            // CDM July 2010: I think this should only ever happen if there is a
            // misparse, but it has happened in such circumstances. You have
            // something like (PP in or in (NP Serbia)), with the two
            // prepositions the same. We just clean up the mess.
            if (DEBUG) {
              System.err.println("  apparent misparse: same P twice with only one NP object (prepOtherDep is null)");
              System.err.println("  removing: " + conjDep);
            }
            ccDep.setReln(KILL);
          } else {
            TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), prepDep.first().dep(), prepOtherDep.dep());
            newTypedDeps.add(tdNew2);
            if (DEBUG) {
              System.err.println("  adding: " + tdNew2);
              System.err.println("  removing: " + conjDep + "  " + prepOtherDep);
            }
            prepOtherDep.setReln(KILL);
          }
          conjDep.setReln(KILL);
        }

        // promote dtrs that would be orphaned
        for (TypedDependency otd : otherDtrs) {
          if (DEBUG) {
            System.err.print("Changed " + otd);
          }
          otd.setGov(td1.gov());
          if (DEBUG) {
            System.err.println(" to " + otd);
          }
        }

        // Now we need to see if there are any TDs that will be "orphaned"
        // by this collapse. Example: if we have:
        // dep(drew, on)
        // dep(on, book)
        // dep(on, right)
        // the first two will be collapsed to on(drew, book), but then
        // the third one will be orphaned, since its governor no
        // longer appears. So, change its governor to 'drew'.
        // CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now
        // it does, since they're not automatically deleted
        // Some things in possibles may have already been changed, so check gov
        if (DEBUG) {
          System.err.println("td1: " + td1 + "; possibles: " + possibles);
        }
        for (TypedDependency td2 : possibles) {
          // if (DEBUG) {
          // System.err.println("[a] td2.reln " + td2.reln() + " td2.gov " +
          // td2.gov() + " td1.dep " + td1.dep());
          // }
          if (td2.reln() != KILL && td2.gov().equals(td1.dep())) { // && td2.reln()
            // != COORDINATION
            // && td2.reln()
            // != CONJUNCT
            if (DEBUG) {
              System.err.println("Changing " + td2 + " to have governor of " + td1 + " [a]");
            }
            td2.setGov(td1.gov());
          }
        }
        continue; // This one has been dealt with successfully
      } // end same prepositions

      // case of "Lufthansa flies to and from Serbia". Make it look like next
      // case :-)
      // that is, the prepOtherDep should be the same as prepDep !
      for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
        if (trip.first() != null && trip.second() == null) {
          trip.setSecond(new TypedDependency(prepDep.first().reln(), trip.first().dep(), prepDep.first().dep()));
          trip.setThird(prepDep.second());
        }
      }

      // we have two different prepositions in the conjunction
      // in this case we need to add a node
      // "Bill jumped over the fence and through the hoop"
      // prep_over(jumped, fence)
      // conj_and(jumped, jumped)
      // prep_through(jumped, hoop)

      GrammaticalRelation reln = determinePrepRelation(map, vmod, td1, td1, prepDep.second());
      TypedDependency tdNew = new TypedDependency(reln, td1.gov(), prepDep.first().dep());
      newTypedDeps.add(tdNew);
      if (DEBUG) {
        System.err.println("ConjPP (different preps) adding: " + tdNew);
        System.err.println("  deleting: " + td1 + "  " + prepDep.first() + "  " + ccDep);
      }
      td1.setReln(KILL);// remember these are "used up"
      prepDep.first().setReln(KILL);
      ccDep.setReln(KILL);
      // so far we added the first prep grammatical relation

      int copyNumber = 1;
      for (Triple<TypedDependency, TypedDependency, Boolean> trip : conjs) {
        TypedDependency conjDep = trip.first();
        TypedDependency prepOtherDep = trip.second();
        boolean pobj = trip.third();
        // OK, we have a conjunction over different PPs
        // we create a new node;
        // in order to make a distinction between the original node and its copy
        // we add a "copy" entry in the CoreLabel
        // existence of copy key is checked at printing (toString method of
        // TypedDependency)
        IndexedWord label = td1.gov().makeCopy(copyNumber);
        copyNumber++;

        // now we add the conjunction relation between td1.gov and the copy
        // the copy has the same label as td1.gov() but is another TreeGraphNode
        TypedDependency tdNew2 = new TypedDependency(conjValue(ccDep.dep().value()), td1.gov(), label);
        newTypedDeps.add(tdNew2);

        // now we still need to add the second prep grammatical relation
        // between the copy and the dependent of the prepOtherDep node
        TypedDependency tdNew3;

        GrammaticalRelation reln2 = determinePrepRelation(map, vmod, conjDep, td1, pobj);
        tdNew3 = new TypedDependency(reln2, label, prepOtherDep.dep());
        newTypedDeps.add(tdNew3);

        if (DEBUG) {
          System.err.println("  adding: " + tdNew2 + "  " + tdNew3);
          System.err.println("  deleting: " + conjDep + "  " + prepOtherDep);
        }
        conjDep.setReln(KILL);
        prepOtherDep.setReln(KILL);

        // promote dtrs that would be orphaned
        for (TypedDependency otd : otherDtrs) {
          // special treatment for prepositions: the original relation is
          // likely to be a "dep" and we want this to be a "prep"
          if (otd.dep().tag().equals("IN")) {
            otd.setReln(PREPOSITIONAL_MODIFIER);
          }
          otd.setGov(td1.gov());
        }
      }

      // Now we need to see if there are any TDs that will be "orphaned" off
      // the first preposition
      // by this collapse. Example: if we have:
      // dep(drew, on)
      // dep(on, book)
      // dep(on, right)
      // the first two will be collapsed to on(drew, book), but then
      // the third one will be orphaned, since its governor no
      // longer appears. So, change its governor to 'drew'.
      // CDM Feb 2010: This used to not move COORDINATION OR CONJUNCT, but now
      // it does, since they're not automatically deleted
      for (TypedDependency td2 : possibles) {
        if (td2.reln() != KILL) { // && td2.reln() != COORDINATION &&
          // td2.reln() != CONJUNCT) {
          if (DEBUG) {
            System.err.println("Changing " + td2 + " to have governor of " + td1 + " [b]");
          }
          td2.setGov(td1.gov());
        }
      }
      // end for different prepositions
    } // for TypedDependency td1 : list

    // below here is the single preposition/possessor basic case!!
    for (TypedDependency td1 : list) {
      if (td1.reln() == KILL) {
        continue;
      }

      IndexedWord td1Dep = td1.dep();
      String td1DepPOS = td1Dep.tag();
      // find all other typedDeps having our dep as gov
      Set<TypedDependency> possibles = map.get(td1Dep);

      if (possibles != null && (td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {

        // look for the "second half"
        boolean pobj = true;// default for prep relation is prep_
        for (TypedDependency td2 : possibles) {
          if (td2.reln() != COORDINATION && td2.reln() != CONJUNCT) {

            IndexedWord td2Dep = td2.dep();
            String td2DepPOS = td2Dep.tag();
            if ((td1.reln() == POSSESSION_MODIFIER || td1.reln() == CONJUNCT)) {
              if (td2.reln() == POSSESSIVE_MODIFIER) {
                if ( ! map.containsKey(td2Dep)) {  // if 's has no kids of its own (it shouldn't!)
                  td2.setReln(KILL);
                }


  private static void collapseMultiWordPrep(Collection<TypedDependency> list, Collection<TypedDependency> newTypedDeps, String str_mwp0, String str_mwp1, String w_mwp0, String w_mwp1) {

    // first find the multiword_preposition: dep(mpw[0], mwp[1])
    // the two words should be next to another in the sentence (difference of
    // indexes = 1)
    IndexedWord mwp0 = null;
    IndexedWord mwp1 = null;
    TypedDependency dep = null;
    for (TypedDependency td : list) {
      if (td.gov().value().equalsIgnoreCase(w_mwp0) && td.dep().value().equalsIgnoreCase(w_mwp1) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
        mwp0 = td.gov();
        mwp1 = td.dep();
        dep = td;
      }
    }

    if (mwp0 == null) {
      return;
    }

    // now search for prep|advmod|dep|amod(gov, mwp0)
    IndexedWord governor = null;
    TypedDependency prep = null;
    for (TypedDependency td1 : list) {
      if ((td1.reln() == PREPOSITIONAL_MODIFIER || td1.reln() == ADVERBIAL_MODIFIER || td1.reln() == ADJECTIVAL_MODIFIER || td1.reln() == DEPENDENT || td1.reln() == MULTI_WORD_EXPRESSION) && td1.dep().equals(mwp0)) {
        // we found prep|advmod|dep|amod(gov, mwp0)
        prep = td1;

    Collection<TypedDependency> newTypedDeps = new ArrayList<TypedDependency>();

    for (String[] mwp : MULTIWORD_PREPS) {
      newTypedDeps.clear();

      IndexedWord mwp0 = null;
      IndexedWord mwp1 = null;
      IndexedWord governor = null;

      TypedDependency prep = null;
      TypedDependency dep = null;
      TypedDependency pobj = null;
      TypedDependency newtd = null;

    // first, loop over the prepositions for NP annotation
    for (String[] mwp : THREEWORD_PREPS) {
      newTypedDeps.clear();

      IndexedWord mwp0 = null;
      IndexedWord mwp1 = null;
      IndexedWord mwp2 = null;

      TypedDependency dep1 = null;
      TypedDependency dep2 = null;

      // first find the first part of the 3word preposition: dep(mpw[0], mwp[1])
      // the two words should be next to another in the sentence (difference of
      // indexes = 1)

      for (TypedDependency td : list) {
        if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
          mwp0 = td.gov();
          mwp1 = td.dep();
          dep1 = td;
        }
      }

      // find the second part of the 3word preposition: dep(mpw[1], mwp[2])
      // the two words should be next to another in the sentence (difference of
      // indexes = 1)

      for (TypedDependency td : list) {
        if (td.gov().equals(mwp1) && td.dep().value().equalsIgnoreCase(mwp[2]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
          mwp2 = td.dep();
          dep2 = td;
        }
      }

      if (dep1 != null && dep2 != null) {

        // now search for prep(gov, mwp0)
        IndexedWord governor = null;
        TypedDependency prep = null;
        for (TypedDependency td1 : list) {
          if (td1.reln() == PREPOSITIONAL_MODIFIER && td1.dep().equals(mwp0)) {// we
            // found
            // prep(gov,
            // mwp0)
            prep = td1;
            governor = prep.gov();
          }
        }

        // search for the complement: pobj|pcomp(mwp2,X)

        TypedDependency pobj = null;
        TypedDependency newtd = null;
        for (TypedDependency td2 : list) {
          if (td2.reln() == PREPOSITIONAL_OBJECT && td2.gov().equals(mwp2)) {
            pobj = td2;
            // create the new gr relation
            GrammaticalRelation gr = EnglishGrammaticalRelations.getPrep(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
            if (governor != null) {
              newtd = new TypedDependency(gr, governor, pobj.dep());
            }
          }
          if (td2.reln() == PREPOSITIONAL_COMPLEMENT && td2.gov().equals(mwp2)) {
            pobj = td2;
            // create the new gr relation
            GrammaticalRelation gr = EnglishGrammaticalRelations.getPrepC(mwp[0] + '_' + mwp[1] + '_' + mwp[2]);
            if (governor != null) {
              newtd = new TypedDependency(gr, governor, pobj.dep());
            }
          }
        }

        // only if we found the governor and complement parts, set to KILL and
        // remove
        // and add the new one
        if (prep != null && pobj != null && newtd != null) {
          prep.setReln(KILL);
          dep1.setReln(KILL);
          dep2.setReln(KILL);
          pobj.setReln(KILL);
          newTypedDeps.add(newtd);

          // now remove typed dependencies with reln "kill"
          // and promote possible orphans
          for (TypedDependency td1 : list) {
            if (td1.reln() != KILL) {
              if (td1.gov().equals(mwp0) || td1.gov().equals(mwp1) || td1.gov().equals(mwp2)) {
                td1.setGov(governor);
              }
              if (!newTypedDeps.contains(td1)) {
                newTypedDeps.add(td1);
              }
            }
          }
          list.clear();
          list.addAll(newTypedDeps);
        }
      }
    }

    // second, loop again looking at flat annotation
    for (String[] mwp : THREEWORD_PREPS) {
      newTypedDeps.clear();

      IndexedWord mwp0 = null;
      IndexedWord mwp1 = null;
      IndexedWord mwp2 = null;

      TypedDependency dep1 = null;
      TypedDependency dep2 = null;

      // first find the first part of the 3word preposition: dep(mpw[0], mwp[1])
      // the two words should be next to another in the sentence (difference of
      // indexes = 1)
      for (TypedDependency td : list) {
        if (td.gov().value().equalsIgnoreCase(mwp[0]) && td.dep().value().equalsIgnoreCase(mwp[1]) && Math.abs(td.gov().index() - td.dep().index()) == 1) {
          mwp0 = td.gov();
          mwp1 = td.dep();
          dep1 = td;
        }
      }

      // find the second part of the 3word preposition: dep(mpw[0], mwp[2])
      // the two words should be one word apart in the sentence (difference of
      // indexes = 2)
      for (TypedDependency td : list) {
        if (td.gov().equals(mwp0) && td.dep().value().equalsIgnoreCase(mwp[2]) && Math.abs(td.gov().index() - td.dep().index()) == 2) {
          mwp2 = td.dep();
          dep2 = td;
        }
      }

      if (dep1 != null && dep2 != null) {

        // now search for prep(gov, mwp0)
        IndexedWord governor = null;
        TypedDependency prep = null;
        for (TypedDependency td1 : list) {
          if (td1.dep().equals(mwp0) && td1.reln() == PREPOSITIONAL_MODIFIER) {// we
            // found
            // prep(gov,

    Collection<TypedDependency> newTypedDeps = new ArrayList<TypedDependency>();

    for (String[] mwp : MULTIWORD_PREPS) {
      newTypedDeps.clear();

      IndexedWord mwp1 = null;
      IndexedWord governor = null;

      TypedDependency prep = null;
      TypedDependency dep = null;
      TypedDependency pobj = null;

   */
  private static void eraseMultiConj(Collection<TypedDependency> list) {
    // find typed deps of form cc(gov, x)
    for (TypedDependency td1 : list) {
      if (td1.reln() == COORDINATION) {
        IndexedWord x = td1.dep();
        // find typed deps of form dep(x,y) and kill them
        for (TypedDependency td2 : list) {
          if (td2.gov().equals(x) && (td2.reln() == DEPENDENT || td2.reln() == MULTI_WORD_EXPRESSION || td2.reln() == COORDINATION ||
                  td2.reln() == ADVERBIAL_MODIFIER || td2.reln() == NEGATION_MODIFIER || td2.reln() == AUX_MODIFIER)) {
            td2.setReln(KILL);

  private static void removeDep(Collection<TypedDependency> list) {
    Set<GrammaticalRelation> prepRels = Generics.newHashSet(EnglishGrammaticalRelations.getPreps());
    prepRels.addAll(EnglishGrammaticalRelations.getPrepsC());
    for (TypedDependency td1 : list) {
      if (prepRels.contains(td1.reln())) { // if we have a prep_ relation
        IndexedWord gov = td1.gov();
        IndexedWord dep = td1.dep();

        for (TypedDependency td2 : list) {
          if (td2.reln() == DEPENDENT && td2.gov().equals(gov) && td2.dep().equals(dep)) {
            td2.setReln(KILL);
          }

    @Override
    public boolean test(TypedDependency d) {
      if (d == null) return false;

      IndexedWord l = d.dep();
      if (l == null) return false;

      return npf.test(l.value());
    }

      for(CorefCluster entity : entities.values()){
        for(Mention mention : entity.getCorefMentions()){
          // Ignore verbal mentions
          if(mention.headWord.tag().startsWith("V")) continue;

          IndexedWord head = mention.dependency.getNodeByIndexSafe(mention.headWord.index());
          if(head == null) continue;         
          ArrayList<String> feats = mention.getSingletonFeatures(dict);
          dataset.add(new BasicDatum<String, String>(feats, "1"));
        }      
      }

      // Generate features for singletons with class label 0
      ArrayList<CoreLabel> gold_heads = new ArrayList<CoreLabel>();
      for(Mention gold_men : document.allGoldMentions.values()){
        gold_heads.add(gold_men.headWord);
      }     
      for(Mention predicted_men : document.allPredictedMentions.values()){
        SemanticGraph dep = predicted_men.dependency;
        IndexedWord head = dep.getNodeByIndexSafe(predicted_men.headWord.index());
        if(head == null) continue;

        // Ignore verbal mentions
        if(predicted_men.headWord.tag().startsWith("V")) continue;
        // If the mention is in the gold set, it is not a singleton and thus ignore

      if (grel == null) {
        throw new RuntimeException("Unknown grammatical relation '" + depName+"'");
      }

      //Word govWord = new Word(govName.substring(0, govDash));
      IndexedWord govWord = new IndexedWord();
      govWord.setValue(normalizeNumbers(govName));
      govWord.setWord(govWord.value());
      //Word childWord = new Word(childName.substring(0, childDash));
      IndexedWord childWord = new IndexedWord();
      childWord.setValue(normalizeNumbers(childName));
      childWord.setWord(childWord.value());
      TypedDependency dep = new TypedDependencyStringEquality(grel, govWord, childWord);
      deps.add(dep);
      } catch (Exception e) {
        breader.close();
        throw new RuntimeException("Error on line "+breader.getLineNumber()+":\n\n"+e);

TOP

Related Classes of edu.stanford.nlp.ling.IndexedWord

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.