Package edu.stanford.nlp.util

Examples of edu.stanford.nlp.util.Pair$ByFirstPairComparator


  private static final String HACER_TIME_FORM = "(?i)^(?:hac(?:er|ía))$";

  @SuppressWarnings("unchecked")
  private void buildAnnotations() {
    // +.25 F1
    annotations.put("-markInf", new Pair("/^(S|grup\\.verb|infinitiu|gerundi)/ < @infinitiu",
                                         new SimpleStringFunction("-infinitive")));
    annotations.put("-markGer", new Pair("/^(S|grup\\.verb|infinitiu|gerundi)/ < @gerundi",
                                         new SimpleStringFunction("-gerund")));

    // +.04 F1
    annotations.put("-markRelative", new Pair("@S <, @relatiu",
                                              new SimpleStringFunction("-relative")));

    // Negative F1; unused in default config
    annotations.put("-markPPHeads", new Pair("@sp",
                                             new AnnotateHeadFunction(headFinder)));

    // +.1 F1
    annotations.put("-markComo", new Pair("@cs < /(?i)^como$/",
                                          new SimpleStringFunction("[como]")));
    annotations.put("-markSpecHeads", new Pair("@spec", new AnnotateHeadFunction(headFinder)));

    // +.32 F1
    annotations.put("-markSingleChildNPs", new Pair("/^(sn|grup\\.nom)/ <: __",
                                                    new SimpleStringFunction("-singleChild")));

    // +.05 F1
    annotations.put("-markPPFriendlyVerbs", new Pair("/^v/ > /^grup\\.prep/",
                                                     new SimpleStringFunction("-PPFriendly")));

    // +.46 F1
    annotations.put("-markConjTypes", new Pair("@conj <: /^c[cs]/=c", new MarkConjTypeFunction()));

    // +.09 F1
    annotations.put("-markPronounNPs", new Pair("/^(sn|grup\\.nom)/ <<: /^p[0p]/",
                                                new SimpleStringFunction("-pronoun")));

    // +1.39 F1
    annotations.put("-markParticipleAdjs", new Pair(
      "@aq0000 < /(?i)([aeií]d|puest|biert|vist|(ben|mal)dit|[fh]ech|scrit|muert|[sv]uelt|[rl]ect|"
        + "frit|^(rot|dich|impres|desnud|sujet|exent))[oa]s?$/",
      new SimpleStringFunction("-part")));

    // Negative F1; unused in default config
    annotations.put("-markSentenceInitialClauses", new Pair("@S !, __",
                                                            new SimpleStringFunction("-init")));

    // Insignificant F1; unused in default config
    annotations.put("-markPoder", new Pair(
      String.format("/^(infinitiu|gerundi|grup\\.verb)/ <<: /%s/", PODER_FORM),
      new SimpleStringFunction("-poder")));

    // +.29 F1
    annotations.put("-markBaseNPs", new Pair("/^grup\\.nom/ !< (__ < (__ < __))",
                                             new SimpleStringFunction("-base")));

    // +.17 F1
    annotations.put("-markVerbless", new Pair("@S|sentence !<< /^(v|participi$)/",
                                              new SimpleStringFunction("-verbless")));

    // +.23 F1
    annotations.put("-markDominatesVerb", new Pair("__ << (/^(v|participi$)/ < __)",
                                                   new SimpleStringFunction("-dominatesV")));

    // Negative F1 -- not used by default
    annotations.put("-markNonRecSPs", new Pair("@sp !<< @sp", new SimpleStringFunction("-nonRec")));

    // In right-recursive verb phrases, mark the prefix of the first verb on its tag.
    // This annotation tries to capture the fact that only a few roots are ever really part of
    // these constructions: poder, deber, ir, etc.
    annotations.put("-markRightRecVPPrefixes",
                    new Pair("/^v/ $+ @infinitiu|gerundi >, /^(grup.verb|infinitiu|gerundi)/",
                             new MarkPrefixFunction(3)));


    // Negative F1 -- not used by default
    annotations.put("-markParentheticalNPs", new Pair("@sn <<, fpa <<` fpt",
                                                      new SimpleStringFunction("-paren")));
    annotations.put("-markNumericNPs", new Pair("@sn << (/^z/ < __) !<< @sn",
                                                new SimpleStringFunction("-num")));

    // Negative F1 -- not used by default
    annotations.put("-markCoordinatedNPs", new Pair(
      "@sn <, (/^(sn|grup\\.nom)/ $+ (@conj < /^(cc|grup\\.cc)/ $+ /^(sn|grup\\.nom)/=last))" +
        "<` =last",
      new SimpleStringFunction("-coord")));

    annotations.put("-markHacerTime", new Pair(
      String.format("/^vm/ < /%s/ $+ /^d/", HACER_TIME_FORM),
      new SimpleStringFunction("-hacerTime")));

    compileAnnotations(headFinder);
  }
View Full Code Here


            // e.getKey(), label, true, null, null, dictOddsWordWeights);
            // throw new RuntimeException("not implemented yet");
          }

          if (useFreqPhraseExtractedByPat)
            score = score * scoringFunction.apply(new Pair(en.getKey(), word));
          if (constVars.sqrtPatScore)
            patterns.incrementCount(en.getKey(), Math.sqrt(score));
          else
            patterns.incrementCount(en.getKey(), score);
        }
View Full Code Here

          numrand++;
          chooseThis = true;
        } else
          chooseThis = false;
        if (chooseThis) {
          chosen.add(new Pair(en.getKey(), i));
          RVFDatum<String, String> d = getDatum(sent, i);
          dataset.add(d, en.getKey(), Integer.toString(i));
        }
      }
    }
View Full Code Here

    if (arcs.contains(a)) {
      return false;
    }
    // it's new, so add to the rest of the data structures
    // add to source and input map
    Pair p = Generics.newPair(source, input);
    if (arcsBySourceAndInput.containsKey(p) && checkDeterminism) {
      throw new RuntimeException("Creating nondeterminism while inserting arc " + a + " because it already has arc " + arcsBySourceAndInput.get(p) + checkDeterminism);
    }
    arcsBySourceAndInput.put(p, a);
    Maps.putIntoValueHashSet(arcsBySource, source, a);
View Full Code Here

    // remove from arcs
    if (!arcs.remove(a)) {
      return false;
    }
    // remove from arcsBySourceAndInput
    Pair p = Generics.newPair(source, input);
    if (!arcsBySourceAndInput.containsKey(p)) {
      return false;
    }
    arcsBySourceAndInput.remove(p);
    // remove from arcsBySource
View Full Code Here

    Arc a = new Arc(source, target, input, output);
    if (arcs.contains(a)) // inexpensive check
    {
      return false;
    }
    Pair p = Generics.newPair(source, input);
    return !arcsBySourceAndInput.containsKey(p); // expensive check
  }
View Full Code Here

  public static class InputSplittingProcessor implements ArcProcessor {
    @Override
    public Arc processArc(Arc a) {
      a = new Arc(a);
      Pair p = (Pair) a.getInput();
      a.setInput(p.first);
      a.setOutput(p.second);
      return a;
    }
View Full Code Here

  @SuppressWarnings("unchecked")
  public void testSeparatePronouns() {
    List<String> pronouns = new ArrayList<String>();
    pronouns.add("me");
    assertEquals(new Pair("decir", pronouns),
                 verbStripper.separatePronouns("decirme"));

    // Should match capitalized verbs as well
    assertEquals(new Pair("Decir", pronouns),
      verbStripper.separatePronouns("Decirme"));

    pronouns.clear();
    pronouns.add("se");
    pronouns.add("lo");
    assertEquals(new Pair("contando", pronouns),
                 verbStripper.separatePronouns("contándoselo"));

    pronouns.clear();
    pronouns.add("se");
    pronouns.add("les");
    assertEquals(new Pair("aplicar", pronouns),
      verbStripper.separatePronouns("aplicárseles"));

    // Don't treat plural past participles as 2nd-person commands!
    Pair<String, List<String>> l = verbStripper.separatePronouns("sentados");
    assertNull(l);

    pronouns.clear();
    pronouns.add("os");
    assertEquals(new Pair("sentad", pronouns),
      verbStripper.separatePronouns("sentaos"));

    pronouns.clear();
    pronouns.add("se");
    assertEquals(new Pair("Imaginen", pronouns),
      verbStripper.separatePronouns("Imagínense"));

    // Match elided 1P verb forms
    pronouns.clear();
    pronouns.add("nos");
    assertEquals(new Pair("vamos", pronouns),
                 verbStripper.separatePronouns("vámonos"));

    // Let's write it to her
    pronouns.clear();
    pronouns.add("se");
    pronouns.add("la");
    assertEquals(new Pair("escribamos", pronouns),
                 verbStripper.separatePronouns("escribámosela"));

    // Looks like a verb with a clitic pronoun.. but it's not! There are
    // a *lot* of these in Spanish.
    assertNull(verbStripper.separatePronouns("címbalo"));
View Full Code Here

        //   double kl = cntr2.klDivergence(tempCounter);
        /* alternative 2 ends here */

        String annotatedLabel = label + "=l=" + sis;
        System.out.println("KL(" + annotatedLabel + "||" + label + ") = " + nf.format(kl) + "\t" + "support(" + sis + ") = " + support2);
        answers.add(new Pair(annotatedLabel, new Double(kl * support2)));
        topScores.add(new Pair(annotatedLabel, new Double(kl * support2)));
      }

      for (Iterator it2 = ((HashMap) rightRules.get(label)).keySet().iterator(); it2.hasNext();) {
        String sis = (String) it2.next();
        ClassicCounter cntr2 = (ClassicCounter) ((HashMap) rightRules.get(label)).get(sis);
        double support2 = (cntr2.totalCount());
        double kl = Counters.klDivergence(cntr2, cntr);
        String annotatedLabel = label + "=r=" + sis;
        System.out.println("KL(" + annotatedLabel + "||" + label + ") = " + nf.format(kl) + "\t" + "support(" + sis + ") = " + support2);
        answers.add(new Pair(annotatedLabel, new Double(kl * support2)));
        topScores.add(new Pair(annotatedLabel, new Double(kl * support2)));
      }

       
      // upto
       
      System.out.println("----");
      System.out.println("Sorted descending support * KL");
      Collections.sort(answers, (o1, o2) -> {
        Pair p1 = (Pair) o1;
        Pair p2 = (Pair) o2;
        Double p12 = (Double) p1.second();
        Double p22 = (Double) p2.second();
        return p22.compareTo(p12);
      });
      for (int i = 0, size = answers.size(); i < size; i++) {
        Pair p = (Pair) answers.get(i);
        double psd = ((Double) p.second()).doubleValue();
        System.out.println(p.first() + ": " + nf.format(psd));
        if (psd >= CUTOFFS[0]) {
          String annotatedLabel = (String) p.first();
          for (int j = 0; j < CUTOFFS.length; j++) {
            if (psd >= CUTOFFS[j]) {
              //javaSB[j].append("\"").append(annotatedLabel);
              //javaSB[j].append("\",");
            }
          }
        }
      }
      System.out.println();
    }


    Collections.sort(topScores, (o1, o2) -> {
      Pair p1 = (Pair) o1;
      Pair p2 = (Pair) o2;
      Double p12 = (Double) p1.second();
      Double p22 = (Double) p2.second();
      return p22.compareTo(p12);
    });
    String outString = "All enriched categories, sorted by score\n";
    for (int i = 0, size = topScores.size(); i < size; i++) {
      Pair p = (Pair) topScores.get(i);
      double psd = ((Double) p.second()).doubleValue();
      System.out.println(p.first() + ": " + nf.format(psd));
    }


    System.out.println();
    System.out.println("  // Automatically generated by SisterAnnotationStats -- preferably don't edit");
    int k = CUTOFFS.length - 1;
    for (int j = 0; j < topScores.size(); j++) {
      Pair p = (Pair) topScores.get(j);
      double psd = ((Double) p.second()).doubleValue();
      if (psd < CUTOFFS[k]) {
        if (k == 0) {
          break;
        } else {
          k--;
          j -= 1; // messy but should do it
          continue;
        }
      }
      javaSB[k].append("\"").append(p.first());
      javaSB[k].append("\",");
    }


    for (int i = 0; i < CUTOFFS.length; i++) {
View Full Code Here

      }
      System.out.println("----");
      System.out.println("Sorted descending support * KL");
      Collections.sort(answers, (o1, o2) -> o2.second().compareTo(o1.second()));
      for (int i = 0, size = answers.size(); i < size; i++) {
        Pair p = (Pair) answers.get(i);
        double psd = ((Double) p.second()).doubleValue();
        System.out.println(p.first() + ": " + nf.format(psd));
        if (psd >= CUTOFFS[0]) {
          List lst = (List) p.first();
          String nd = (String) lst.get(0);
          String par = (String) lst.get(1);
          for (int j = 0; j < CUTOFFS.length; j++) {
            if (psd >= CUTOFFS[j]) {
              javaSB[j].append("\"").append(nd).append("^");
              javaSB[j].append(par).append("\", ");
            }
          }
        }
      }
      System.out.println();
    }

    /*
          // do value of parent with info gain -- yet to finish this
          for (Iterator it = nodeRules.entrySet().iterator(); it.hasNext(); ) {
              Map.Entry pair = (Map.Entry) it.next();
              String node = (String) pair.getKey();
              Counter cntr = (Counter) pair.getValue();
              double support = (cntr.totalCount());
              System.out.println("Node " + node + " support is " + support);
              ArrayList dtrs = new ArrayList();
              for (Iterator it2 = pRules.entrySet().iterator(); it2.hasNext();) {
                  HashMap annotated = new HashMap();
                  Map.Entry pair2 = (Map.Entry) it2.next();
                  List node2 = (List) pair2.getKey();
                  Counter cntr2 = (Counter) pair2.getValue();
                  if (node2.get(0).equals(node)) {   // only do it if they match
                      annotated.put(node2, cntr2);
                  }
              }

              // upto

              List answers = new ArrayList();
              System.out.println("----");
              System.out.println("Sorted descending support * KL");
              Collections.sort(answers,
                               new Comparator() {
                                   public int compare(Object o1, Object o2) {
                                       Pair p1 = (Pair) o1;
                                       Pair p2 = (Pair) o2;
                                       Double p12 = (Double) p1.second();
                                       Double p22 = (Double) p2.second();
                                       return p22.compareTo(p12);
                                   }
                               });
              for (int i = 0, size = answers.size(); i < size; i++) {
                  Pair p = (Pair) answers.get(i);
                  double psd = ((Double) p.second()).doubleValue();
                  System.out.println(p.first() + ": " + nf.format(psd));
                  if (psd >= CUTOFFS[0]) {
                      List lst = (List) p.first();
                      String nd = (String) lst.get(0);
                      String par = (String) lst.get(1);
                      for (int j=0; j < CUTOFFS.length; j++) {
                          if (psd >= CUTOFFS[j]) {
                              javaSB[j].append("\"").append(nd).append("^");
                              javaSB[j].append(par).append("\", ");
                          }
                      }
                  }
              }
              System.out.println();
          }
    */

    // do value of grandparent
    for (List<String> node : pRules.keySet()) {
      ArrayList<Pair<List<String>, Double>> answers = Generics.newArrayList();
      ClassicCounter<List<String>> cntr = pRules.get(node);
      double support = (cntr.totalCount());
      if (support < SUPPCUTOFF) {
        continue;
      }
      System.out.println("Node " + node + " support is " + support);
      for (List<String> key : gPRules.keySet()) {
        if (key.get(0).equals(node.get(0)) && key.get(1).equals(node.get(1))) {  // only do it if they match
          ClassicCounter<List<String>> cntr2 = gPRules.get(key);
          double support2 = (cntr2.totalCount());
          double kl = Counters.klDivergence(cntr2, cntr);
          System.out.println("KL(" + key + "||" + node + ") = " + nf.format(kl) + "\t" + "support(" + key + ") = " + support2);
          double score = kl * support2;
          answers.add(Pair.makePair(key, new Double(score)));
          allScores.setCount(key,score);
        }
      }
      System.out.println("----");
      System.out.println("Sorted descending support * KL");
      Collections.sort(answers, (o1, o2) -> o2.second().compareTo(o1.second()));
      for (int i = 0, size = answers.size(); i < size; i++) {
        Pair p = (Pair) answers.get(i);
        double psd = ((Double) p.second()).doubleValue();
        System.out.println(p.first() + ": " + nf.format(psd));
        if (psd >= CUTOFFS[0]) {
          List lst = (List) p.first();
          String nd = (String) lst.get(0);
          String par = (String) lst.get(1);
          String gpar = (String) lst.get(2);
          for (int j = 0; j < CUTOFFS.length; j++) {
            if (psd >= CUTOFFS[j]) {
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.util.Pair$ByFirstPairComparator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.