package edu.stanford.nlp.sempre.fbalignment.lexicons;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import edu.stanford.nlp.sempre.Formula;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.StringUtils;
import fig.basic.LispTree;
import fig.basic.MapUtils;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
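/**
 * A lexicon entry mapping a natural-language description to a logical formula,
 * together with the source of the entry, the popularity of the formula, and the
 * edit distance between the description and the matching Freebase descriptions.
 * The nested classes below specialize this for binary, entity, and unary entries,
 * and LexicalEntrySerializer converts entries to and from LispTrees.
 */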
public class LexicalEntry {
public String textDescription; // the query as submitted to the lexicon
public String normalizedTextDesc; // the query after normalization
public Set<String> fbDescriptions; // Freebase descriptions matching the formula
public Formula formula;
public EntrySource source;
public double popularity;
public double distance;
public LexicalEntry(String textDescription, String normalizedTextDesc, Set<String> fbDescriptions, Formula formula, EntrySource source, double popularity, double distance) {
this.textDescription = textDescription;
this.normalizedTextDesc = normalizedTextDesc;
this.fbDescriptions = fbDescriptions;
this.formula = formula;
this.source = source;
this.popularity = popularity;
this.distance = distance;
}
public Formula getFormula() {
return formula;
}
public double getPopularity() {
return popularity;
}
public double getDistance() {
return distance;
}
private String stringRepn;
@Override
public String toString() {
if (stringRepn == null) {
stringRepn = textDescription + " (" + normalizedTextDesc + ")" +
", FB: " + fbDescriptions +
", formula: " + formula +
", source: " + source +
", popularity: " + popularity +
", distance: " + distance;
}
return stringRepn;
}
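/**
 * Minimum edit distance between the query and any of the given descriptions
 * (descriptions are lowercased before comparison), capped at 15.
 * Illustrative example with made-up values, assuming StringUtils.editDistance is a
 * standard Levenshtein distance: computeEditDistance("obama", {"Barack Obama"})
 * compares "obama" with "barack obama" and returns 7.
 */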
public static int computeEditDistance(String query, Set<String> descriptions) {
int distance = Integer.MAX_VALUE;
for (String description : descriptions) {
int currDistance = StringUtils.editDistance(query, description.toLowerCase());
if (currDistance < distance) {
distance = currDistance;
}
}
return Math.min(15, distance);
}
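/**
 * A lexicon entry for a binary relation: in addition to the base fields it stores
 * the expected types of the two arguments, an optional measurement unit (id and
 * description), the raw alignment scores, and the full lexeme from which the
 * normalized text was taken (the normalized text must be a substring of it).
 */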
public static class BinaryLexicalEntry extends LexicalEntry {
public String expectedType1;
public String expectedType2;
public String unitId = "";
public String unitDescription = "";
public Map<String,Double> alignmentScores;
public String fullLexeme; // the lexeme as it appears in the alignment, i.e., before the normalization applied when uploading the lexicon
public BinaryLexicalEntry(String textDescription, String normalizedTextDesc, Set<String> fbDescriptions, Formula formula,
EntrySource source, double popularity, String expectedType1, String expectedType2, Map<String,Double> alignmentScores, String fullLexeme) {
super(textDescription, normalizedTextDesc, fbDescriptions, formula, source, popularity, computeEditDistance(textDescription, fbDescriptions));
this.expectedType1 = expectedType1;
this.expectedType2 = expectedType2;
this.alignmentScores = alignmentScores;
this.fullLexeme = fullLexeme;
}
public BinaryLexicalEntry(String textDescription, String normalizedTextDesc, Set<String> fbDescriptions, Formula formula, EntrySource source, double popularity,
String expectedType1, String expectedType2, String unitId, String unitDesc, Map<String,Double> alignmentScores, String fullLexeme) {
super(textDescription, normalizedTextDesc, fbDescriptions, formula, source, popularity, computeEditDistance(textDescription, fbDescriptions));
this.expectedType1 = expectedType1;
this.expectedType2 = expectedType2;
this.unitId = unitId;
this.unitDescription = unitDesc;
this.alignmentScores = alignmentScores;
this.fullLexeme = fullLexeme;
assert (fullLexeme.contains(normalizedTextDesc));
}
public boolean identicalFormulaInfo(Object other) {
if (!(other instanceof BinaryLexicalEntry))
return false;
BinaryLexicalEntry otherBinary = (BinaryLexicalEntry) other;
if (!formula.equals(otherBinary.formula))
return false;
if (Math.abs(popularity - otherBinary.popularity) > 0.000001)
return false;
if (!expectedType1.equals(otherBinary.expectedType1))
return false;
if (!expectedType2.equals(otherBinary.expectedType2))
return false;
if (!unitId.equals(otherBinary.unitId))
return false;
if (!unitDescription.equals(otherBinary.unitDescription))
return false;
return true;
}
public String getExpectedType1() {
return expectedType1;
}
public String getExpectedType2() {
return expectedType2;
}
public String getUnitId() {
return unitId;
}
public String getUnitDescription() {
return unitDescription;
}
private String stringRepn;
@Override
public String toString() {
if (stringRepn == null) {
StringBuilder sb = new StringBuilder();
sb.append(super.toString());
sb.append(", " + expectedType1 + " x " + expectedType2);
if (unitId != null) {
sb.append(", " + unitId + ":" + unitDescription);
}
if (alignmentScores.size() > 0)
sb.append(", " + alignmentScores);
stringRepn = sb.toString();
}
return stringRepn;
}
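// Context helpers: the normalized text is a substring of the full lexeme (see the
// assertion in the constructor). Illustrative example with made-up values:
//   fullLexeme = "is the brother of", normalizedTextDesc = "brother"
//   getLeftContext()  -> {"is", "the"}
//   getRightContext() -> {"of"}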
public boolean isFullLexemeEqualToNormalizedText() {
return fullLexeme.equals(normalizedTextDesc);
}
public String[] getLeftContext() {
if (fullLexeme.startsWith(normalizedTextDesc))
return new String[]{};
String leftContext = fullLexeme.substring(0, fullLexeme.indexOf(normalizedTextDesc)).trim();
return leftContext.split("\\s+");
}
public String[] getRightContext() {
if (fullLexeme.endsWith(normalizedTextDesc))
return new String[]{};
String rightContext = fullLexeme.substring(fullLexeme.indexOf(normalizedTextDesc) + normalizedTextDesc.length()).trim();
return rightContext.split("\\s+");
}
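/**
 * Smoothed Jaccard coefficient between the natural-language and Freebase sides of
 * the alignment: intersection / (nlSize + fbSize - intersection + 5), where
 * intersection counts below 2.01 are treated as noise and zeroed out, and the +5
 * in the denominator pulls low-count alignments toward zero. For example, with
 * intersection = 10, nlSize = 15 and fbSize = 10, the score is 10 / (15 + 10 - 10 + 5) = 0.5.
 */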
public double jaccard() {
double intersection = MapUtils.getDouble(alignmentScores, BinaryLexicon.INTERSECTION, 0.0);
if (intersection < 2.01)
intersection = 0.0;
double nlSize = MapUtils.getDouble(alignmentScores, BinaryLexicon.NL_TYPED, 0.0);
double fbSize = MapUtils.getDouble(alignmentScores, BinaryLexicon.FB_TYPED, 0.0);
return intersection / (nlSize + fbSize - intersection + 5);
}
/** Drops all alignment counts and keeps only the Jaccard score. */
public void retainJaccardOnly() {
double jaccard = jaccard();
alignmentScores.clear();
alignmentScores.put("jaccard", jaccard);
}
}
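/** A lexicon entry for an entity, carrying its types and token-level edit-distance features. */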
public static class EntityLexicalEntry extends LexicalEntry {
public Set<String> types = new HashSet<String>();
public Counter<String> tokenEditDistanceFeatures;
public EntityLexicalEntry(String textDescription, String normalizedTextDesc, Set<String> fbDescriptions, Formula formula, EntrySource source, double popularity, double distance, Set<String> types, Counter<String> tokenEditDistanceFeatures) {
super(textDescription, normalizedTextDesc, fbDescriptions, formula, source, popularity, distance);
this.types = types;
this.tokenEditDistanceFeatures = tokenEditDistanceFeatures;
}
public Set<String> getTypes() {
return types;
}
@Override
public String toString() {
return super.toString() + ", " + types;
}
}
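/** A lexicon entry for a unary predicate, carrying its types and alignment scores. */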
public static class UnaryLexicalEntry extends LexicalEntry {
public Set<String> types = new HashSet<String>();
public Map<String,Double> alignmentScores;
public UnaryLexicalEntry(String textDescription, String normalizedTextDesc, Set<String> fbDescriptions, Formula formula, EntrySource source, double popularity,
Map<String,Double> alignmentScores, Set<String> types) {
super(textDescription, normalizedTextDesc, fbDescriptions, formula, source, popularity, computeEditDistance(textDescription, fbDescriptions));
this.types = types;
this.alignmentScores = alignmentScores;
}
public Set<String> getTypes() {
return types;
}
String stringRepn;
@Override
public String toString() {
if (stringRepn == null)
stringRepn = super.toString() + ", " + types;
return stringRepn;
}
}
/**
 * Holds the essential parts of a lexicon value: the lexeme, its formula, the
 * source of the entry, and a map of feature scores. Note that the creator reads
 * the lexeme from the JSON property "normLexeme", while the field itself is
 * annotated (and thus written) as "lexeme".
 *
 * @author jonathanberant
 */
public static class LexiconValue {
@JsonProperty public String lexeme;
@JsonProperty public Formula formula;
@JsonProperty public String source;
@JsonProperty public Map<String,Double> features;
@JsonCreator
public LexiconValue(@JsonProperty("normLexeme") String lexeme,
@JsonProperty("formula") Formula formula,
@JsonProperty("source") String source,
@JsonProperty("features") Map<String,Double> features) {
this.lexeme = lexeme;
this.formula = formula;
this.source = source;
this.features = features;
}
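// Illustrative JSON line that would map to a LexiconValue (all values made up;
// field names follow the @JsonProperty annotations on the constructor, and the
// exact JSON form of a Formula depends on its own Jackson bindings):
// {"normLexeme":"born in","formula":"...","source":"...","features":{"count":12.0}}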
}
public static class LexicalEntrySerializer {
// Utilities that should move into fig later.
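// Counters and feature maps are serialized as a list of (name value) pairs, so a map
// {size=2.0, count=7.0} would round-trip through a LispTree printed roughly as
// ((size 2.0) (count 7.0)) (iteration order is not guaranteed for counters).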
static Counter<String> counterFromLispTree(LispTree tree) {
Counter<String> counter = new ClassicCounter<String>();
for (int i = 0; i < tree.children.size(); i++)
counter.incrementCount(tree.child(i).child(0).value, Double.parseDouble(tree.child(i).child(1).value));
return counter;
}
static LispTree counterToLispTree(Counter<String> counter) {
LispTree tree = LispTree.proto.newList();
for (String feature : counter.keySet())
tree.addChild(LispTree.proto.newList(feature, "" + counter.getCount(feature)));
return tree;
}
static Map<String,Double> featureMapFromLispTree(LispTree tree) {
Map<String,Double> featureMap = new TreeMap<String,Double>();
for (int i = 0; i < tree.children.size(); i++)
featureMap.put(tree.child(i).child(0).value, Double.parseDouble(tree.child(i).child(1).value));
return featureMap;
}
static LispTree featureMapToLispTree(Map<String,Double> featureMap) {
LispTree tree = LispTree.proto.newList();
for (String feature : featureMap.keySet())
tree.addChild(LispTree.proto.newList(feature, "" + featureMap.get(feature)));
return tree;
}
static Set<String> setFromLispTree(LispTree tree) {
Set<String> set = new HashSet<String>();
for (int i = 0; i < tree.children.size(); i++)
set.add(tree.child(i).value);
return set;
}
static LispTree setToLispTree(Set<String> set) {
LispTree tree = LispTree.proto.newList();
for (String x : set)
tree.addChild(x);
return tree;
}
static String[] stringArrayFromLispTree(LispTree tree) {
String[] result = new String[tree.children.size()];
for (int i = 0; i < tree.children.size(); i++)
result[i] = tree.child(i).value;
return result;
}
static LispTree stringArrayToLispTree(String[] array) {
LispTree tree = LispTree.proto.newList();
for (String x : array)
tree.addChild(x);
return tree;
}
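// Each serialized entry is a LispTree whose first child names the entry kind
// ("entity", "unary", or "binary"); the remaining children follow the field order
// written by entryToLispTree below, so the two methods must stay in sync.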
public static LexicalEntry entryFromLispTree(LispTree tree) {
int i = 1;
if (tree.child(0).value.equals("entity")) {
String textDescription = tree.child(i++).value;
String normalizedTextDesc = tree.child(i++).value;
Set<String> fbDescriptions = setFromLispTree(tree.child(i++));
Formula formula = Formula.fromString(tree.child(i++).value);
EntrySource source = EntrySource.parseSourceDesc(tree.child(i++).value);
double popularity = Double.parseDouble(tree.child(i++).value);
double distance = Double.parseDouble(tree.child(i++).value);
Set<String> types = setFromLispTree(tree.child(i++));
Counter<String> tokenEditDistanceFeatures = counterFromLispTree(tree.child(i++));
return new LexicalEntry.EntityLexicalEntry(
textDescription, normalizedTextDesc, fbDescriptions, formula,
source, popularity, distance, types, tokenEditDistanceFeatures);
} else if (tree.child(0).value.equals("unary")) {
String textDescription = tree.child(i++).value;
String normalizedTextDesc = tree.child(i++).value;
Set<String> fbDescriptions = setFromLispTree(tree.child(i++));
Formula formula = Formula.fromString(tree.child(i++).value);
EntrySource source = EntrySource.parseSourceDesc(tree.child(i++).value);
double popularity = Double.parseDouble(tree.child(i++).value);
Double.parseDouble(tree.child(i++).value); // the distance is recomputed in the constructor, so the stored value is skipped
Map<String,Double> alignmentScores = featureMapFromLispTree(tree.child(i++));
Set<String> types = setFromLispTree(tree.child(i++));
return new LexicalEntry.UnaryLexicalEntry(
textDescription, normalizedTextDesc, fbDescriptions, formula, source,
popularity, alignmentScores, types);
} else if (tree.child(0).value.equals("binary")) {
String textDescription = tree.child(i++).value;
String normalizedTextDesc = tree.child(i++).value;
Set<String> fbDescriptions = setFromLispTree(tree.child(i++));
Formula formula = Formula.fromString(tree.child(i++).value);
EntrySource source = EntrySource.parseSourceDesc(tree.child(i++).value);
double popularity = Double.parseDouble(tree.child(i++).value);
Double.parseDouble(tree.child(i++).value); // the distance is recomputed in the constructor, so the stored value is skipped
String expectedType1 = tree.child(i++).value;
String expectedType2 = tree.child(i++).value;
String unitId = tree.child(i++).value;
String unitDescription = tree.child(i++).value;
Map<String,Double> alignmentScores = featureMapFromLispTree(tree.child(i++));
String fullLexeme = tree.child(i++).value;
return new LexicalEntry.BinaryLexicalEntry(
textDescription, normalizedTextDesc, fbDescriptions, formula, source,
popularity, expectedType1, expectedType2, unitId, unitDescription, alignmentScores, fullLexeme);
} else {
throw new RuntimeException("Invalid lexical entry: " + tree);
}
}
public static String emptyIfNull(String s) { return s == null ? "" : s; }
public static LispTree entryToLispTree(LexicalEntry rawEntry) {
LispTree result = LispTree.proto.newList();
if (rawEntry instanceof LexicalEntry.EntityLexicalEntry) {
LexicalEntry.EntityLexicalEntry entry = (LexicalEntry.EntityLexicalEntry) rawEntry;
result.addChild("entity");
result.addChild(entry.textDescription);
result.addChild(entry.normalizedTextDesc);
result.addChild(setToLispTree(entry.fbDescriptions));
result.addChild(entry.formula.toString());
result.addChild(entry.source.toString());
result.addChild("" + entry.popularity);
result.addChild("" + entry.distance);
result.addChild(setToLispTree(entry.types));
result.addChild(counterToLispTree(entry.tokenEditDistanceFeatures));
} else if (rawEntry instanceof LexicalEntry.UnaryLexicalEntry) {
LexicalEntry.UnaryLexicalEntry entry = (LexicalEntry.UnaryLexicalEntry) rawEntry;
result.addChild("unary");
result.addChild(entry.textDescription);
result.addChild(entry.normalizedTextDesc);
result.addChild(setToLispTree(entry.fbDescriptions));
result.addChild(entry.formula.toString());
result.addChild(entry.source.toString());
result.addChild("" + entry.popularity);
result.addChild("" + entry.distance);
result.addChild(featureMapToLispTree(entry.alignmentScores));
result.addChild(setToLispTree(entry.types));
} else if (rawEntry instanceof LexicalEntry.BinaryLexicalEntry) {
LexicalEntry.BinaryLexicalEntry entry = (LexicalEntry.BinaryLexicalEntry) rawEntry;
result.addChild("binary");
result.addChild(entry.textDescription);
result.addChild(entry.normalizedTextDesc);
result.addChild(setToLispTree(entry.fbDescriptions));
result.addChild(entry.formula.toString());
result.addChild(entry.source.toString());
result.addChild("" + entry.popularity);
result.addChild("" + entry.distance);
result.addChild(entry.expectedType1);
result.addChild(entry.expectedType2);
result.addChild(emptyIfNull(entry.unitId));
result.addChild(emptyIfNull(entry.unitDescription));
result.addChild(featureMapToLispTree(entry.alignmentScores));
result.addChild(entry.fullLexeme);
}
return result;
}
}
}