Package com.clearnlp.util.map

Examples of com.clearnlp.util.map.Prob1DMap


{
  Prob1DMap g_map;
 
  public CTExtractPhrases()
  {
    g_map = new Prob1DMap();
  }
View Full Code Here


  /** Called by {@link #getCollector(JointReader, String, JointFtrXml[], String[], int)}. */
  protected Set<String> getLowerSimplifiedForms(JointReader reader, JointFtrXml xml, String[] trainFiles, int devId)
  {
    Set<String> set = new HashSet<String>();
    int i, j, len, size = trainFiles.length;
    Prob1DMap map = new Prob1DMap();
    DEPTree tree;
   
    LOG.info("Collecting word-forms:\n");
   
    for (i=0; i<size; i++)
    {
      if (devId == icontinue;
     
      reader.open(UTInput.createBufferedFileReader(trainFiles[i]));
      set.clear();
     
      while ((tree = reader.next()) != null)
      {
        NLPProcess.simplifyForms(tree);
        len = tree.size();
       
        for (j=1; j<len; j++)
          set.add(tree.get(j).lowerSimplifiedForm);
      }
     
      reader.close();
      map.addAll(set);
      LOG.debug(".");
    LOG.debug("\n");
   
    return map.toSet(xml.getDocumentFrequencyCutoff());
  }
View Full Code Here

 
  public void countDEP(String[] args) throws Exception
  {
    DEPReader reader = new DEPReader(0, 1, 2, 3, 4, 6, 7);
    reader.open(UTInput.createBufferedFileReader(args[0]));
    Prob1DMap map = new Prob1DMap();
    int[] count = {0, 0};
    DEPTree tree;
   
    while ((tree = reader.next()) != null)
      countDEP(tree, count, map);
   
    reader.close();
    System.out.printf("%5.2f (%d/%d)\n", 100d*count[0]/count[1], count[0], count[1]);
    for (StringIntPair p : map.toSortedList())
      System.out.println(p.s+" "+p.i);
  }
View Full Code Here

 
  private Set<String> getLowerSimplifiedFormsByDocumentFrequencies(JointReader reader, String[] filenames, int devId, int cutoff)
  {
    int i, j, len, size = filenames.length;
    Set<String> set = Sets.newHashSet();
    Prob1DMap map = new Prob1DMap();
    DEPTree tree;
   
    LOG.info(String.format("Collecting simplified-forms: cutoff = %d\n", cutoff));
   
    for (i=0; i<size; i++)
    {
      if (i == devIdcontinue;
      reader.open(UTInput.createBufferedFileReader(filenames[i]));
      set.clear();
     
      while ((tree = reader.next()) != null)
      {
        len = tree.size();
       
        for (j=1; j<len; j++)
          set.add(MPLib.getSimplifiedLowercaseWordForm(tree.get(j).form));
      }
     
      map.addAll(set);
      reader.close();
      LOG.info(".");
    LOG.info("\n");
   
    return map.toSet(cutoff);
  }
View Full Code Here

    }
    }
 
  /**
   * Reads constituent trees from the file named by {@code args[0]}, passes the root of each
   * tree to {@code traverse} together with a {@link Prob1DMap} and {@code args[2]}, and then
   * writes the map's sorted entries to two files derived from {@code args[1]}.
   * NOTE(review): this excerpt is cut off after the output loop, so the end of the method
   * (including whether the streams and reader are closed) is not visible here.
   *
   * @param args {@code args[0]}: input tree file; {@code args[1]}: output path prefix;
   *             {@code args[2]}: value forwarded unchanged to {@code traverse}.
   * @throws Exception declared by the original signature.
   */
  public void collectPhrases(String[] args) throws Exception
  {
    Prob1DMap map = new Prob1DMap();
    CTReader reader = new CTReader();
    reader.open(UTInput.createBufferedFileReader(args[0]));
    CTTree tree;
   
    // Visit every tree in the input; traverse() receives the map to fill.
    while ((tree = reader.nextTree()) != null)
      traverse(tree.getRoot(), map, args[2]);

    // Two outputs share the args[1] prefix: "<prefix>.1gram" and "<prefix>.ngram".
    PrintStream ugram = UTOutput.createPrintBufferedFileStream(args[1]+".1gram");
    PrintStream ngram = UTOutput.createPrintBufferedFileStream(args[1]+".ngram");
    String s;
   
    for (StringIntPair p : map.toSortedList())
    {
      // Each line is "<key> <count>"; keys containing '_' go to the n-gram file,
      // all other keys go to the 1-gram file.
      s = p.s+" "+p.i;
      if (p.s.contains("_"))  ngram.println(s);
      else          ugram.println(s);
    }
View Full Code Here

 
  private Set<String> getLowerSimplifiedFormsByDocumentFrequencies(JointReader reader, String[] filenames, int devId, int cutoff, int maxCount)
  {
    int i, j, len, count = 0, size = filenames.length;
    Set<String> set = Sets.newHashSet();
    Prob1DMap map = new Prob1DMap();
    DEPTree tree;
   
    LOG.info(String.format("Collecting simplified-forms: cutoff = %d, max = %d\n", cutoff, maxCount));
   
    for (i=0; i<size; i++)
    {
      if (i == devIdcontinue;
      reader.open(UTInput.createBufferedFileReader(filenames[i]));
     
      while ((tree = reader.next()) != null)
      {
        len = tree.size();
       
        for (j=1; j<len; j++)
          set.add(MPLib.getSimplifiedLowercaseWordForm(tree.get(j).form));
       
        if ((count += len) >= maxCount)
        {
          map.addAll(set);
          LOG.info(".");
          set.clear();
          count = 0;
        }
      }
     
      reader.close();
    LOG.info("\n");
   
    if (!set.isEmpty()) map.addAll(set);
    return map.toSet(cutoff);
  }
View Full Code Here

 
  /** Constructs a semantic role labeler for collecting lexica. */
  public AbstractSRLabeler(JointFtrXml[] xmls, AbstractFrames frames)
  {
    super(xmls);
    m_down   = new Prob1DMap();
    m_up     = new Prob1DMap();
    m_frames = frames;
  }
View Full Code Here

 
  /** Constructs a semantic role labeler for collecting lexica. */
  public AbstractSRLabeler(JointFtrXml[] xmls, AbstractFrames frames)
  {
    super(xmls);
    m_down   = new Prob1DMap();
    m_up     = new Prob1DMap();
    m_frames = frames;
  }
View Full Code Here

  /** Called by {@link #getCollector(JointReader, String, JointFtrXml[], String[], int)}. */
  private Set<String> getLowerSimplifiedForms(JointReader reader, JointFtrXml xml, String[] trainFiles, int devId)
  {
    Set<String> set = new HashSet<String>();
    int i, j, len, size = trainFiles.length;
    Prob1DMap map = new Prob1DMap();
    DEPTree tree;
   
    LOG.info("Collecting word-forms:\n");
   
    for (i=0; i<size; i++)
    {
      if (devId == icontinue;
     
      reader.open(UTInput.createBufferedFileReader(trainFiles[i]));
      set.clear();
     
      while ((tree = reader.next()) != null)
      {
        NLPProcess.simplifyForms(tree);
        len = tree.size();
       
        for (j=1; j<len; j++)
          set.add(tree.get(j).lowerSimplifiedForm);
      }
     
      reader.close();
      map.addAll(set);
      LOG.debug(".");
    LOG.debug("\n");
   
    return map.toSet(xml.getDocumentFrequencyCutoff());
  }
View Full Code Here

  }

  protected Set<String> getSimplifiedFormsByDocumentFrequencies(JointReader reader, String[] filenames, int cutoff, int maxCount)
  {
    Set<String> set = Sets.newHashSet();
    Prob1DMap map = new Prob1DMap();
    int j, len, count = 0;
    DEPTree tree;
   
    for (String filename : filenames)
    {
      reader.open(UTInput.createBufferedFileReader(filename));
     
      while ((tree = reader.next()) != null)
      {
        len = tree.size();
       
        for (j=1; j<len; j++)
          set.add(MPLib.getSimplifiedLowercaseWordForm(tree.get(j).form));
       
        if ((count += len) >= maxCount)
        {
          map.addAll(set);
          count = 0;
          set = Sets.newHashSet();
        }
      }
     
      reader.close();
    }
   
    if (!set.isEmpty()) map.addAll(set);
    return map.toSet(cutoff);
  }
View Full Code Here

TOP

Related Classes of com.clearnlp.util.map.Prob1DMap

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.