Package edu.stanford.nlp.sequences

Source Code of edu.stanford.nlp.sequences.LibSVMReaderAndWriter

package edu.stanford.nlp.sequences;

import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.objectbank.DelimitRegExIterator;
import edu.stanford.nlp.objectbank.IteratorFromReaderFactory;
import java.util.function.Function;


/**
* DocumentReader for column format
*
* @author Jenny Finkel
*/
//TODO: repair this so it works with the feature label/coreLabel change
public class LibSVMReaderAndWriter implements DocumentReaderAndWriter<CoreLabel> {

  /**
   *
   */
  private static final long serialVersionUID = -7997837004847909059L;
  private SeqClassifierFlags flags = null;
  private IteratorFromReaderFactory factory;
 
  public void init(SeqClassifierFlags flags) {
    this.flags = flags;
    factory = DelimitRegExIterator.getFactory("\n(\\s*\n)+", new ColumnDocParser());
  }
 
  public Iterator<List<CoreLabel>> getIterator(Reader r) {
    return factory.getIterator(r);
  }

  int num = 0;
  private class ColumnDocParser implements Function<String,List<CoreLabel>> {
    public List<CoreLabel> apply(String doc) {

      if (num % 1000 == 0) { System.err.print("["+num+"]"); }
      num++;
     
      List<CoreLabel> words = new ArrayList<CoreLabel>();
     
      String[] lines = doc.split("\n");
     
      for (int i = 0; i < lines.length; i++) {
        if (lines[i].trim().length() < 1) {
          continue;
        }
        CoreLabel wi = new CoreLabel();
        String[] info = lines[i].split("\\s+");             
        wi.set(CoreAnnotations.AnswerAnnotation.class, info[0]);
        wi.set(CoreAnnotations.GoldAnswerAnnotation.class, info[0]);
        for (int j = 1; j < info.length; j++) {
          String[] bits = info[j].split(":");
          //wi.set(bits[0], bits[1]);
        }
//        System.err.println(wi);
        words.add(wi);
      }
      return words;
    }
  }
 
  public void printAnswers(List<CoreLabel> doc, PrintWriter out) {
    for (CoreLabel wi : doc) {
      String answer = wi.get(CoreAnnotations.AnswerAnnotation.class);
      String goldAnswer = wi.get(CoreAnnotations.GoldAnswerAnnotation.class);
      out.println(goldAnswer + "\t" + answer);
    }
    out.println();
  }

}
TOP

Related Classes of edu.stanford.nlp.sequences.LibSVMReaderAndWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.