Package opennlp.tools.util

Examples of opennlp.tools.util.PlainTextByLineStream


        "opennlp/tools/chunker/test.txt");

    String encoding = "UTF-8";

    ObjectStream<ChunkSample> sampleStream = new ChunkSampleStream(
        new PlainTextByLineStream(new InputStreamReader(in, encoding)));

    ChunkerModel chunkerModel = ChunkerME.train("en", sampleStream, 1, 70);

    this.chunker = new ChunkerME(chunkerModel);
  }
View Full Code Here


  /**
   * Creates a sample stream on top of an existing stream of text lines.
   * The line stream is wrapped in a {@link ParagraphStream} before being
   * handed to the superclass constructor.
   *
   * @param lineStream the stream of input lines to read samples from
   */
  public ConllXPOSSampleStream(ObjectStream<String> lineStream) {
    super(new ParagraphStream(lineStream));
  }
 
  /**
   * Creates a sample stream that reads its input from a character reader.
   * The reader is wrapped in a {@link PlainTextByLineStream}, which is in
   * turn wrapped in a {@link ParagraphStream} and passed to the superclass
   * constructor.
   *
   * @param in the reader supplying the input text
   * @throws IOException declared by this constructor; NOTE(review): presumably
   *         raised while setting up the underlying streams - confirm
   */
  ConllXPOSSampleStream(Reader in) throws IOException {
    super(new ParagraphStream(new PlainTextByLineStream(in)));
  }
View Full Code Here

  ObjectStream<NameSample> create(Parameters params) {
   
    ObjectStream<String> lineStream;
    try {
      lineStream = new PlainTextByLineStream(new InputStreamReader(
          CmdLineUtil.openInFile(new File(params.getData())), params.getEncoding()));
     
      return new NameSampleDataStream(lineStream);
    } catch (UnsupportedEncodingException e) {
      System.err.println("Encoding not supported: " + params.getEncoding());
View Full Code Here

   * @param encoding  the <code>Charset</code> to apply to the input stream.
   */
  public NameFinderCensus90NameStream(InputStream in, Charset encoding) {
    this.locale = new Locale("en");   // locale is English
    this.encoding = encoding;
    this.lineStream = new PlainTextByLineStream(in, this.encoding);
  }
View Full Code Here

  }

  ObjectStream<POSSample> create(Parameters params) {
    ObjectStream<String> lineStream;
    try {
      lineStream = new PlainTextByLineStream(new InputStreamReader(
          CmdLineUtil.openInFile(new File(params.getData())), params.getEncoding()));
     
      return new ConllXPOSSampleStream(lineStream);
    } catch (UnsupportedEncodingException e) {
      System.err.println("Encoding not supported: " + params.getEncoding());
View Full Code Here

 
  private final ObjectStream<String> lineStream;
 
  public BioNLP2004NameSampleStream(InputStream in, int types) {
    try {
      this.lineStream = new PlainTextByLineStream(in, "UTF-8");
    } catch (UnsupportedEncodingException e) {
      // UTF-8 is available on all JVMs, will never happen
      throw new IllegalStateException(e);
    }
   
View Full Code Here

   * @param sentencesPerDocument the number of sentences which should be grouped into one {@link DocumentSample}
   * @param in the InputStream pointing to the contents of the sentences.txt input file
   */
  LeipzigDoccatSampleStream(String language, int sentencesPerDocument,
      InputStream in) throws IOException {
    super(new PlainTextByLineStream(in, "UTF-8"));
    this.language = language;
    this.sentencesPerDocument = sentencesPerDocument;
  }
View Full Code Here

   */
  public Conll02NameSampleStream(LANGUAGE lang, InputStream in, int types) {
   
    this.lang = lang;
    try {
      this.lineStream = new PlainTextByLineStream(in, "UTF-8");
    } catch (UnsupportedEncodingException e) {
      // UTF-8 is available on all JVMs, will never happen
      throw new IllegalStateException(e);
    }
    this.types = types;
View Full Code Here

   */
  public Conll03NameSampleStream(LANGUAGE lang, InputStream in, int types) {

    this.lang = lang;
    try {
      this.lineStream = new PlainTextByLineStream(in, "ISO-8859-1");
    } catch (UnsupportedEncodingException e) {
      // ISO-8859-1 is available on all JVMs, will never happen
      throw new IllegalStateException(e);
    }
    this.types = types;
View Full Code Here

  }
 
  void process() {
   
    ObjectStream<String> untokenizedLineStream =
        new PlainTextByLineStream(new InputStreamReader(System.in));
   
    ObjectStream<String> tokenizedLineStream = new WhitespaceTokenStream(
        new TokenizerStream(tokenizer, untokenizedLineStream));
   
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
View Full Code Here

TOP

Related Classes of opennlp.tools.util.PlainTextByLineStream

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.