Package opennlp.tools.util

Examples of opennlp.tools.util.PlainTextByLineStream


        this.finders = loadFinders(pathPrefix, workerPrepareData.getHdfsFileSystem());
    }

    @Override
    public void execute(InputStream in, GraphPropertyWorkData data) throws Exception {
        ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(in, "UTF-8");
        String line;
        int charOffset = 0;

        LOGGER.debug("Processing artifact content stream");
        Vertex sourceVertex = (Vertex) data.getElement();
        List<Vertex> termMentions = new ArrayList<Vertex>();
        while ((line = untokenizedLineStream.read()) != null) {
            termMentions.addAll(processLine(sourceVertex, data.getProperty().getKey(), line, charOffset, LumifyProperties.VISIBILITY_JSON.getPropertyValue(sourceVertex)));
            getGraph().flush();
            charOffset += line.length() + NEW_LINE_CHARACTER_LENGTH;
        }
        applyTermMentionFilters(sourceVertex, termMentions);

        untokenizedLineStream.close();
        LOGGER.debug("Stream processing completed");
    }
View Full Code Here


        this.finders = loadFinders();
    }

    @Override
    public void execute(InputStream in, GraphPropertyWorkData data) throws Exception {
        ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(new InputStreamReader(in));
        String line;
        int charOffset = 0;

        LOGGER.debug("Processing artifact content stream");
        Vertex sourceVertex = (Vertex) data.getElement();
        List<Vertex> termMentions = new ArrayList<Vertex>();
        while ((line = untokenizedLineStream.read()) != null) {
            termMentions.addAll(processLine(sourceVertex, data.getProperty().getKey(), line, charOffset, LumifyProperties.VISIBILITY_JSON.getPropertyValue(sourceVertex)));
            getGraph().flush();
            charOffset += line.length() + NEW_LINE_CHARACTER_LENGTH;
        }
        applyTermMentionFilters(sourceVertex, termMentions);

        untokenizedLineStream.close();
        LOGGER.debug("Stream processing completed");
    }
View Full Code Here

    //<start id="ne-namesample-type"/>
    String taggedSent =
      "<START:person> Britney Spears <END> was reunited " +
      "with her sons <START:date> Saturday <END> ";
    ObjectStream<NameSample> nss = new NameSampleDataStream(
        new PlainTextByLineStream(new StringReader(taggedSent)));
    TokenNameFinderModel model = NameFinderME.train(
        "en",
        "default" ,
        nss,
        (AdaptiveFeatureGenerator) null,
View Full Code Here

    File baseDir = new File("src/test/resources");
    File destDir = new File("target");
    //<start id="ne-train"/>
    File inFile = new File(baseDir,"person.train");
    NameSampleDataStream nss = new NameSampleDataStream( //<co id="co.opennlp.name.initnamestream"/>
      new PlainTextByLineStream(
        new java.io.FileReader(inFile)));

    int iterations = 100;
    int cutoff = 5;
    TokenNameFinderModel model = NameFinderME.train( //<co id="co.opennlp.name.train"/>
View Full Code Here

    //<end id="ne-features"/>

    //<start id="ne-features-train"/>
    File inFile = new File(baseDir,"person.train");
    NameSampleDataStream nss = new NameSampleDataStream( //<co id="co.opennlp.name.initfeat"/>
      new PlainTextByLineStream(
        new java.io.FileReader(inFile)));

    int iterations = 100;
    int cutoff = 5;
    TokenNameFinderModel model = NameFinderME.train( //<co id="co.opennlp.name.train2"/>
View Full Code Here

   *
   * @param sentences reader with sentences
   * @throws IOException IOException
   */
  public WordTagSampleStream(Reader sentences) throws IOException {
    // Wrap the reader in a PlainTextByLineStream and hand that stream to the
    // superclass constructor.
    super(new PlainTextByLineStream(sentences));
  }
View Full Code Here

        }
       
        additionalTrainingDataIn = new FileInputStream(additionalTrainingDataFile);
       
        ObjectStream<TokenSample> additionalSamples = new TokenSampleStream(
            new PlainTextByLineStream(new InputStreamReader(additionalTrainingDataIn, additionalTrainingDataEncoding)));
       
        samples = ObjectStreamUtils.createObjectStream(samples, additionalSamples);
      }
     
      if (sampleTraceFile != null) {
View Full Code Here

        }
       
        additionalTrainingDataIn = new FileInputStream(additionalTrainingDataFile);
       
        ObjectStream<NameSample> additionalSamples = new NameSampleDataStream(
            new PlainTextByLineStream(new InputStreamReader(additionalTrainingDataIn, additionalTrainingDataEncoding)));
       
        samples = ObjectStreamUtils.createObjectStream(samples, additionalSamples);
      }

      if (sampleTraceFile != null) {
View Full Code Here

    String encoding = "ISO-8859-1";

    ObjectStream<NameSample> sampleStream =
          new NameSampleDataStream(
          new PlainTextByLineStream(new InputStreamReader(in, encoding)));

    TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
        Collections.<String, Object>emptyMap(), 70, 1);

    TokenNameFinder nameFinder = new NameFinderME(nameFinderModel);
View Full Code Here

        "opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt");

    String encoding = "ISO-8859-1";

    ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
        new PlainTextByLineStream(new InputStreamReader(in, encoding)));

    TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
        Collections.<String, Object>emptyMap(), 70, 1);

    NameFinderME nameFinder = new NameFinderME(nameFinderModel);
View Full Code Here

TOP

Related Classes of opennlp.tools.util.PlainTextByLineStream

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.