Package edu.stanford.nlp.pipeline

Examples of edu.stanford.nlp.pipeline.StanfordCoreNLP
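
The snippets below are excerpts from several projects that use the class. For orientation, a minimal end-to-end use of StanfordCoreNLP looks roughly like the sketch below; the annotator list and sample text are illustrative and not taken from any of the excerpts.

    import java.util.Properties;
    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.pipeline.Annotation;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import edu.stanford.nlp.util.CoreMap;

    public class MinimalPipelineExample {
      public static void main(String[] args) {
        // Pick the annotators to run; later annotators rely on the output of earlier ones.
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        // Wrap the raw text in an Annotation and run the pipeline over it.
        Annotation document = new Annotation("Stanford is in California. CoreNLP annotates it.");
        pipeline.annotate(document);

        // Read the results back out, sentence by sentence and token by token.
        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
          for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            System.out.println(token.word() + "\t" + token.tag() + "\t" + token.lemma());
          }
        }
      }
    }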


    System.err.println("The reader log level is set to " + readerLogLevel);
    //Execution.fillOptions(GenericDataSetReaderProps.class, args);
    //Arguments.parse(args, reader);
   
    // create the pre-processing pipeline
    StanfordCoreNLP pipe = new StanfordCoreNLP(props, false);
    reader.setProcessor(pipe);
    if (auxReader != null) {
      auxReader.setProcessor(pipe);
    }
   
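In the snippet above the pipeline is built with the two-argument constructor StanfordCoreNLP(Properties, boolean enforceRequirements); passing false skips the check that every annotator's prerequisites appear earlier in the annotators list. A minimal sketch of where that matters (property values are illustrative):

    // With enforceRequirements = false, listing "pos" without "tokenize, ssplit" does not
    // fail at construction time; the caller must supply input that already carries the
    // annotations those earlier annotators would have produced.
    Properties partialProps = new Properties();
    partialProps.setProperty("annotators", "pos, lemma");
    StanfordCoreNLP partialPipeline = new StanfordCoreNLP(partialProps, false);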


  private static final int MAXIMUM_QUERY_LENGTH = 4096;

  public void init()
    throws ServletException
  {
    pipeline = new StanfordCoreNLP();

    String xslPath = getServletContext().
                       getRealPath("/WEB-INF/data/CoreNLP-to-HTML.xsl");

    try {
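
The servlet above loads the CoreNLP-to-HTML.xsl stylesheet to turn the pipeline's XML output into HTML. Producing that XML from a request is roughly the sketch below, reusing the servlet's pipeline field; queryText is a hypothetical variable standing for the submitted text.

    // Annotate the submitted text and serialize the result as CoreNLP XML,
    // which the XSL stylesheet can then render as HTML.
    Annotation annotation = pipeline.process(queryText);
    StringWriter xml = new StringWriter();   // java.io.StringWriter
    pipeline.xmlPrint(annotation, xml);      // declared to throw IOException
    // xml.toString() is what the CoreNLP-to-HTML.xsl transform would be applied to.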

    // "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");

    if (posModelPath != null) {
      props.setProperty("pos.model", posModelPath);
    }
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);

    Redwood.log(Redwood.DBG, "Annotating text");
    pipeline.annotate(doc);
    Redwood.log(Redwood.DBG, "Done annotating text");

    Map<String, List<CoreLabel>> sents = new HashMap<String, List<CoreLabel>>();

    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {

      props.put("tokenize.options", "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");

      if (posModelPath != null) {
        props.setProperty("pos.model", posModelPath);
      }
      pipeline = new StanfordCoreNLP(props);
    }

    String text = "";
    int numLines = 0;
    while(textReader.hasNext()) {
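
The tokenize.options value set above switches off the Penn Treebank normalizations, so parentheses, slashes and asterisks come through unchanged instead of being rewritten to tokens like -LRB-. A quick way to see the effect is a sketch along these lines (tokenize and ssplit only; the sample string is illustrative):

    // With ptb3Escaping=false and normalizeParentheses=false, "(" stays "(" rather than
    // becoming the PTB token -LRB-, and "/" and "*" are not escaped.
    Properties p = new Properties();
    p.setProperty("annotators", "tokenize, ssplit");
    p.setProperty("tokenize.options",
        "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
    StanfordCoreNLP plainTokenizer = new StanfordCoreNLP(p);
    Annotation a = plainTokenizer.process("A (small) test with a / and a *.");
    for (CoreLabel t : a.get(CoreAnnotations.TokensAnnotation.class)) {
      System.out.print(t.word() + " ");
    }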

      props.setProperty("customAnnotatorClass.qen",
        "edu.stanford.nlp.pipeline.QuantifiableEntityNormalizingAnnotator");
    }    */
    // this replicates the tokenizer behavior in StanfordCoreNLP
    props.setProperty("tokenize.options", "invertible,ptb3Escaping=true");
    this.pipeline = new StanfordCoreNLP(props);
  }
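
The "invertible" option used above makes the tokenizer keep enough bookkeeping to reconstruct the original text exactly. After annotation, each token carries its original spelling, character offsets and surrounding whitespace, roughly as in this sketch (annotation stands for a document already processed by the pipeline built above):

    // Inspect the inversion bookkeeping recorded by an invertible tokenizer.
    for (CoreLabel token : annotation.get(CoreAnnotations.TokensAnnotation.class)) {
      String original = token.get(CoreAnnotations.OriginalTextAnnotation.class);
      int begin = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
      int end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
      String before = token.get(CoreAnnotations.BeforeAnnotation.class);  // whitespace before the token
      System.out.println(original + " [" + begin + "," + end + ") preceded by \"" + before + "\"");
    }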

    }
    if (count == 0) {
      throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
    }

    StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);

    if (filename != null) {
      // Process a file.  The pipeline will do tokenization, which
      // means it will split it into sentences as best as possible
      // with the tokenizer.
      List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
      for (Annotation annotation : annotations) {
        pipeline.annotate(annotation);

        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
          System.out.println(sentence);
          outputTree(System.out, sentence, outputFormats);
        }
      }
    } else if (fileList != null) {
      // Process multiple files.  The pipeline will do tokenization,
      // which means it will split it into sentences as best as
      // possible with the tokenizer.  Output will go to filename.out
      // for each file.
      for (String file : fileList.split(",")) {
        List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
        for (Annotation annotation : annotations) {
          pipeline.annotate(annotation);

          FileOutputStream fout = new FileOutputStream(file + ".out");
          PrintStream pout = new PrintStream(fout);
          for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            pout.println(sentence);
            outputTree(pout, sentence, outputFormats);
          }
          pout.flush();
          fout.close();
        }
      }
    } else {
      // Process stdin.  Each line will be treated as a single sentence.
      System.err.println("Reading in text from stdin.");
      System.err.println("Please enter one sentence per line.");
      System.err.println("Processing will end when EOF is reached.");
      BufferedReader reader = new BufferedReader(IOUtils.encodedInputStreamReader(System.in, "utf-8"));
      while (true) {
        String line = reader.readLine();
        if (line == null) {
          break;
        }
        line = line.trim();
        if (line.length() > 0) {
          Annotation annotation = tokenizer.process(line);
          pipeline.annotate(annotation);
          for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            outputTree(System.out, sentence, outputFormats);
          }
        } else {
          // Output blank lines for blank lines so the tool can be
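
The program above uses two pipelines: a lightweight one built from tokenizerProps that only tokenizes and sentence-splits the input, and a second one built from pipelineProps that runs the heavier annotators over Annotations that are already tokenized. The excerpt does not show those property sets; a hypothetical pair consistent with the pattern might look like this:

    // Hypothetical property sets matching the two-pipeline pattern above.
    Properties tokenizerProps = new Properties();
    tokenizerProps.setProperty("annotators", "tokenize, ssplit");

    Properties pipelineProps = new Properties();
    pipelineProps.setProperty("annotators", "pos, lemma, ner");
    // The second pipeline receives input that is already tokenized and split,
    // so prerequisite checking is relaxed when it is constructed.
    pipelineProps.setProperty("enforceRequirements", "false");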

    props.setProperty("annotators", "tokenize, ssplit, pos, number, qen");
    props.setProperty("customAnnotatorClass.number",
        "edu.stanford.nlp.pipeline.NumberAnnotator");
    props.setProperty("customAnnotatorClass.qen",
        "edu.stanford.nlp.pipeline.QuantifiableEntityNormalizingAnnotator");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    return pipeline;
  }
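
The customAnnotatorClass.* properties above register the extra "number" and "qen" annotators under the names used in the annotators list. Once the pipeline runs, numeric tokens end up with a NER tag and a normalized value, roughly as in this sketch (the sample sentence is illustrative):

    // After the number/qen annotators run, numeric tokens typically carry a NER tag
    // such as NUMBER together with a normalized value.
    StanfordCoreNLP numericPipeline = makeNumericPipeline();
    Annotation ann = numericPipeline.process("I bought twenty-five apples.");
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
        System.out.println(token.word()
            + "\t" + token.get(CoreAnnotations.NamedEntityTagAnnotation.class)
            + "\t" + token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
      }
    }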

      }
    }
  }

  private static void run(String header, String [] texts, String [][] answers, String [][] normed) {
    StanfordCoreNLP pipe = makeNumericPipeline();
    for(int i = 0; i < texts.length; i ++) {
      if(VERBOSE) {
        System.out.println("Running test " + header + " for text: " + texts[i]);
      }
      checkLabels(pipe,

    CoreMapExpressionExtractor extractor = CoreMapExpressionExtractor
      .createExtractorFromFiles(
        TokenSequencePattern.getNewEnv(),
        rules);

    StanfordCoreNLP pipeline = new StanfordCoreNLP();
    Annotation annotation;
    if (args.length > 1) {
      annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[1]));
    } else {
//      annotation = new Annotation("I know Fred has acne.  And Wilma has breast cancer.");
      annotation = new Annotation("( ( five plus three plus four ) * 2 ) divided by three");

    }

    pipeline.annotate(annotation);

    // An Annotation is a Map and you can get and use the various analyses individually.
    out.println();
    // The toString() method on an Annotation just prints the text of the Annotation
    // But you can see what is in it with other methods like toShorterString()
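
The comments above suggest two ways to look inside the result: dump the Annotation's contents with toShorterString(), or run the extractor over each sentence and print the matched expressions with their computed values. A sketch, reusing out, annotation and extractor from the snippet above:

    // Show which keys the Annotation holds, then apply the rule-based extractor
    // sentence by sentence and print each match with its value.
    out.println(annotation.toShorterString());
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<MatchedExpression> matches = extractor.extractExpressions(sentence);
      for (MatchedExpression matched : matches) {
        out.println("Matched: " + matched.getText() + " -> value " + matched.getValue());
      }
    }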

  public void setUp() {
    synchronized (DcorefExactOutputITest.class) {
      if (pipeline == null) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
        pipeline = new StanfordCoreNLP(props);
      }
    }
  }
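
After the dcoref pipeline configured in setUp() has annotated a document, the coreference output is stored on the Annotation as a map from chain id to chain. A sketch of reading it back (CorefChain and CorefCoreAnnotations are the classes from the edu.stanford.nlp.dcoref package; document stands for an already annotated Annotation):

    // Read the coreference chains produced by the dcoref annotator.
    Map<Integer, CorefChain> chains =
        document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    for (CorefChain chain : chains.values()) {
      System.out.println(chain.getRepresentativeMention()
          + " <- " + chain.getMentionsInTextualOrder());
    }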
