Examples of edu.stanford.nlp.pipeline.StanfordCoreNLP

edu.stanford.nlp.pipeline.StanfordCoreNLP
This is a pipeline that takes in a string and returns various analyzed linguistic forms. The string is tokenized by a tokenizer (using a TokenizerAnnotator), and then other sequence-model-style annotators can be used to add things like lemmas, POS tags, and named entities. These are returned as a list of CoreLabels. Other analysis components build and store parse trees, dependency graphs, etc.
This class is designed to apply multiple Annotators to an Annotation. The idea is that you first build up the pipeline by adding Annotators, and then you take the objects you wish to annotate and pass them in and get in return a fully annotated object. At the command-line level you can, e.g., tokenize text with StanfordCoreNLP with a command like:
```
 java edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit -file document.txt 
```
Please see the package level javadoc for sample usage and a more complete description.
The main entry point for the API is StanfordCoreNLP.process().
Implementation note: There are other annotation pipelines, but they don't extend this one. Look for classes that implement Annotator and that have "Pipeline" in their name.
Authors: Jenny Finkel, Anna Rafferty, Christopher Manning, Mihai Surdeanu, Steven Bethard.

    String input = args[0];
    String output = args[1];


    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);


    // for example
    // "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm"
    String doc = IOUtils.slurpFile(input);
    Annotation annotation = pipeline.process(doc);
    Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    saveResults(output, chains);
  }

View Full Code Here

  public void testSpanish() {
    Annotation ann = new Annotation("Damelo");
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize");
    props.setProperty("tokenize.language", "es");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(ann);


    Iterator<String> it = spanishTokens.iterator();
    for (CoreLabel word : ann.get(CoreAnnotations.TokensAnnotation.class)) {
      assertEquals("Bung token in new CoreLabel usage", it.next(), word.get(CoreAnnotations.TextAnnotation.class));
    }

View Full Code Here

  public static void main(String[] args) throws Exception {
    // just a simple test, to make sure stuff works
    Properties props = StringUtils.argsToProperties(args);
    RothCONLL04Reader reader = new RothCONLL04Reader();
    reader.setLoggerLevel(Level.INFO);
    reader.setProcessor(new StanfordCoreNLP(props));
    Annotation doc = reader.parse("/u/nlp/data/RothCONLL04/conll04.corp");
    System.out.println(AnnotationUtils.datasetToString(doc));
  }

View Full Code Here

  }


  // simple testing code
  public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    AceReader r = new AceReader(new StanfordCoreNLP(props, false), false);
    r.setLoggerLevel(Level.INFO);
    r.parse("/scr/nlp/data/ACE2005/");
    // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data");
    // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false);
    System.err.println("done");

View Full Code Here

      annoSb.append(", parse");
    }
    String annoStr = annoSb.toString();
    SieveCoreferenceSystem.logger.info("MentionExtractor ignores specified annotators, using annotators=" + annoStr);
    pipelineProps.put("annotators", annoStr);
    return new StanfordCoreNLP(pipelineProps, false);
  }

View Full Code Here

  }
  
  private void modifyUsingCoreNLPNER(Annotation doc) {
    Properties ann = new Properties();
    ann.setProperty("annotators", "pos, lemma, ner");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(ann, false);
    pipeline.annotate(doc);
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      if (entities != null) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (EntityMention en : entities) {

View Full Code Here

   * Test that postprocessing like CC-processing can handle the parser
   * output properly
   */
  public void testCCProcess() {
    Properties props = PropertiesUtils.fromString("annotators=tokenize,ssplit,pos,depparse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);


    String text = "Chris and John went to the store.";
    Annotation document = new Annotation(text);
    pipeline.annotate(document);


    SemanticGraph ccProcessed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0)
                                        .get(
                                            SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    Collection<TypedDependency> dependencies = ccProcessed.typedDependencies();

View Full Code Here

  public void testSerializationAnnotation() throws IOException, ClassNotFoundException {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
    String text = "Barack Obama, a Yale professor, is president.";
    Annotation document = new Annotation(text);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);


    // Serialization should not bork.
    File tempfile = IOUtils.writeObjectToTempFile(document.get(CoreAnnotations.SentencesAnnotation.class), "temp");


    // Deserialization should not bork.

View Full Code Here

  // Precompiled regex: "what", then any characters, then " is ", with capture group 1
  // holding whatever follows.
  // NOTE(review): the name suggests the match is used to drop a question prefix — the
  // usage is not visible in this excerpt; confirm against the call site.
  private static final Pattern dropPattern = Pattern.compile("what.* is (.*)");


  public QuestionGenerator() throws IOException {
    Properties props = new Properties();
    props.put("annotators", "tokenize,ssplit,pos,parse");
    pipeline = new StanfordCoreNLP(props);
    LogInfo.begin_track("uploading lexicon");
    uploadAlignmentLexicon();
    LogInfo.logs("Number of lexicon formulas: %s",formulaToLexemsMap.size());
    LogInfo.end_track();
  }

View Full Code Here

      props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz");
    } else {
      props.put("pos.model", "edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger");
      props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz");
    }
    pipeline = new StanfordCoreNLP(props);
  }

View Full Code Here

0 1 2

TOP

Related Classes of edu.stanford.nlp.pipeline.StanfordCoreNLP

AnnotationBridge

com.lulu.WordCloud.StanfordLemmatizer

edu.stanford.nlp.dcoref.DcorefExactOutputITest

edu.stanford.nlp.dcoref.MentionExtractor

edu.stanford.nlp.ie.machinereading.domains.ace.AceReader

edu.stanford.nlp.ie.machinereading.domains.roth.RothCONLL04Reader

edu.stanford.nlp.ie.machinereading.GenericDataSetReader

edu.stanford.nlp.ie.machinereading.MachineReading

edu.stanford.nlp.ie.NumberSequenceClassifierITest

edu.stanford.nlp.international.spanish.SpanishTokenizerAnnotatorITest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.