Package edu.stanford.nlp.pipeline

Examples of edu.stanford.nlp.pipeline.StanfordCoreNLP
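All of the fragments below share the same pattern: set an "annotators" property, construct a StanfordCoreNLP pipeline, annotate an Annotation, and read typed annotations back out of it. As a point of reference, a minimal self-contained sketch of that pattern (class name and example sentence are illustrative, not taken from any of the fragments) might look like this:

    import java.util.Properties;

    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.pipeline.Annotation;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import edu.stanford.nlp.util.CoreMap;

    public class PipelineSketch {
      public static void main(String[] args) {
        // Tokenize, sentence-split, tag, and lemmatize a short piece of text.
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("Stanford University is located in California.");
        pipeline.annotate(document);

        // Read the annotations back out, token by token.
        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
          for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            System.out.println(token.word() + "\t"
                + token.get(CoreAnnotations.PartOfSpeechAnnotation.class) + "\t"
                + token.get(CoreAnnotations.LemmaAnnotation.class));
          }
        }
      }
    }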


    // Fragment of a main/test method: the input document path and the output file
    // path arrive as command-line arguments.
    String input = args[0];
    String output = args[1];

    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // Read the whole input file (for example
    // "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm")
    // into one string, run the full pipeline over it, and pull out the
    // coreference chains.
    String doc = IOUtils.slurpFile(input);
    Annotation annotation = pipeline.process(doc);
    Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    saveResults(output, chains);  // saveResults is a helper defined elsewhere in this class
  }
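The saveResults helper above is defined elsewhere in the class and is not shown. A purely illustrative sketch of walking the returned chain map directly (not part of the original code) could be:

    // Illustrative only: print each coreference chain keyed by its chain id.
    for (Map.Entry<Integer, CorefChain> chain : chains.entrySet()) {
      System.out.println("chain " + chain.getKey() + ": " + chain.getValue());
    }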


  public void testSpanish() {
    Annotation ann = new Annotation("Damelo");
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize");
    props.setProperty("tokenize.language", "es");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(ann);

    // spanishTokens is the expected token list, defined elsewhere in the test class.
    Iterator<String> it = spanishTokens.iterator();
    for (CoreLabel word : ann.get(CoreAnnotations.TokensAnnotation.class)) {
      assertEquals("Bung token in new CoreLabel usage", it.next(), word.get(CoreAnnotations.TextAnnotation.class));
    }
  }

  public static void main(String[] args) throws Exception {
    // just a simple test, to make sure stuff works
    Properties props = StringUtils.argsToProperties(args);
    RothCONLL04Reader reader = new RothCONLL04Reader();
    reader.setLoggerLevel(Level.INFO);
    reader.setProcessor(new StanfordCoreNLP(props));
    Annotation doc = reader.parse("/u/nlp/data/RothCONLL04/conll04.corp");
    System.out.println(AnnotationUtils.datasetToString(doc));
  }
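This fragment and the AceReader fragment below build their CoreNLP processor from command-line flags via StringUtils.argsToProperties, which turns "-key value" pairs into a Properties object. A hedged sketch of an equivalent hard-coded invocation (the annotator list here is an assumption, not taken from the fragment):

    // Assumed flag values for illustration; any CoreNLP property can be passed this way.
    Properties props = StringUtils.argsToProperties(
        new String[]{"-annotators", "tokenize, ssplit, pos, lemma, ner, parse"});
    StanfordCoreNLP processor = new StanfordCoreNLP(props);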


  // simple testing code
  public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    AceReader r = new AceReader(new StanfordCoreNLP(props, false), false);
    r.setLoggerLevel(Level.INFO);
    r.parse("/scr/nlp/data/ACE2005/");
    // Annotation a = r.parse("/user/mengqiu/scr/twitter/nlp/corpus_prep/standalone/ar/data");
    // BasicEntityExtractor.saveCoNLLFiles("/tmp/conll", a, false, false);
    System.err.println("done");
  }

      // Tail of an if-block that appends "parse" when a parse annotator is needed.
      annoSb.append(", parse");
    }
    String annoStr = annoSb.toString();
    SieveCoreferenceSystem.logger.info("MentionExtractor ignores specified annotators, using annotators=" + annoStr);
    pipelineProps.put("annotators", annoStr);
    return new StanfordCoreNLP(pipelineProps, false);
  }
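Several fragments on this page pass a second boolean argument to the StanfordCoreNLP constructor. That flag is enforceRequirements: when it is false, the pipeline does not check that each annotator's prerequisites (e.g. tokenize before pos) are present, which is what you want when running a partial annotator list over an already-annotated document, as the next fragment does. A minimal hedged sketch:

    // Sketch: add POS, lemma and NER annotations to a document that has already
    // been tokenized and sentence-split, without requirement checking.
    Properties addOnProps = new Properties();
    addOnProps.setProperty("annotators", "pos, lemma, ner");
    StanfordCoreNLP addOnPipeline = new StanfordCoreNLP(addOnProps, false);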

  private void modifyUsingCoreNLPNER(Annotation doc) {
    Properties ann = new Properties();
    ann.setProperty("annotators", "pos, lemma, ner");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(ann, false);
    pipeline.annotate(doc);
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      if (entities != null) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (EntityMention en : entities) {

  /**
   * Test that postprocessing like CC-processing can handle the parser
   * output properly.
   */
  public void testCCProcess() {
    Properties props = PropertiesUtils.fromString("annotators=tokenize,ssplit,pos,depparse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    String text = "Chris and John went to the store.";
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    SemanticGraph ccProcessed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0)
        .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    Collection<TypedDependency> dependencies = ccProcessed.typedDependencies();
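    // The remainder of the test (not shown here) checks the dependencies produced
    // above; an illustrative way to inspect them, e.g. to see that both "Chris"
    // and "John" end up attached to "went" after CC-processing, is simply:
    for (TypedDependency dep : dependencies) {
      System.out.println(dep);
    }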

  public void testSerializationAnnotation() throws IOException, ClassNotFoundException {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
    String text = "Barack Obama, a Yale professor, is president.";
    Annotation document = new Annotation(text);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);

    // Serialization should not bork.
    File tempfile = IOUtils.writeObjectToTempFile(document.get(CoreAnnotations.SentencesAnnotation.class), "temp");

    // Deserialization should not bork.
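    // The truncated part of the test reads the sentences back in. A hedged sketch
    // of that step (assuming edu.stanford.nlp.io.IOUtils.readObjectFromFile; the
    // original assertions are not shown):
    List<CoreMap> reread = IOUtils.readObjectFromFile(tempfile);
    assertEquals(document.get(CoreAnnotations.SentencesAnnotation.class).size(), reread.size());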

  private static final Pattern dropPattern = Pattern.compile("what.* is (.*)");

  public QuestionGenerator() throws IOException {
    Properties props = new Properties();
    props.put("annotators", "tokenize,ssplit,pos,parse");
    pipeline = new StanfordCoreNLP(props);
    LogInfo.begin_track("uploading lexicon");
    uploadAlignmentLexicon();
    LogInfo.logs("Number of lexicon formulas: %s", formulaToLexemsMap.size());
    LogInfo.end_track();
  }

      props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz");
    } else {
      props.put("pos.model", "edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger");
      props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz");
    }
    pipeline = new StanfordCoreNLP(props);
  }
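For reference, a self-contained hedged sketch of a fully caseless pipeline using the model paths from the fragment above (the annotator list itself is an assumption, since it is not shown in the fragment):

    // Illustrative: caseless POS and NER models for text without reliable capitalization.
    Properties caselessProps = new Properties();
    caselessProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
    caselessProps.setProperty("pos.model",
        "edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger");
    caselessProps.setProperty("ner.model",
        "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,"
            + "edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz");
    StanfordCoreNLP caselessPipeline = new StanfordCoreNLP(caselessProps);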
