Package edu.stanford.nlp.pipeline

Examples of edu.stanford.nlp.pipeline.Annotation
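
Before the individual snippets, here is a minimal self-contained sketch of the typical Annotation lifecycle: construct it over raw text, let a StanfordCoreNLP pipeline fill it in, then read the results back by annotation key. The annotator chain and the example sentence are assumptions chosen for illustration; any chain that includes tokenize and ssplit populates the keys read below.

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class AnnotationLifecycleSketch {
  public static void main(String[] args) {
    // Build a small pipeline.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // An Annotation starts out as a typed map holding only the raw text...
    Annotation document = new Annotation("Stanford is in California. It was founded in 1885.");
    // ...and each annotator adds keyed values (tokens, sentences, POS tags, ...).
    pipeline.annotate(document);

    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
        System.out.println(token.word() + "\t" + token.tag());
      }
    }
  }
}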



    public static Annotation sentencesToDocument(String documentID, List<CoreMap> sentences)
    {
      // No raw document text is available here; the Annotation is assembled
      // from the pre-tokenized sentences instead.
      String docText = null;
      Annotation document = new Annotation(docText);
      document.set(CoreAnnotations.DocIDAnnotation.class, documentID);
      document.set(CoreAnnotations.SentencesAnnotation.class, sentences);


      // Accumulate docTokens and label each sentence with its document-level
      // token begin/end offsets and its sentence index.
      List<CoreLabel> docTokens = new ArrayList<CoreLabel>();
      int sentenceIndex = 0;
      int tokenBegin = 0;
      for (CoreMap sentenceAnnotation:sentences) {
        List<CoreLabel> sentenceTokens = sentenceAnnotation.get(CoreAnnotations.TokensAnnotation.class);
        docTokens.addAll(sentenceTokens);

        int tokenEnd = tokenBegin + sentenceTokens.size();
        sentenceAnnotation.set(CoreAnnotations.TokenBeginAnnotation.class, tokenBegin);
        sentenceAnnotation.set(CoreAnnotations.TokenEndAnnotation.class, tokenEnd);
        sentenceAnnotation.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);
        sentenceIndex++;
        tokenBegin = tokenEnd;
      }
      document.set(CoreAnnotations.TokensAnnotation.class, docTokens);

      // Put in character offsets
      int i = 0;
      for (CoreLabel token:docTokens) {
        String tokenText = token.get(CoreAnnotations.TextAnnotation.class);
        // ...
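
The snippet above is truncated inside the offset loop. Purely as an illustration of how character offsets relate to tokens (this is a sketch, not the remainder of the original method), offsets for a document whose text is the tokens joined by single spaces could be assigned as follows:

import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;

class CharacterOffsetSketch {
  /** Assigns begin/end character offsets, assuming one space between consecutive tokens. */
  static String setCharacterOffsets(List<CoreLabel> docTokens) {
    StringBuilder docText = new StringBuilder();
    int offset = 0;
    for (CoreLabel token : docTokens) {
      String tokenText = token.get(CoreAnnotations.TextAnnotation.class);
      token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
      docText.append(tokenText);
      offset += tokenText.length();
      token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
      docText.append(' ');  // single separating space
      offset += 1;
    }
    return docText.toString().trim();
  }
}

The returned string would then be set as the document's TextAnnotation so that the token offsets line up with the text.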


      List<CoreMap> sentences = new ArrayList<CoreMap>(document.sentenceWordLists.size());
      for (List<String[]> sentWords:document.sentenceWordLists) {
        sentences.add(wordsToSentence(sentWords));
      }

      Annotation docAnnotation = sentencesToDocument(document.documentIdPart /*document.documentID + "." + document.partNo */, sentences);
      document.setAnnotation(docAnnotation);

      // Do this here so we have updated character offsets and all
      CollectionValuedMap<String, CoreMap> corefChainMap = new CollectionValuedMap<String, CoreMap>(CollectionFactory.<CoreMap>arrayListFactory());
      List<CoreMap> nerChunks = new ArrayList<CoreMap>();
      // ...

  /**
   * Parses one file or directory with data from one domain.
   * @param path file or directory to parse
   * @throws IOException
   */
  public final Annotation parse(String path) throws IOException {
    Annotation retVal; // set below, or an exception is thrown

    try {
      //
      // this must return a dataset Annotation. each sentence in this dataset must contain:
      // - TokensAnnotation
      // ...

    return parse(tokens, null);
  }

  protected Tree parse(List<CoreLabel> tokens,
                       List<ParserConstraint> constraints) {
    // Wrap the tokens in a single-sentence document Annotation so the parser annotator can run on it.
    CoreMap sent = new Annotation("");
    sent.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sent.set(ParserAnnotations.ConstraintAnnotation.class, constraints);
    Annotation doc = new Annotation("");
    List<CoreMap> sents = new ArrayList<CoreMap>();
    sents.add(sent);
    doc.set(CoreAnnotations.SentencesAnnotation.class, sents);
    getParser().annotate(doc);
    sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
    return sents.get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
  }
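
Callers that start from pre-tokenized text rather than a raw string need to build the List<CoreLabel> that methods like the one above expect. A minimal sketch using CoreLabelTokenFactory (the single-space assumption and the helper name are illustrative, not part of the original code):

import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;

class PreTokenizedSketch {
  /** Turns an already-tokenized sentence into CoreLabels with text and character offsets set. */
  static List<CoreLabel> toCoreLabels(String... words) {
    CoreLabelTokenFactory factory = new CoreLabelTokenFactory();
    List<CoreLabel> tokens = new ArrayList<CoreLabel>();
    int offset = 0;
    for (String word : words) {
      // makeToken fills in the token text and its begin offset / length.
      tokens.add(factory.makeToken(word, offset, word.length()));
      offset += word.length() + 1;  // assume a single space between words
    }
    return tokens;
  }
}

The resulting list can be set as the TokensAnnotation of a sentence CoreMap, exactly as the parse method above does before handing the document to the parser annotator.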

  public void testCCProcess() {
    Properties props = PropertiesUtils.fromString("annotators=tokenize,ssplit,pos,depparse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    String text = "Chris and John went to the store.";
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    SemanticGraph ccProcessed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0)
        .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    Collection<TypedDependency> dependencies = ccProcessed.typedDependencies();

    GrammaticalRelation expected = EnglishGrammaticalRelations.getConj("and");
    // ...
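
The test above is cut off before its assertion. As a sketch of how the check could be completed (not necessarily the original test body), a small helper can scan the typed dependencies for the expected relation, using edu.stanford.nlp.trees.TypedDependency and GrammaticalRelation plus java.util.Collection:

  // Illustrative helper: does the dependency collection contain an edge with the given relation?
  private static boolean containsRelation(Collection<TypedDependency> dependencies,
                                          GrammaticalRelation expected) {
    for (TypedDependency dependency : dependencies) {
      if (dependency.reln().equals(expected)) {
        return true;
      }
    }
    return false;
  }

The truncated test would then assert something along the lines of assertTrue(containsRelation(dependencies, expected)).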

  public void testSerializationAnnotation() throws IOException, ClassNotFoundException {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
    String text = "Barack Obama, a Yale professor, is president.";
    Annotation document = new Annotation(text);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);

    // Serialization should not bork.
    File tempfile = IOUtils.writeObjectToTempFile(document.get(CoreAnnotations.SentencesAnnotation.class), "temp");

    // Deserialization should not bork.
    List<CoreMap> readSentences = IOUtils.readObjectFromFile(tempfile);

    // Make sure we didn't lose any information
    assertEquals(document.get(CoreAnnotations.SentencesAnnotation.class), readSentences);
  }
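
Plain Java serialization, as tested above, works for the sentence CoreMaps; recent CoreNLP releases also provide an AnnotationSerializer hierarchy for round-tripping whole documents. A rough sketch with the protobuf-backed implementation (class and method names as in recent releases; verify against the version you use):

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;

class WholeDocumentSerializationSketch {
  static Annotation roundTrip(Annotation document, File file) throws IOException, ClassNotFoundException {
    ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
    try (OutputStream os = new BufferedOutputStream(new FileOutputStream(file))) {
      serializer.write(document, os);   // write the whole document Annotation
    }
    try (InputStream is = new BufferedInputStream(new FileInputStream(file))) {
      return serializer.read(is).first; // read it back; Pair.first is the Annotation
    }
  }
}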

      @Override
      public Annotation next() {
        if (this.annotation == null) {
          throw new NoSuchElementException();
        }
        // Return the buffered Annotation and pre-fetch the next one.
        Annotation toReturn = this.annotation;
        this.annotation = this.findAnnotation();
        return toReturn;
      }

      // ...

    String docID = docElem.getAttributeValue("id");
    // The document ID is expected to embed the document date; extract it with the precompiled pattern.
    Matcher matcher = datePattern.matcher(docID);
    matcher.find();
    Calendar docDate = new Timex("DATE", matcher.group(1)).getDate();

    Annotation document = new Annotation(text.toString());
    document.set(CoreAnnotations.DocIDAnnotation.class, docID);
    document.set(CoreAnnotations.CalendarAnnotation.class, docDate);
    document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    return document;
  }

          new WordToSentenceProcessor<CoreLabel>(true); // treat input as one sentence


  private static void checkResult(WordToSentenceProcessor<CoreLabel> wts,
                                  String testSentence, String... gold) {
    Annotation annotation = new Annotation(testSentence);
    ptbNL.annotate(annotation);
    List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    List<List<CoreLabel>> sentences = wts.process(tokens);

    assertEquals("Output number of sentences didn't match:\n" +
            Arrays.toString(gold) + " vs. \n" + sentences + '\n',
            gold.length, sentences.size());

    Annotation[] goldAnnotations = new Annotation[gold.length];
    for (int i = 0; i < gold.length; ++i) {
      goldAnnotations[i] = new Annotation(gold[i]);
      ptb.annotate(goldAnnotations[i]);
      List<CoreLabel> goldTokens =
        goldAnnotations[i].get(CoreAnnotations.TokensAnnotation.class);
      List<CoreLabel> testTokens = sentences.get(i);
      int goldTokensSize = goldTokens.size();
      // ...

            "as I like chocolate. And cookies.",
            "");
  }

  public void testExclamationPoint() {
    Annotation annotation = new Annotation("Foo!!");
    ptb.annotate(annotation);
    List<CoreLabel> list = annotation.get(CoreAnnotations.TokensAnnotation.class);
    assertEquals("Wrong double bang", "[Foo, !!]", list.toString());
  }
