Package edu.stanford.nlp.pipeline

Examples of edu.stanford.nlp.pipeline.Annotation

The excerpts below, taken from the Stanford CoreNLP codebase, show common ways of creating Annotation objects and reading results back out of them.

/**
 * @author Angel Chang
 */
public class TextAnnotationCreator extends AbstractTextAnnotationCreator {
  @Override
  public Annotation createFromText(String text) throws IOException {
    return new Annotation(text);
  }
}
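Before the longer excerpts, here is a minimal, self-contained sketch of the most common pattern: wrap raw text in an Annotation and hand it to a StanfordCoreNLP pipeline. The class name AnnotationFromTextExample and the example sentence are invented for illustration; the API calls are the standard pipeline entry points.

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

import java.util.Properties;

public class AnnotationFromTextExample {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // An Annotation is a typesafe map; the constructor stores the raw text
    // under CoreAnnotations.TextAnnotation, and annotators fill in the rest.
    Annotation document = new Annotation("Stanford is in California. It was founded in 1891.");
    pipeline.annotate(document);

    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
      System.out.println(sentence.get(CoreAnnotations.TextAnnotation.class));
    }
  }
}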



    private Annotation createDoc(String docId, List<IN> tokens, List<IntPair> sentenceBoundaries, boolean includeText) {
      try {
        String docText = includeText ? join(tokens, CoreAnnotations.TextAnnotation.class, " ") : null;
        Annotation doc = new Annotation(docText);
        doc.set(CoreAnnotations.DocIDAnnotation.class, docId);
        Class tokensClass = Class.forName(tokensAnnotationClassName);
        doc.set(tokensClass, tokens);
        boolean setTokenCharOffsets = includeText;
        if (setTokenCharOffsets) {
          int i = 0;
          for (IN token : tokens) {
            String tokenText = token.get(CoreAnnotations.TextAnnotation.class);
            token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, i);
            i += tokenText.length();
            token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, i);
            /*
             * if (i > docText.length()) { System.err.println("index " + i +
             * " larger than docText length " + docText.length());
             * System.err.println("Token: " + tokenText);
             * System.err.println("DocText: " + docText); }
             */
            assert (i <= docText.length());
            i++; // Skip space
          }
        }
        if (sentenceBoundaries != null) {
          List<CoreMap> sentences = new ArrayList<CoreMap>(sentenceBoundaries.size());
          for (IntPair p : sentenceBoundaries) {
            // get the sentence text from the first and last character offsets
            List<IN> sentenceTokens = new ArrayList<IN>(tokens.subList(p.getSource(), p.getTarget() + 1));
            Integer begin = sentenceTokens.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            int last = sentenceTokens.size() - 1;
            Integer end = sentenceTokens.get(last).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
            String sentenceText = includeText ? join(sentenceTokens, CoreAnnotations.TextAnnotation.class, " ") : null;

            // create a sentence annotation with text and token offsets
            Annotation sentence = new Annotation(sentenceText);
            sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
            sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
            sentence.set(tokensClass, sentenceTokens);
            sentence.set(CoreAnnotations.TokenBeginAnnotation.class, p.getSource());
            sentence.set(CoreAnnotations.TokenEndAnnotation.class, p.getTarget() + 1);
            int sentenceIndex = sentences.size();
            sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);

            // add the sentence to the list
            sentences.add(sentence);
          }
          // add the sentence annotations to the document
          doc.set(CoreAnnotations.SentencesAnnotation.class, sentences);
        }
        return doc;
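The createDoc method above builds an Annotation by hand from pre-tokenized input rather than from raw text: it sets the token list, character offsets, and sentence spans itself instead of running tokenize/ssplit. A stripped-down sketch of the same idea with a hard-coded token list and a single sentence; the class name and example text are invented for illustration.

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.util.CoreMap;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class ManualAnnotationExample {
  public static void main(String[] args) {
    String[] words = {"Stanford", "is", "in", "California", "."};
    String docText = "Stanford is in California .";

    // build CoreLabel tokens with character offsets into docText
    List<CoreLabel> tokens = new ArrayList<CoreLabel>();
    int offset = 0;
    for (String w : words) {
      CoreLabel token = new CoreLabel();
      token.set(CoreAnnotations.TextAnnotation.class, w);
      token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
      offset += w.length();
      token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
      offset++; // skip the joining space
      tokens.add(token);
    }

    // document-level annotation holding the text and the token list
    Annotation doc = new Annotation(docText);
    doc.set(CoreAnnotations.TokensAnnotation.class, tokens);

    // a single sentence spanning all tokens
    Annotation sentence = new Annotation(docText);
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
    sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokens.size());
    sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, 0);
    doc.set(CoreAnnotations.SentencesAnnotation.class,
        Collections.<CoreMap>singletonList(sentence));
  }
}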

    return parse(tokens, null);
  }

  private Tree parse(List<CoreLabel> tokens,
                     List<ParserConstraint> constraints) {
    CoreMap sent = new Annotation("");
    sent.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sent.set(ParserAnnotations.ConstraintAnnotation.class, constraints);
    Annotation doc = new Annotation("");
    List<CoreMap> sents = new ArrayList<CoreMap>(1);
    sents.add(sent);
    doc.set(CoreAnnotations.SentencesAnnotation.class, sents);
    getParser().annotate(doc);
    sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
    return sents.get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
  }
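The helper above wraps a token list in a throwaway single-sentence Annotation so that a parser annotator can run on arbitrary tokens. When starting from plain text instead, the usual route is to include parse in the annotators list and read TreeCoreAnnotations.TreeAnnotation off each sentence. A minimal sketch; the class name and sentence are invented for illustration.

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

import java.util.Properties;

public class ParseTreeExample {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation doc = new Annotation("The quick brown fox jumps over the lazy dog.");
    pipeline.annotate(doc);

    // each sentence CoreMap carries its constituency tree after parsing
    CoreMap sentence = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    tree.pennPrint();
  }
}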

    printConllOutput(document, writer, orderedMentions, gold);
  }

  public static void printConllOutput(Document document, PrintWriter writer, List<List<Mention>> orderedMentions, boolean gold)
  {
    Annotation anno = document.annotation;
    List<List<String[]>> conllDocSentences = document.conllDoc.sentenceWordLists;
    String docID = anno.get(CoreAnnotations.DocIDAnnotation.class);
    StringBuilder sb = new StringBuilder();
    sb.append("#begin document ").append(docID).append("\n");
    List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
    for(int sentNum = 0 ; sentNum < sentences.size() ; sentNum++){
      List<CoreLabel> sentence = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class);
      List<String[]> conllSentence = conllDocSentences.get(sentNum);
      Map<Integer,Set<Mention>> mentionBeginOnly = Generics.newHashMap();
      Map<Integer,Set<Mention>> mentionEndOnly = Generics.newHashMap();
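printConllOutput works over a coreference Document and its mention lists to produce CoNLL-format output. Callers who only need the resolved coreference chains from a pipeline run can read them straight off the Annotation; a compact sketch, assuming the older dcoref annotator and its CorefCoreAnnotations keys from this era of CoreNLP (class name and text are invented).

import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

import java.util.Map;
import java.util.Properties;

public class CorefChainsExample {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation doc = new Annotation("John bought a car. He drives it every day.");
    pipeline.annotate(doc);

    // chains are keyed by the id of the representative mention's cluster
    Map<Integer, CorefChain> chains =
        doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    for (CorefChain chain : chains.values()) {
      System.out.println(chain);
    }
  }
}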

    if (input.length() > MAXIMUM_QUERY_LENGTH) {
      out.print("<div>This query is too long.  If you want to run very long queries, please download and use our <a href=\"http://nlp.stanford.edu/software/corenlp.shtml\">publicly released distribution</a>.</div>");
      return;
    }
   
    Annotation annotation = new Annotation(input);
    pipeline.annotate(annotation);

    String outputFormat = request.getParameter("outputFormat");
    if (outputFormat == null || outputFormat.trim().equals("")) {
      outputFormat = this.defaultFormat;
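The servlet above picks an output format requested by the client. Programmatically, StanfordCoreNLP exposes printers for the same purpose; a sketch using prettyPrint and xmlPrint (the class name and input text are invented, and XML output in this era of CoreNLP generally needs the XOM jar from the distribution on the classpath).

import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.Properties;

public class OutputFormatExample {
  public static void main(String[] args) throws IOException {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation = new Annotation("Stanford is in California.");
    pipeline.annotate(annotation);

    PrintWriter out = new PrintWriter(System.out);
    // human-readable dump of the annotation
    pipeline.prettyPrint(annotation, out);
    // XML serialization of the same annotation
    pipeline.xmlPrint(annotation, out);
    out.flush();
  }
}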

      if (annotatorType == null) {
        annotatorType = "sutime";
      }
      Annotator timeAnnotator = pipeline.getTimeAnnotator(annotatorType, props);
      if (timeAnnotator != null) {
        Annotation anno = pipeline.process(query, dateString, timeAnnotator);
        out.println("<h3>Annotated Text</h3> <em>(tagged using " + annotatorType + "</em>)");
        displayAnnotation(out, query, anno, includeOffsets);
      } else {
        out.println("<br><br>Error creating annotator for " + annotatorType);
      }

    }
  }

  public static Map<String, List<CoreLabel>> runPOSNEROnTokens(List<CoreMap> sentsCM, String posModelPath, boolean useTargetNERRestriction,
      String prefix, boolean useTargetParserParentRestriction, String numThreads) {
    Annotation doc = new Annotation(sentsCM);

    Properties props = new Properties();
    List<String> anns = new ArrayList<String>();
    anns.add("pos");
    anns.add("lemma");

    if (useTargetParserParentRestriction) {
      anns.add("parse");

    }
    if (useTargetNERRestriction) {
      anns.add("ner");
    }

    props.setProperty("annotators", StringUtils.join(anns, ","));
    props.setProperty("parse.maxlen", "80");
    props.setProperty("nthreads", numThreads);
    props.setProperty("threads", numThreads);

    // props.put( "tokenize.options",
    // "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");

    if (posModelPath != null) {
      props.setProperty("pos.model", posModelPath);
    }
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);

    Redwood.log(Redwood.DBG, "Annotating text");
    pipeline.annotate(doc);
    Redwood.log(Redwood.DBG, "Done annotating text");

    Map<String, List<CoreLabel>> sents = new HashMap<String, List<CoreLabel>>();

    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      if (useTargetParserParentRestriction)
        inferParentParseTag(s.get(TreeAnnotation.class));
      sents.put(prefix + s.get(CoreAnnotations.DocIDAnnotation.class), s.get(CoreAnnotations.TokensAnnotation.class));
    }
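runPOSNEROnTokens feeds already-split sentences back through a pipeline. The two key moves are the Annotation(List<CoreMap>) constructor, which wraps existing sentence CoreMaps as a document, and the StanfordCoreNLP(props, false) constructor, which skips annotator-requirement checks because tokens and sentence splits are already present. A reduced sketch; the class and helper names are invented.

import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

import java.util.List;
import java.util.Properties;

public class PreTokenizedPipelineExample {
  // sentsCM: sentence CoreMaps that already carry TokensAnnotation,
  // e.g. produced by an earlier tokenize/ssplit run
  public static Annotation tagPreSplitSentences(List<CoreMap> sentsCM) {
    // wrap the existing sentences as a document-level Annotation
    Annotation doc = new Annotation(sentsCM);

    Properties props = new Properties();
    props.setProperty("annotators", "pos,lemma");
    // second argument false: don't enforce annotator requirements,
    // since tokenization and sentence splits are already in place
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);
    pipeline.annotate(doc);
    return doc;
  }
}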

      if (lowercase)
        line = line.toLowerCase();
      text += line+"\n";
    }

    Annotation doc = new Annotation(text);
    pipeline.annotate(doc);
    Redwood.log(Redwood.DBG, "Done annotating text");

    int i = -1;
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      i++;
      if (useTargetParserParentRestriction)
        inferParentParseTag(s.get(TreeAnnotation.class));
      sents.put(sentIDPrefix + i, s.get(CoreAnnotations.TokensAnnotation.class));
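A self-contained variant of the loop above: it reads lines from a file, accumulates them with a StringBuilder rather than repeated string concatenation, annotates the whole text once, and keys each sentence's tokens by an ID prefix plus the sentence index. The file handling, class name, and annotator list are illustrative assumptions.

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

public class SentencesByIdExample {
  public static Map<String, List<CoreLabel>> tokensBySentenceId(String file, String sentIDPrefix)
      throws IOException {
    StringBuilder text = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
      for (String line; (line = reader.readLine()) != null; ) {
        text.append(line.toLowerCase()).append('\n');
      }
    }

    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation doc = new Annotation(text.toString());
    pipeline.annotate(doc);

    Map<String, List<CoreLabel>> sents = new HashMap<String, List<CoreLabel>>();
    int i = -1;
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      i++;
      sents.put(sentIDPrefix + i, s.get(CoreAnnotations.TokensAnnotation.class));
    }
    return sents;
  }
}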

  }

  public Annotation process(String sentence, String dateString, Annotator timeAnnotator)
  {
    System.err.println("Processing text \"" + sentence + "\" with dateString = " + dateString);
    Annotation anno = new Annotation(sentence);
    if (dateString != null && !dateString.equals("")) {
      anno.set(CoreAnnotations.DocDateAnnotation.class, dateString);
    }
    pipeline.annotate(anno);

    timeAnnotator.annotate(anno);
    return anno;
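The process method above sets DocDateAnnotation so that SUTime can resolve relative expressions such as "last Friday" against a reference date. A standalone sketch of the same flow using the stock TimeAnnotator on top of a basic pipeline; the class name, date, and input text are invented for illustration.

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.time.TimeAnnotations;
import edu.stanford.nlp.time.TimeAnnotator;
import edu.stanford.nlp.time.TimeExpression;
import edu.stanford.nlp.util.CoreMap;

import java.util.Properties;

public class SUTimeExample {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation anno = new Annotation("We met last Friday and will meet again next week.");
    // the reference date against which relative expressions are resolved
    anno.set(CoreAnnotations.DocDateAnnotation.class, "2013-07-14");
    pipeline.annotate(anno);

    // run SUTime as a standalone annotator over the tokenized, tagged text
    new TimeAnnotator("sutime", props).annotate(anno);

    for (CoreMap timex : anno.get(TimeAnnotations.TimexAnnotations.class)) {
      System.out.println(timex + " -> "
          + timex.get(TimeExpression.Annotation.class).getTemporal());
    }
  }
}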

    SUTimePipeline pipeline = new SUTimePipeline();
    Annotator timeAnnotator = pipeline.getTimeAnnotator("sutime", new Properties());
    BufferedReader is = new BufferedReader(new InputStreamReader(System.in));
    System.out.print("> ");
    for(String line; (line = is.readLine()) != null; ){
      Annotation ann = pipeline.process(line, null, timeAnnotator);
      System.out.println(ann.get(TimeAnnotations.TimexAnnotations.class));
      System.out.print("> ");
    }
  }