Examples of edu.stanford.nlp.process.Morphology

edu.stanford.nlp.process.Morphology
informatics.susx.ac.uk/research/nlp/carroll/morph.html. There are several ways of invoking Morphology. One is by calling the static methods WordTag stemStatic(String word, String tag) or WordTag stemStatic(WordTag wordTag). If we have created a Morphology object already, we can use the methods WordTag stem(String word, String tag) or WordTag stem(WordTag wordTag).
Another way of using Morphology is to run it on an input file by running java Morphology filename. In this case, POS tags MUST be separated from words by an underscore ("_").
Note that a single instance of Morphology is not thread-safe, as the underlying lexer object is not built to be re-entrant. One thing that you can do to get around this is build a new Morphology object for each set of calls to the Morphology. For example, the MorphaAnnotator builds a Morphology for each document it annotates. The other approach is to use the synchronized methods in this class.
@author Kristina Toutanova (kristina@cs.stanford.edu) @author Christopher Manning

    int numWords = 0;
    int numSentences = 0;


    boolean outputVerbosity = config.getOutputVerbosity();
    boolean outputLemmas = config.getOutputLemmas();
    Morphology morpha = (outputLemmas) ? new Morphology() : null;


    if (outputStyle == OutputStyle.XML ||
        outputStyle == OutputStyle.INLINE_XML) {
      writer.write("<?xml version=\"1.0\" encoding=\"" +
                   config.getEncoding() + "\"?>\n");

View Full Code Here

        numWords += taggedSentence.size();
        outputTaggedSentence(taggedSentence, outputLemmas, outputStyle, outputVerbosity, numSentences, "\n", writer);
        numSentences++;
      }
    } else {
      Morphology morpha = (outputLemmas) ? new Morphology() : null;
      for (List<X> sentence : document) {
        numWords += sentence.size();


        tagAndOutputSentence(sentence, outputLemmas, morpha, outputStyle,
                             outputVerbosity, numSentences, "\n", writer);

View Full Code Here

      if (outputLemmas) {
        // We may want to lemmatize things without using an existing
        // Morphology object, as Morphology objects are not
        // thread-safe, so we would make a new one here
        if (morpha == null) {
          morpha = new Morphology();
        }
        lemmatize(coreLabels, morpha);
      }
      return coreLabels;
    } else {

View Full Code Here

  /** Creates a WordStemmer. The constructor body is empty, so no state is set up here. */
  public WordStemmer() { }


  public void visitTree(Tree t) {
    // A single Morphology is not threadsafe, so to make this class
    // threadsafe, we have to create a new Morphology for each visit
    processTree(t, null, new Morphology());
  }

View Full Code Here

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.process.Morphology;


public class TreeLemmatizer implements TreeTransformer {
  public Tree transformTree(Tree t) {
    Morphology morphology = new Morphology();


    List<TaggedWord> tagged = null;
    int index = 0;


    for (Tree leaf : t.getLeaves()) {
      Label label = leaf.label();
      if (label == null) {
        continue;
      }


      String tag;
      if (!(label instanceof HasTag) || ((HasTag) label).tag() == null) {
        if (tagged == null) {
          tagged = t.taggedYield();
        }
        tag = tagged.get(index).tag();
      } else {
        tag = ((HasTag) label).tag();
      }


      if (!(label instanceof HasLemma)) {
        throw new IllegalArgumentException("Got a tree with labels which do not support lemma");
      }
      ((HasLemma) label).setLemma(morphology.lemma(label.value(), tag, true));
      ++index;
    }
    return t;
  }

View Full Code Here

      }


      outputStyle = OutputStyle.fromShortName(config.getOutputFormat());
      outputVerbosity = config.getOutputVerbosity();
      outputLemmas = config.getOutputLemmas();
      morpha = (outputLemmas) ? new Morphology() : null;
      tokenize = config.getTokenize();
      tagSeparator = config.getTagSeparator();
    }

View Full Code Here

          List<? extends HasWord> taggedSentence = wrapper.poll();
          tagger.outputTaggedSentence(taggedSentence, outputLemmas, outputStyle, outputVerbosity, sentNum++, " ", taggedResults);
        }
      } else {
        for (List<? extends HasWord> sent : sentences) {
          Morphology morpha = (outputLemmas) ? new Morphology() : null;
          sent = tagger.tagCoreLabelsOrHasWords(sent, morpha, outputLemmas);
          tagger.outputTaggedSentence(sent, outputLemmas, outputStyle, outputVerbosity, sentNum++, " ", taggedResults);
        }
      }
      return taggedResults.toString();

View Full Code Here

      tagged = tagger.apply(tokens);
    } else {
      Tree tree = parse(tokens);
      tagged = tree.taggedYield();
    }
    Morphology morpha = new Morphology();
    List<CoreLabel> lemmas = Generics.newArrayList();
    for (TaggedWord token : tagged) {
      CoreLabel label = new CoreLabel();
      label.setWord(token.word());
      label.setTag(token.tag());
      morpha.stem(label);
      lemmas.add(label);
    }
    return lemmas;
  }

View Full Code Here

    assertEquals("hunt", stemStatic(hunt).word());
    assertEquals("hunt", lemmatizeStatic(hunt).lemma());
  }


  public void testDash() {
    Morphology morpha = new Morphology();
    morpha.stem("b-");
  }

View Full Code Here


  public void annotate(Annotation annotation) {
    if (VERBOSE) {
      System.err.print("Finding lemmas ...");
    }
    Morphology morphology = new Morphology();
    if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        //System.err.println("Lemmatizing sentence: " + tokens);
        for (CoreLabel token : tokens) {

View Full Code Here

0 1

TOP

Related Classes of edu.stanford.nlp.process.Morphology

com.music.service.text.TimelineToMusicService

edu.stanford.nlp.ling.Word

edu.stanford.nlp.ling.WordLemmaTag

edu.stanford.nlp.ling.WordTag

edu.stanford.nlp.parser.common.ParserGrammar

edu.stanford.nlp.pipeline.MorphaAnnotator

edu.stanford.nlp.process.MorphologyTest

edu.stanford.nlp.tagger.maxent.MaxentTagger

edu.stanford.nlp.tagger.maxent.MaxentTagger$TaggerWrapper

edu.stanford.nlp.trees.TreeLemmatizer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.