Package edu.stanford.nlp.tagger.maxent

Examples of edu.stanford.nlp.tagger.maxent.MaxentTagger


  void setTagger() throws Exception
  {
    if(Dictionary.tagger==null)
    {
      if(this.language.equals("en"))
        Dictionary.tagger=new MaxentTagger("./data/taggermodels/"+this.language+"/english-left3words-distsim.tagger");
      if(this.language.equals("es"))
      {
        //TODO
      }
       
View Full Code Here


    preprocessor.setSentenceDelimiter(config.sentenceDelimiter);
    preprocessor.setTokenizerFactory(config.tlp.getTokenizerFactory());

    Timing timer = new Timing();

    MaxentTagger tagger = new MaxentTagger(config.tagger);
    List<List<TaggedWord>> tagged = new ArrayList<>();
    for (List<HasWord> sentence : preprocessor) {
      tagged.add(tagger.tagSentence(sentence));
    }

    System.err.printf("Tagging completed in %.2f sec.%n",
        timer.stop() / 1000.0);
View Full Code Here

      }
    }

    String text = "I can almost always tell when movies use fake dinosaurs.";

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
      List<TaggedWord> tagged = tagger.tagSentence(sentence);
      GrammaticalStructure gs = parser.predict(tagged);

      // Print typed dependencies
      System.err.println(gs);
    }
View Full Code Here

      }
    }

    String text = "My dog likes to shake his stuffed chickadee toy.";

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    ShiftReduceParser model = ShiftReduceParser.loadModel(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
      List<TaggedWord> tagged = tagger.tagSentence(sentence);
      Tree tree = model.apply(tagged);
      System.err.println(tree);
    }
  }
View Full Code Here

        throw new IllegalArgumentException("Unknown argument: " + args[argIndex]);
      }
    }

    LexicalizedParser parser = LexicalizedParser.loadModel(inputFile);
    MaxentTagger tagger = new MaxentTagger(taggerFile);
    parser.reranker = new TaggerReranker(tagger, parser.getOp());
    parser.saveParserToSerialized(outputFile);
  }
View Full Code Here

    Timing timer = null;
    if (verbose) {
      timer = new Timing();
      timer.doing("Loading POS Model [" + loc + ']');
    }
    MaxentTagger tagger = new MaxentTagger(loc);
    if (verbose) {
      timer.done();
    }
    return tagger;
  }
View Full Code Here

      System.exit(-1);
    }
    try {
      // Load MaxentTagger, which is threadsafe
      String modelFile = args[0];
      final MaxentTagger tagger = new MaxentTagger(modelFile);

      // Configure to run with 4 worker threads
      int nThreads = 4;
      MulticoreWrapper<String,String> wrapper =
          new MulticoreWrapper<String,String>(nThreads,
              new ThreadsafeProcessor<String,String>() {
                @Override
                public String process(String input) {
                  return tagger.tagString(input);
                }
                @Override
                public ThreadsafeProcessor<String, String> newInstance() {
                  // MaxentTagger is threadsafe
                  return this;
View Full Code Here

  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
      return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
                     "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      pw.println(Sentence.listToString(tSentence, false));
    }

    // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
    List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord tw : taggedSent) {
      if (tw.tag().startsWith("JJ")) {
        pw.println(tw.word());
      }
    }
View Full Code Here

  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("usage: java TaggerDemo modelFile fileToTag");
      return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(args[1])));
    for (List<HasWord> sentence : sentences) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      System.out.println(Sentence.listToString(tSentence, false));
    }
  }
View Full Code Here

  public void testEnglishTagSet() {
    LexicalizedParser lp = LexicalizedParser.loadModel(englishParsers[0]);
    Set<String> tagSet = lp.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction());
    for (String name : englishTaggers) {
      MaxentTagger tagger = new MaxentTagger(name);
      assertEquals("English PCFG parser/" + name + " tag set mismatch", tagSet, tagger.tagSet());
    }
    for (String name : englishParsers) {
      LexicalizedParser lp2 = LexicalizedParser.loadModel(name);
      assertEquals("English PCFG parser/" + name + " tag set mismatch",
                   tagSet, lp2.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction()));
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.tagger.maxent.MaxentTagger

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.