// Tagging mode: load the maxent model, multitag each sentence of the input
// corpus, and write the multitaggings to the output file (optionally scoring
// against a gold-standard corpus when --test is given).
WordAndPOSDictionaryLabellingStrategy tagger = new WordAndPOSDictionaryLabellingStrategy(
    wd,
    pd,
    (options.has("K") ? options.valueOf(kspec).intValue() : 20), // K-best width; default 20
    maxentModel = new ZLMEM(options.valueOf(modspec)),
    new STFex(stPrior),
    seqMod,
    alg);
tagger.setMaxSearchBeam(fbWidth);
maxentModel.verbose = true;

// Choose the corpus reader according to the requested tokenisation format.
// NOTE(review): the underlying FileReaders are owned by the iterators and are
// never explicitly closed here — presumably closed on exhaustion; verify.
Iterator<List<Word>> corpus = null;
Iterator<List<Word>> goldCorpus = null;
String tokFormat = options.valueOf(tokenisation);
if (tokFormat.equalsIgnoreCase("srilm")) {
    corpus = new SRILMFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(inputspec))));
} else if (tokFormat.equalsIgnoreCase("candc")) {
    corpus = new PipeDelimitedFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(inputspec))));
} else {
    // Previously an unrecognised format left corpus == null and caused an
    // opaque NullPointerException at the first hasNext(); fail fast instead.
    throw new IllegalArgumentException("Unknown tokenisation format: " + tokFormat);
}
if (options.has("test") && tokFormat.equalsIgnoreCase("srilm")) {
    goldCorpus = new SRILMFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(goldstandspec))));
} else if (options.has("test") && tokFormat.equalsIgnoreCase("candc")) {
    goldCorpus = new PipeDelimitedFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(goldstandspec))));
}

boolean test = options.has("test");
ResultSink results = new ResultSink();
int sentCnt = 0;
tagger.setBetas(new double[] {beta});
// Hoisted out of the loop: the separator cannot change mid-run.
String lineSep = System.getProperty("line.separator");
BufferedWriter outf = new BufferedWriter(new FileWriter(options.valueOf(outputspec)));
try {
    while (corpus.hasNext()) {
        sentCnt++;
        List<Word> sent = corpus.next();
        List<List<Pair<Double,String>>> taggings = tagger.multitag(sent, beta);
        if (test) {
            // Gold corpus is consumed in lockstep with the input corpus;
            // assumes both files have the same number of sentences.
            List<Word> goldsent = goldCorpus.next();
            results.addSent(taggings, goldsent);
        }
        Iterator<Word> sentiter = sent.iterator();
        // output file format = word goldtag tag1 ... tagK
        outf.write("<s>" + lineSep);
        for (List<Pair<Double,String>> tagging : taggings) {
            Word nextw = sentiter.next();
            outf.write(nextw.getForm() + "\t1\t" + nextw.getPOS() + "\t1.0\t" + tagging.size() + "\t");
            // StringBuilder instead of repeated String concatenation (was O(n^2)).
            StringBuilder tags = new StringBuilder();
            for (Pair<Double,String> tg : tagging) {
                tags.append('\t').append(tg.b).append('\t').append(tg.a);
            }
            // write out the multitagging, minus the initial space (tab).
            outf.write(tags.substring(1) + lineSep);
        }
        outf.write("</s>" + lineSep);
        // Periodic flush so partial output survives a crash on long runs.
        if (sentCnt % 10 == 0) {
            outf.flush();
        }
    }
    outf.flush();
} finally {
    // Previously the writer leaked if multitag() or I/O threw mid-loop.
    outf.close();
}
if (test) {
    System.err.println(results.report());
}
long end = System.currentTimeMillis();
System.err.println("Time to tag: " + ((end - start + 0.0)/1000) + " seconds.");
} else if (options.has("tagdictextract")) {
    // Dictionary-extraction mode: build the word and POS tagging
    // dictionaries from the given corpus file.
    File wordDictFile = options.valueOf(wdictspec);
    File posDictFile = options.valueOf(pdictspec);
    File corpusFile = options.valueOf(inputspec);
    TaggingDictionaryExtractor extractor = new TaggingDictionaryExtractor(
        corpusFile, wordDictFile, posDictFile, options.valueOf(tokenisation));
    System.err.println("Extracting dictionaries from: "+corpusFile.toString()+" into files: "+wordDictFile.toString()+" and: "+posDictFile.toString()+"\n(wdict and posdict, resp.).");
    extractor.extract();
} else {
    // Training mode: extract maxent training features from the corpus.
    File corpusFile = options.valueOf(inputspec);
    File featFile = options.valueOf(outputspec);
    // Use the supertag-prior-aware feature extractor when a prior model was loaded.
    FeatureExtractor featureExtractor;
    if (stPrior == null) {
        featureExtractor = new STFex();
    } else {
        featureExtractor = new STFex(stPrior);
    }
    ZhangLeTrainingExtractor extractorApp = new ZhangLeTrainingExtractor(
        corpusFile, featFile, options.valueOf(tokenisation), featureExtractor);
    System.err.println("Extracting features from file: " + corpusFile.toString() + ", and placing extracted features in: " + featFile.toString() + ".");
    extractorApp.writeFeats();
}