Package opennlp.ccg.parse.supertagger.ml

Examples of opennlp.ccg.parse.supertagger.ml.STFex


    }

    /**
     * Sets the {@code useWordDict} flag to the supplied value.
     * NOTE(review): presumably this toggles whether the word-level tagging
     * dictionary is consulted during labelling -- confirm against the code
     * that reads the flag.
     *
     * @param useIt the new value for the {@code useWordDict} flag.
     */
    public void useWordDict(boolean useIt) { useWordDict = useIt; }
    /**
     * Sets the {@code usePOSDict} flag to the supplied value.
     * NOTE(review): presumably this toggles whether the POS-level tagging
     * dictionary is consulted during labelling -- confirm against the code
     * that reads the flag.
     *
     * @param useIt the new value for the {@code usePOSDict} flag.
     */
    public void usePOSDict(boolean useIt) { usePOSDict = useIt; }
    /**
     * Convenience constructor: forwards all arguments unchanged to the
     * five-argument constructor and supplies a default-constructed
     * {@code STFex} as the fifth argument.
     *
     * @param wd the word dictionary, passed through as-is.
     * @param pd the POS dictionary, passed through as-is.
     * @param K  integer setting passed through as-is
     *           (NOTE(review): its exact meaning is defined by the
     *           five-argument constructor -- confirm there).
     * @param mo the maxent model, passed through as-is.
     */
    public WordAndPOSDictionaryLabellingStrategy(STTaggerWordDictionary wd, STTaggerPOSDictionary pd, int K, MaxentModel mo) {
        // Delegate, using a fresh no-argument STFex.
        this(wd, pd, K, mo, new STFex());
    }
View Full Code Here


            try { priorM = new STPriorModel(priorModS, priorVocabS); }
            catch (IOException ex) {
                Logger.getLogger(WordAndPOSDictionaryLabellingStrategy.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        STFex fex = new STFex(priorM);
        STTaggerWordDictionary wD = (wDictS != null) ? new XMLWordDictionaryReader(new File(wDictS)).read() : null;       
        STTaggerPOSDictionary pD = (pDictS != null) ? new XMLPOSDictionaryReader(new File(pDictS)).read() : null;
        int kay = (opts.get("firstk") == null) ? 20 : Integer.parseInt(opts.get("firstk")), firstK, lastK;
        firstK = (opts.get("firstk") == null) ? 20 : Integer.parseInt(opts.get("firstk"));
        lastK = (opts.get("lastk") == null) ? 100 : Integer.parseInt(opts.get("lastk"));
View Full Code Here

              WordAndPOSDictionaryLabellingStrategy tagger = new WordAndPOSDictionaryLabellingStrategy(
                      wd,
                      pd,
                      (options.has("K") ? options.valueOf(kspec).intValue() : 20),
                      maxentModel = new ZLMEM(options.valueOf(modspec)),
                      new STFex(stPrior),
                      seqMod,
                      alg);
             
              tagger.setMaxSearchBeam(fbWidth);
              maxentModel.verbose = true;
             
              Iterator<List<Word>> corpus = null;
              Iterator<List<Word>> goldCorpus = null;
             
              if(options.valueOf(tokenisation).equalsIgnoreCase("srilm")) {
                  corpus = new SRILMFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(inputspec))));
              } else if(options.valueOf(tokenisation).equalsIgnoreCase("candc")) {
                  corpus = new PipeDelimitedFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(inputspec))));
              }
              if(options.has("test") && options.valueOf(tokenisation).equalsIgnoreCase("srilm")) {
                  goldCorpus = new SRILMFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(goldstandspec))));
              } else if(options.has("test") && options.valueOf(tokenisation).equalsIgnoreCase("candc")) {
                  goldCorpus = new PipeDelimitedFactoredBundleCorpusIterator(new BufferedReader(new FileReader(options.valueOf(goldstandspec))));
              }
             
              BufferedWriter outf = new BufferedWriter(new FileWriter(options.valueOf(outputspec)));
             
              boolean test = options.has("test");
             
              ResultSink results = new ResultSink();
              int sentCnt = 0;
             
              tagger.setBetas(new double[] {beta});
             
              while(corpus.hasNext()) {
                  sentCnt++;
                  List<Word> sent = corpus.next();
                 
                  List<List<Pair<Double,String>>> taggings = tagger.multitag(sent, beta);
                 
                  if(test) {
                      List<Word> goldsent = goldCorpus.next();
                      results.addSent(taggings, goldsent);
                  }                 
                 
                  Iterator<Word> sentiter = sent.iterator();
                  // output file format = word goldtag tag1 ... tagK                 
                  outf.write("<s>"+System.getProperty("line.separator"));
                  for(List<Pair<Double,String>> tagging : taggings) {                     
                      Word nextw = sentiter.next();
                      outf.write(nextw.getForm() + "\t1\t" + nextw.getPOS() + "\t1.0\t" + tagging.size() + "\t");// + nextw.getSupertag() + " ");
                      //outf.write(nextw.getForm() + "|||"+ nextw.getStem() + "|||" + nextw.getPOS() + "|||");
                      String tags = "";
                      for(Pair<Double,String> tg : tagging) {
                          //tags+="^"+tg.b+":"+tg.a;
                          tags+= "\t" + tg.b + "\t"+tg.a;
                      }
                      // write out the multitagging, minus the initial space (tab).
                      outf.write(tags.substring(1) + System.getProperty("line.separator"));
                     
                      //// write out the multitagging, minus the initial ^.
                      //outf.write(tags.substring(1) + " ");
                  }                 
                               
                  outf.write("</s>"+System.getProperty("line.separator"));
                  if(sentCnt % 10 == 0) {
                      outf.flush();
                  }
              }
              outf.flush();
              outf.close();
              if(test) {
                  System.err.println(results.report());
              }
              long end = System.currentTimeMillis();
              System.err.println("Time to tag: " + ((end - start + 0.0)/1000) + " seconds.");
             
            } else if (options.has("tagdictextract")) {
              // extract tagging dictionaries.
              File wd = options.valueOf(wdictspec);
              File pd = options.valueOf(pdictspec);
              File inf = options.valueOf(inputspec);
              TaggingDictionaryExtractor tde = new TaggingDictionaryExtractor(inf,wd,pd,options.valueOf(tokenisation));
              System.err.println("Extracting dictionaries from: "+inf.toString()+" into files: "+wd.toString()+" and: "+pd.toString()+"\n(wdict and posdict, resp.).");
              tde.extract();
            } else {
                // train (extract features).       
                File inf = options.valueOf(inputspec);
                File outf = options.valueOf(outputspec);
                FeatureExtractor fexer = (stPrior == null) ? new STFex() : new STFex(stPrior);
                ZhangLeTrainingExtractor fexApp = new ZhangLeTrainingExtractor(inf, outf, options.valueOf(tokenisation), fexer);
                System.err.println("Extracting features from file: " + inf.toString() + ", and placing extracted features in: " + outf.toString() + ".");
                fexApp.writeFeats();
            }
View Full Code Here

     * path to the input file of SRILM-compliant factored bundles.
     * @param outputF A <code>File</code> giving the complete
     * path to the output file where the features will be written.
     */
    public ZhangLeTrainingExtractor(File corpus, File outputF, String tokenisation) {
        // Convenience overload: delegate to the four-argument constructor,
        // supplying a default-constructed STFex as the fourth argument.
        this(corpus, outputF, tokenisation, new STFex());
    }
View Full Code Here

TOP

Related Classes of opennlp.ccg.parse.supertagger.ml.STFex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.