Examples of Corpus


Examples of joshua.corpus.Corpus

    FormatUtil.useUTF8();
 
    try {
     
      Vocabulary symbolTable;
      Corpus corpusArray;
      Suffixes suffixArray;
     
      logger.fine("Constructing vocabulary from file " + corpusFileName);
      symbolTable = new Vocabulary();
      int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, symbolTable, true);
View Full Code Here

Examples of org.apache.stanbol.enhancer.engines.lucenefstlinking.TaggingSession.Corpus

        }
        long taggingStart = System.currentTimeMillis();
        final NavigableMap<int[],Tag> tags = new TreeMap<int[],Tag>(Tag.SPAN_COMPARATOR);
        try {
            //process the language of the document
            Corpus corpus = null;
            if(session.getLanguageCorpus() != null){
                corpus = session.getLanguageCorpus();
                long t = System.currentTimeMillis();
                int d = tag(at, session,corpus,tags);
                log.debug(" - {}: fst: {}ms (callback: {}ms)", new Object[]{
                        corpus.getIndexedField(), System.currentTimeMillis()-t, d
                });
            }
            if(session.getDefaultCorpus() != null){
                if(corpus == null){
                    corpus = session.getDefaultCorpus();
View Full Code Here

Examples of syntaxLearner.corpus.Corpus

   
  }
 
  private static void testCorpus(String name, String inFolder, String outFolder, String clusters, String threshold, String epsilon){
    Learner l = new Learner(Integer.parseInt(clusters),Integer.parseInt(threshold),Double.parseDouble(epsilon));
    Corpus c = new Corpus(name,l);
   
    Calendar cal = Calendar.getInstance();
      SimpleDateFormat sdf = new SimpleDateFormat("dd.MM.yyyy HH-mm-ss");
    String outName = String.format("%1$s %2$s", name, sdf.format(cal.getTime()));
    File f1 = new File(inFolder);
View Full Code Here

Examples of uk.ac.cam.ha293.tweetlabel.types.Corpus

   
    //Configure and run LDA
    Tools.configure(stem);

    //TODO: Flexibility in Corpus choice - make automatic?
    Corpus corpus = null;
    if(stem) corpus = Corpus.load("allprofiles-stemmed");
    else corpus = Corpus.load("allprofiles-unstemmed");
   
    //Check for model existence
    LDATopicModel lda = null;
View Full Code Here

Examples of uk.ac.cam.ha293.tweetlabel.types.Corpus

   
    //Configure and run LDA
    Tools.configure(stem);

    //TODO: Flexibility in Corpus choice - make automatic?
    Corpus corpus = null;
    if(stem) corpus = Corpus.loadLabelled(topicType, "allprofiles-stemmed");
    else corpus = Corpus.loadLabelled(topicType, "allprofiles-unstemmed");
   
    //Check for model existence
    LLDATopicModel llda = null;
View Full Code Here

Examples of uk.ac.cam.ha293.tweetlabel.types.Corpus

  }
 
  public static void lldaStuff() {
    //Corpus corpus = Corpus.loadLabelled("alchemy","allprofiles-unstemmed-alchemy-top3");
    //Corpus corpus = Corpus.loadLabelled("calais","allprofiles-unstemmed-calais-top3");
    Corpus corpus = Corpus.loadLabelled("textwise","allprofiles-unstemmed-textwise-top3");
    corpus.removeLeastCommonWords(10,1);
    Set<Double> alphaSet = new HashSet<Double>();
    alphaSet.add(0.25);
    alphaSet.add(0.5);
    alphaSet.add(0.75);
    alphaSet.add(1.00);
View Full Code Here

Examples of uk.ac.cam.ha293.tweetlabel.types.Corpus

      threadNum++;
      final double alph=al;
      Thread thread = new Thread(){
        public void run() {
          System.out.println("THREAD: "+"Running for alpha="+alph);
          Corpus corpus = Corpus.loadLabelled("textwiseproper", "allprofiles-unstemmed-textwiseproper-top3");
          LLDATopicModel llda = new LLDATopicModel(corpus,1000,100,0,alph,0.01,fThread);
          llda.runQuickCVGibbsSampling(0);
        }
      };
      thread.start();
View Full Code Here

Examples of uk.ac.cam.ha293.tweetlabel.types.Corpus

      System.out.println("THREAD "+threadNum+": Starting up");
      final int iReduction = reduction;
      final double fReduction = reduction/10.0;
      String[] topicTypes = {"alchemy","calais","textwiseproper"};
      for(String topicType : topicTypes) {
        Corpus corpus = Corpus.loadLabelled(topicType, "allprofiles-unstemmed-"+topicType+"-top3");
        final Corpus fCorpus = corpus.randomlyRemove(fReduction);
        System.out.println(fCorpus.size());
        final int fThread=threadNum;
        threadNum++;
        Thread thread = new Thread(){
          public void run() {
            double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
View Full Code Here

Examples of uk.ac.cam.ha293.tweetlabel.types.Corpus

      System.out.println("THREAD "+threadNum+": Starting up");
      final int iReduction = reduction;
      final double fReduction = reductions[reduction-1];
      String[] topicTypes = {"textwiseproper"};
      for(String topicType : topicTypes) {
        final Corpus fCorpus = Corpus.loadLabelled(topicType, "allprofiles-unstemmed-"+fReduction+"-tweets-top3");
        final int fThread=threadNum;
        threadNum++;
        Thread thread = new Thread(){
          public void run() {
            double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
View Full Code Here

Examples of uk.ac.cam.ha293.tweetlabel.types.Corpus

    for(int reduction=1; reduction <=9; reduction++) {
      System.out.println("THREAD "+threadNum+": Starting up");
      final int iReduction = reduction;
      final double fReduction = reductions[reduction-1];
      //final Corpus fCorpus = Corpus.loadLabelled(topicType, "allprofiles-unstemmed-"+fReduction+"-tweets-top3");
      final Corpus fCorpus = Corpus.load("allprofiles-unstemmed");
      fCorpus.randomlyRemove(fReduction);
      final int fThread=threadNum;
      threadNum++;
      Thread thread = new Thread(){
        public void run() {
          double[] alphas = {0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0};
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.