Package com.digitalpebble.classification

Examples of com.digitalpebble.classification.RAMTrainingCorpus


    }   
    Learner creator = Learner.getLearner("./temp",Learner.LibSVMModelCreator,true);
    creator.setParameters(parameters);
    creator.setMethod(Parameters.WeightingMethod.BOOLEAN);

    RAMTrainingCorpus subjectiveCorpus = getCorpus(fileSubj,"subjective",creator);
    RAMTrainingCorpus objectiveCorpus = getCorpus(fileObj,"objective",creator);
    subjectiveCorpus.addAll(objectiveCorpus);
   
    // filter some of the attributes out based on their LLR score
    creator.keepTopNAttributesLLR(2000);
   
View Full Code Here


  
  private RAMTrainingCorpus getCorpus(String file, String label, Object operator) throws IOException{
    File original = new File(file);
    BufferedReader reader = new BufferedReader(new FileReader(original));
    String line;
    RAMTrainingCorpus corpusList = new RAMTrainingCorpus();
    while ((line=reader.readLine())!=null)
    {
      String[] tokens = line.split("\\W");
      // lower case
      for (int i=0;i<tokens.length;i++){
        tokens[i]=tokens[i].toLowerCase();
      }     
     
      Document doc = null;
      if (operator instanceof Learner)
      doc = ((Learner)operator).createDocument(tokens,label);
      else
        doc = ((TextClassifier)operator).createDocument(tokens);
      corpusList.add(doc);
    }
    reader.close();
    return corpusList;
  }
View Full Code Here

    fields3[2] = new Field("title", new String[] { "some", "different",
        "content" });
    learner.setMethod(Parameters.WeightingMethod.TFIDF);
    Document doc3 = learner.createDocument(fields3, "small");

    RAMTrainingCorpus corpus = new RAMTrainingCorpus();
    corpus.add(doc);
    corpus.add(doc2);
    learner.learn(corpus);

    TextClassifier classi = TextClassifier.getClassifier(tempFile);
    double[] scores = classi.classify(doc);
    assertEquals("large", classi.getBestLabel(scores));
View Full Code Here

        "this", "will", "have", "a", "small", "value" };
    Document doc2 = learner.createDocument(simplecontent, "small");

    // com.digitalpebble.classification.TrainingCorpus tc =
    // learner.getFileTrainingCorpus();
    com.digitalpebble.classification.TrainingCorpus tc = new RAMTrainingCorpus();
    tc.addDocument(doc);
    tc.addDocument(doc2);
    tc.close();

    int numDoc = 0;

    Iterator iter = tc.iterator();
    while (iter.hasNext()) {
      numDoc++;
      iter.next();
    }

    assertEquals(2, numDoc);

    tc = learner.getFileTrainingCorpus();
    tc.addDocument(doc);
    tc.addDocument(doc2);
    tc.close();

    numDoc = 0;

    iter = tc.iterator();
    while (iter.hasNext()) {
      numDoc++;
      iter.next();
    }
View Full Code Here

    evaluateWeightingSchemes(Parameters.WeightingMethod.TFIDF);
  }

  private void evaluateWeightingSchemes(WeightingMethod method) {

    RAMTrainingCorpus corpus = new RAMTrainingCorpus();

    learner.setMethod(method);

    for (String[] content : docs) {
      Document doc = learner.createDocument(content);
      corpus.add(doc);
    }

    Iterator<Document> corpusIter = corpus.iterator();

    Map<Integer, String> invertedIndex = learner.getLexicon()
        .getInvertedIndex();

    List<Map> expectedset = references.get(method);

    // check that we have the same number of docs in the corpus
    // and in the ref

    assertEquals(expectedset.size(), corpus.size());

    for (Map<String, Double> ref : expectedset) {
      Document doc = corpusIter.next();
      Vector vector = doc.getFeatureVector(learner.getLexicon());
      // now let's compare what we wanted to have with the content of the
View Full Code Here

TOP

Related Classes of com.digitalpebble.classification.RAMTrainingCorpus

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.