.getProperty("compact.attribute.nums"));
String format = props.getProperty("format");
// load the lexicon
Lexicon lexicon = new Lexicon(lexiconF);
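// configure the term weighting used when vectors are generated
// (falls back to TF-IDF when no scheme is specified)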
String weightingScheme = props.getProperty(
        "classification_weight_scheme", "tfidf");
WeightingMethod method = WeightingMethod
        .methodFromString(weightingScheme);
lexicon.setMethod(method);
// load the raw file as a training corpus
FileTrainingCorpus ftc = new FileTrainingCorpus(new File(raw));
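// optional attribute selection: keep only the N highest-scoring
// attributes (-1, the default, disables this step)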
int keepNBestAttributes = Integer.parseInt(props.getProperty(
        "keepNBestAttributes", "-1"));
if (keepNBestAttributes != -1) {
    // score the attributes with the log-likelihood ratio and keep
    // only the N best ones
    AttributeScorer scorer = logLikelihoodAttributeScorer.getScorer(
            ftc, lexicon);
    lexicon.setAttributeScorer(scorer);
    lexicon.applyAttributeFilter(scorer, keepNBestAttributes);
} else {
    // no attribute scoring: prune the lexicon by document frequency
    // instead, dropping terms whose document frequency is below
    // classification_minFreq (no upper bound)
    int minFreq = Integer.parseInt(props
            .getProperty("classification_minFreq"));
    int maxFreq = Integer.MAX_VALUE;
    lexicon.pruneTermsDocFreq(minFreq, maxFreq);
}
// change the attribute indices to remove gaps between them
Map<Integer, Integer> equiv = null;
if (compact) {
    // keep the mapping between old and new attribute indices so the
    // vectors can be written with the compacted ones
    equiv = lexicon.compact();
}
// save the modified lexicon file
if (newLexicon != null)
    lexicon.saveToFile(newLexicon);
// dump a new vector file
Utils.writeExamples(ftc, lexicon, true, vector_location, equiv, format);
}
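// For reference, the configuration properties consumed by the step above
// (descriptions inferred from how each value is used in this method):
//   compact.attribute.nums       - whether to compact the attribute indices
//   format                       - output format passed to Utils.writeExamples
//   classification_weight_scheme - term weighting scheme (default "tfidf")
//   keepNBestAttributes          - keep only the N best-scoring attributes
//                                  (default -1, i.e. disabled)
//   classification_minFreq       - minimum document frequency used when
//                                  pruning the lexicon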