Package com.jgaap.classifiers

Source Code of com.jgaap.classifiers.Xent2

/*
* JGAAP -- a graphical program for stylometric authorship attribution
* Copyright (C) 2009,2011 by Patrick Juola
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
**/
package com.jgaap.classifiers;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.jgaap.generics.AnalysisDriver;
import com.jgaap.generics.AnalyzeException;
import com.jgaap.util.Document;
import com.jgaap.util.EventSet;
import com.jgaap.util.EventTrie;
import com.jgaap.util.Pair;

public class Xent2 extends AnalysisDriver {

  private int windowSize;
  private Map<String, EventTrie> eventTries;
  private boolean authorModel;
 
  public Xent2() {
    addParams("model", "Model", "Document", new String[]{"Document","Author"}, false);
    addParams("windowSize", "Window Size", "15", new String[] { "1", "2",
        "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
        "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
        "24", "25" }, false);
  }

  public String displayName() {
    return "JW Cross Entropy";
  }

  public String tooltipText() {
    return "Juola-Wyner Cross Entropy";
  }

  public boolean showInGUI() {
    return true;
  }

  public double distance(EventTrie eventTrie, Document document) {

    double me = meanEntropy(eventTrie, document);
    double hhat = (Math.log(1.0 * windowSize) / Math.log(2.0)) / me;

    return hhat;
  }

  private double meanEntropy(EventTrie eventTrie, Document document) {

    double totalEntropy = 0;
    int trials = 0;

    for(EventSet eventSet : document.getEventSets().values()){
      for (int i = 0; i < eventSet.size(); i++) {
        totalEntropy += eventTrie.find(window(eventSet, i, windowSize));
        trials++;
      }
    }
    return totalEntropy / trials;
  }

  private EventSet window(EventSet e1, int offset, int windowSize) {
    return e1.subset(offset, offset + windowSize);
  }
 
  private String identifier(Document document){
    return (authorModel? document.getAuthor() : document.getAuthor()+" -"+document.getFilePath());
  }

  @Override
  public void train(List<Document> knownDocuments) throws AnalyzeException {
    windowSize = getParameter("windowSize", 15);
    authorModel = getParameter("model").equalsIgnoreCase("author");
    eventTries = new HashMap<String, EventTrie>();
    for(Document document : knownDocuments){
      EventTrie eventTrie = eventTries.get(identifier(document));
      if(eventTrie == null){
        eventTrie = new EventTrie();
        eventTries.put(identifier(document), eventTrie);
      }
      for(EventSet eventSet : document.getEventSets().values()) {
        for (int i = 0; i < eventSet.size(); i++) {
          EventSet dictionary;
          dictionary = window(eventSet, i, windowSize);
          eventTrie.add(dictionary);
        }
      }
    }
  }

  @Override
  public List<Pair<String, Double>> analyze(Document unknownDocument)
      throws AnalyzeException {
    Set<Entry<String,EventTrie>> entrySet = eventTries.entrySet();
    List<Pair<String, Double>> results = new ArrayList<Pair<String,Double>>(entrySet.size());
    for(Entry<String,EventTrie> entry : entrySet){
      results.add(new Pair<String, Double>(entry.getKey(), distance(entry.getValue(), unknownDocument), 2));
    }
    Collections.sort(results);
    return results;
  }
}
TOP

Related Classes of com.jgaap.classifiers.Xent2

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.