Package com.jgaap.classifiers

Source Code of com.jgaap.classifiers.BurrowsDelta

package com.jgaap.classifiers;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.jgaap.backend.Utils;
import com.jgaap.generics.AnalysisDriver;
import com.jgaap.util.Document;
import com.jgaap.util.Event;
import com.jgaap.util.EventMap;
import com.jgaap.util.Pair;

public class BurrowsDelta extends AnalysisDriver {

  private ImmutableSet<Event> events;
  private ImmutableMultimap<String, EventMap> knownHistograms;
  private ImmutableMap<String,EventMap> knownCentroids;
  private boolean useCentroid;
  private ImmutableMap<Event, Double> eventStddev;

  public BurrowsDelta() {
    addParams("centroid", "Centroid Model", "false", new String[] { "true", "false" }, false);
  }

  @Override
  public String displayName() {
    return "Burrows Delta";
  }

  @Override
  public String tooltipText() {
    return "Burrow's Delta with Argamon's Formula";
  }

  @Override
  public boolean showInGUI() {
    return true;
  }

  public void train(List<Document> knowns) {
    useCentroid = "true".equalsIgnoreCase(getParameter("centroid"));
    ImmutableSet.Builder<Event> eventsBuilder = ImmutableSet.builder();
    ImmutableMultimap.Builder<String, EventMap> knownHistogramsBuilder = ImmutableMultimap.builder();
    for (Document known : knowns) {
      EventMap eventMap = new EventMap(known);
      eventsBuilder.addAll(eventMap.uniqueEvents());
      knownHistogramsBuilder.put(known.getAuthor(), eventMap);
    }
    events = eventsBuilder.build();
    knownHistograms = knownHistogramsBuilder.build();
   
    ImmutableMap.Builder<Event, Double> eventStddevBuilder = ImmutableMap.builder();

    if (useCentroid) {
      ImmutableMap.Builder<String, EventMap> knownCentroidsBuilder = ImmutableMap.builder();
      for (Entry<String, Collection<EventMap>> entry : knownHistograms.asMap().entrySet()) {
        knownCentroidsBuilder.put(entry.getKey(), EventMap.centroid(entry.getValue()));
      }
      knownCentroids = knownCentroidsBuilder.build();
      for (Event event : events) {
        List<Double> sample = new ArrayList<Double>();
        for (EventMap histogram : knownCentroids.values()) {
          Double value = histogram.relativeFrequency(event);
          if (value == null) {
            sample.add(0.0);
          } else {
            sample.add(value);
          }
        }
        eventStddevBuilder.put(event, Utils.stddev(sample));
      }
    } else {
      for (Event event : events) {
        List<Double> sample = new ArrayList<Double>();
        for (Collection<EventMap> histograms : knownHistograms.asMap().values()) {
          for (EventMap histogram : histograms) {
            sample.add(histogram.relativeFrequency(event));
          }
        }
        eventStddevBuilder.put(event, Utils.stddev(sample));
      }
    }
    eventStddev = eventStddevBuilder.build();
  }

  /**
   * Burrows Delta using Argamon's Formula Note this is sum(|(Xi - Yi) /
   * sigma|) (Basically, a Manhattan Distance normalized by the standard
   * deviation of a word across the known author list)
   */
  public List<Pair<String, Double>> analyze(Document unknown) {
    List<Pair<String, Double>> results = new ArrayList<Pair<String, Double>>();
    EventMap unknownEventMap = new EventMap(unknown);
    if (useCentroid) {
      for (Entry<String, EventMap> entry : knownCentroids.entrySet()) {
        double delta = 0.0;
        for (Event event : events) {
          Double knownFrequency = entry.getValue().relativeFrequency(event);
          if (knownFrequency == null) {
            knownFrequency = 0.0;
          }
          delta += Math.abs((unknownEventMap.relativeFrequency(event) - knownFrequency) / eventStddev.get(event));
        }
        results.add(new Pair<String, Double>(entry.getKey(), delta,2));
      }
    } else {
      for (Entry<String, Collection<EventMap>> entry : knownHistograms.asMap().entrySet()) {
        for (EventMap histogram : entry.getValue()) {
          double delta = 0.0;
          for (Event event : events) {
            delta += Math.abs((unknownEventMap.relativeFrequency(event) - histogram.relativeFrequency(event)) / eventStddev.get(event));
          }
          results.add(new Pair<String, Double>(entry.getKey(), delta,2));
        }
      }
    }
    Collections.sort(results);
    return results;
  }
}
TOP

Related Classes of com.jgaap.classifiers.BurrowsDelta

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.