Package

Source Code of BayesianSpam

/*
* Encog(tm) Examples v3.1 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2012 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*  
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
//package org.encog.examples.ml.bayesian;

import java.util.ArrayList;
import java.util.List;

import org.encog.mathutil.probability.CalcProbability;
import org.encog.ml.bayesian.BayesianEvent;
import org.encog.ml.bayesian.BayesianNetwork;
import org.encog.ml.bayesian.EventType;
import org.encog.ml.bayesian.query.enumerate.EnumerationQuery;
import org.encog.ml.bayesian.query.sample.SamplingQuery;
import org.encog.util.Format;
import org.encog.util.text.BagOfWords;

public class BayesianSpam {

  public final static String[] SPAM_DATA = {
    "offer is secret",
    "click secret link",
    "secret sports link"
  };
 
  public final static String[] HAM_DATA = {
    "play sports today",
    "went play sports",
    "secret sports event",
    "sports is today",
    "sports costs money"
  };
 
  private int k;
 
  private BagOfWords spamBag;
  private BagOfWords hamBag;
  private BagOfWords totalBag;
   
  public void init(int theK) {
   
    this.k = theK;
   
    this.spamBag = new BagOfWords(this.k);
    this.hamBag = new BagOfWords(this.k);
    this.totalBag = new BagOfWords(this.k);

   
    for(String line: SPAM_DATA) {
      spamBag.process(line);
      totalBag.process(line);
    }
   
    for(String line: HAM_DATA) {
      hamBag.process(line);
      totalBag.process(line);
    }
   
    this.hamBag.setLaplaceClasses(totalBag.getUniqueWords());
    this.spamBag.setLaplaceClasses(totalBag.getUniqueWords());   
  }
 
  public List<String> separateSpaces(String str) {
    List<String> result = new ArrayList<String>();
    StringBuilder word = new StringBuilder();

    for (int i = 0; i < str.length(); i++) {
      char ch = str.charAt(i);
      if (ch != '\'' && !Character.isLetterOrDigit(ch)) {
        if (word.length() > 0) {
          result.add(word.toString());
          word.setLength(0);
        }
      } else {
        word.append(ch);
      }
    }

    if (word.length() > 0) {
      result.add(word.toString());
    }
   
    return result;
  }
 
  public double probabilitySpam(String m) {
    List<String> words = separateSpaces(m);
   
    BayesianNetwork network = new BayesianNetwork();
    BayesianEvent spamEvent = network.createEvent("spam");
   
    int index = 0;
    for( String word: words) {
      BayesianEvent event = network.createEvent(word+index);
      network.createDependency(spamEvent, event);
      index++;
    }
   
    network.finalizeStructure();
   
    //SamplingQuery query = new SamplingQuery(network);
    EnumerationQuery query = new EnumerationQuery(network);
   
    CalcProbability messageProbability = new CalcProbability(this.k);
    messageProbability.addClass(SPAM_DATA.length);
    messageProbability.addClass(HAM_DATA.length);
    double probSpam = messageProbability.calculate(0);

    spamEvent.getTable().addLine(probSpam, true);
    query.defineEventType(spamEvent, EventType.Outcome);
    query.setEventValue(spamEvent, true);
       
    index = 0;
    for( String word: words) {
      String word2 = word+index;
      BayesianEvent event = network.getEvent(word2);
      event.getTable().addLine(this.spamBag.probability(word), true, true); // spam
      event.getTable().addLine(this.hamBag.probability(word), true, false); // ham
      query.defineEventType(event, EventType.Evidence);
      query.setEventValue(event, true);
      index++;
    }

    //query.setSampleSize(100000000);
    query.execute();
    return query.getProbability();   
  }
 
  public void test(String message) {
    double d = probabilitySpam(message);
    System.out.println("Probability of \"" + message + "\" being spam is " + Format.formatPercent(d));
  }
 
  public static final void main(String[] args) {
    BayesianSpam program = new BayesianSpam();
   
    System.out.println("Using Laplace of 0");
    program.init(0);
    program.test("today"); // 0.0
    program.test("sports"); // 16.67
    program.test("today is secret"); // 0.0
    program.test("secret is secret"); // 96.15
   
    System.out.println("Using Laplace of 1");
    program.init(1);
    program.test("today");
    program.test("sports");
    program.test("today is secret"); // 48.58
    program.test("secret is secret");
  }
 
}
TOP

Related Classes of BayesianSpam

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.