package edu.stanford.nlp.classify;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.optimization.AbstractCachingDiffFunction;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.IntTriple;
import edu.stanford.nlp.util.IntTuple;
import edu.stanford.nlp.util.IntUni;
import java.util.Arrays;
/**
* Maximizes the conditional likelihood with a given prior, implemented as a
* function to be minimized: the value computed is the negative log conditional
* likelihood plus the prior penalty. Constrains the parameters for the same
* history to sum to 1. Adapted from {@link LogConditionalObjectiveFunction}.
*
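* <p>A minimal usage sketch (hypothetical sizes and data; assumes CoreNLP's
* {@code QNMinimizer} from {@code edu.stanford.nlp.optimization}):
* <pre>{@code
* LogConditionalEqConstraintFunction obj =
*     new LogConditionalEqConstraintFunction(numFeatures, numClasses, data, labels, 1.0);
* double[] weights = new QNMinimizer().minimize(obj, 1e-4, obj.initial());
* double[][][] condLogProbs = obj.to3D(weights); // log P(value | class, feature)
* }</pre>
*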
* @author Kristina Toutanova
*/
public class LogConditionalEqConstraintFunction extends AbstractCachingDiffFunction {
public static final int NO_PRIOR = 0;
public static final int QUADRATIC_PRIOR = 1;
/* Use a Huber robust regression penalty (L1 except very near 0), not L2. */
public static final int HUBER_PRIOR = 2;
public static final int QUARTIC_PRIOR = 3;
protected int numFeatures = 0;
protected int numClasses = 0;
protected int[][] data = null;
protected int[] labels = null;
protected int[] numValues = null;
private int prior;
private double sigma = 1.0;
private double epsilon;
private Index<IntTuple> featureIndex;
@Override
public int domainDimension() {
return featureIndex.size();
}
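/**
* @return the class encoded in the parameter at {@code index} (the first
* element of its {@link IntTuple})
*/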
int classOf(int index) {
IntTuple i = featureIndex.get(index);
return i.get(0);
}
/**
* @return the feature number of the original feature, or -1 if this
* parameter is for a class prior
*/
int featureOf(int index) {
IntTuple i = featureIndex.get(index);
if (i.length() == 1) {
return -1;
}
return i.get(1);
}
/**
* @return the index of the prior parameter for class {@code c}
*/
protected int indexOf(int c) {
return featureIndex.indexOf(new IntUni(c));
}
protected int indexOf(int f, int c, int val) {
return featureIndex.indexOf(new IntTriple(c, f, val));
}
/**
* Create an index for each parameter: the class priors and the features
* with all of their values.
*/
protected Index<IntTuple> createIndex() {
Index<IntTuple> index = new HashIndex<IntTuple>();
for (int c = 0; c < numClasses; c++) {
index.add(new IntUni(c));
for (int f = 0; f < numFeatures; f++) {
for (int val = 0; val < numValues[f]; val++) {
index.add(new IntTriple(c, f, val));
}
}
}
return index;
}
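// Illustrative layout (hypothetical sizes numClasses = 2, numFeatures = 1,
// numValues = {2}): the index holds, in order,
// (0), (0,0,0), (0,0,1), (1), (1,0,0), (1,0,1)
// i.e. each class prior IntUni(c) followed by IntTriple(c, f, val) for every
// feature/value pair of that class.
/**
* Unpacks the normalized parameters into conditional log-probabilities,
* indexed as [class][feature][value].
*/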
public double[][][] to3D(double[] x1) {
double[] x = normalize(x1);
double[][][] x2 = new double[numClasses][numFeatures][];
for (int c = 0; c < numClasses; c++) {
for (int f = 0; f < numFeatures; f++) {
x2[c][f] = new double[numValues[f]];
for (int val = 0; val < numValues[f]; val++) {
x2[c][f][val] = x[indexOf(f, c, val)];
}
}
}
return x2;
}
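/**
* @return the normalized log prior for each class (the {@link IntUni}
* entries of the normalized parameter vector)
*/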
public double[] priors(double[] x1) {
double[] x = normalize(x1);
double[] x2 = new double[numClasses];
for (int c = 0; c < numClasses; c++) {
x2[c] = x[indexOf(c)];
}
return x2;
}
/**
* Normalize the parameters such that q_i = log(e^{l_i} / Z): within each
* block that must sum to 1 (the class priors, and each feature's values per
* class), subtract the log of the block's exponential sum.
*/
private double[] normalize(double[] x) {
double[] x1 = new double[x.length];
copy(x1, x);
//the priors
double[] sums = new double[numClasses];
for (int c = 0; c < numClasses; c++) {
int priorc = indexOf(c);
sums[c] += x[priorc];
}
double total = ArrayMath.logSum(sums);
for (int c = 0; c < numClasses; c++) {
int priorc = indexOf(c);
x1[priorc] -= total;
}
//the features
for (int c = 0; c < numClasses; c++) {
for (int f = 0; f < numFeatures; f++) {
double[] vals = new double[numValues[f]];
for (int val = 0; val < numValues[f]; val++) {
int index = indexOf(f, c, val);
vals[val] = x[index];
}
total = ArrayMath.logSum(vals);
for (int val = 0; val < numValues[f]; val++) {
int index = indexOf(f, c, val);
x1[index] -= total;
}
}
}
return x1;
}
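// Worked example for normalize() (hypothetical two-class prior block): raw
// parameters {log 1, log 3} have logSum equal to log 4, so they are shifted
// to {log(1/4), log(3/4)}, whose exponentials sum to 1.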
@Override
protected void calculate(double[] x1) {
double[] x = normalize(x1);
double[] xExp = new double[x.length];
for (int i = 0; i < x.length; i++) {
xExp[i] = Math.exp(x[i]);
}
value = 0.0;
Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
for (int d = 0; d < data.length; d++) {
int[] features = data[d];
// activation
Arrays.fill(sums, 0.0);
for (int c = 0; c < numClasses; c++) {
int priorc = indexOf(c);
sums[c] += x[priorc];
for (int f = 0; f < features.length; f++) {
int i = indexOf(f, c, features[f]);
sums[c] += x[i];
}
}
// expectation: log-normalizer via ArrayMath.logSum (replaces a slower
// per-class SloppyMath.logAdd loop)
double total = ArrayMath.logSum(sums);
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
int priorc = indexOf(c);
derivative[priorc] += probs[c];
for (int f = 0; f < features.length; f++) {
for (int val = 0; val < numValues[f]; val++) {
int i = indexOf(f, c, val);
double theta = xExp[i];
derivative[i] -= probs[c] * theta;
if (labels[d] == c) {
derivative[i] += theta;
}
}
}
}
// observed
for (int f = 0; f < features.length; f++) {
int i = indexOf(f, labels[d], features[f]);
derivative[i] -= 1.0;
for (int c = 0; c < numClasses; c++) {
int i1 = indexOf(f, c, features[f]);
derivative[i1] += probs[c];
}
}
value -= sums[labels[d]] - total;
int priorc = indexOf(labels[d]);
derivative[priorc] -= 1;
}
// priors
if (prior == QUADRATIC_PRIOR) {
double sigmaSq = sigma * sigma;
for (int i = 0; i < x1.length; i++) {
double k = 1.0;
double w = x1[i];
value += k * w * w / 2.0 / sigmaSq;
derivative[i] += k * w / sigmaSq;
}
} else if (prior == HUBER_PRIOR) {
double sigmaSq = sigma * sigma;
for (int i = 0; i < x1.length; i++) {
double w = x1[i];
double wabs = Math.abs(w);
if (wabs < epsilon) {
value += w * w / 2.0 / epsilon / sigmaSq;
derivative[i] += w / epsilon / sigmaSq;
} else {
value += (wabs - epsilon / 2) / sigmaSq;
derivative[i] += ((w < 0.0) ? -1.0 : 1.0) / sigmaSq;
}
}
} else if (prior == QUARTIC_PRIOR) {
double sigmaQu = sigma * sigma * sigma * sigma;
for (int i = 0; i < x1.length; i++) {
double k = 1.0;
double w = x1[i];
value += k * w * w * w * w / 2.0 / sigmaQu;
// gradient of k * w^4 / (2 * sigma^4) is 2 * k * w^3 / sigma^4
derivative[i] += 2.0 * k * w * w * w / sigmaQu;
}
}
// else no prior
}
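// Debugging sketch (hypothetical function instance f and point x): the
// analytic gradient above can be sanity-checked against a central finite
// difference, e.g. for coordinate 0:
// double h = 1e-5;
// double g0 = f.derivativeAt(x)[0];
// x[0] += h; double up = f.valueAt(x);
// x[0] -= 2 * h; double down = f.valueAt(x);
// x[0] += h; // restore x
// boolean ok = Math.abs(g0 - (up - down) / (2 * h)) < 1e-4;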
public LogConditionalEqConstraintFunction(int numFeatures, int numClasses, int[][] data, int[] labels) {
this(numFeatures, numClasses, data, labels, 1.0);
}
public LogConditionalEqConstraintFunction(int numFeatures, int numClasses, int[][] data, int[] labels, double sigma) {
this(numFeatures, numClasses, data, labels, QUADRATIC_PRIOR, sigma, 0.0);
}
public LogConditionalEqConstraintFunction(int numFeatures, int numClasses, int[][] data, int[] labels, int prior, double sigma, double epsilon) {
this.numFeatures = numFeatures;
this.numClasses = numClasses;
this.data = data;
this.labels = labels;
if (prior >= 0 && prior <= QUARTIC_PRIOR) {
this.prior = prior;
} else {
throw new IllegalArgumentException("Invalid prior: " + prior);
}
this.epsilon = epsilon;
this.sigma = sigma;
numValues = NaiveBayesClassifierFactory.numberValues(data, numFeatures);
for (int i = 0; i < numValues.length; i++) {
System.out.println("numValues " + i + " " + numValues[i]);
}
featureIndex = createIndex();
}
/**
* Use a random starting point with each coordinate drawn uniformly from
* [-0.5, 0.5).
*/
@Override
public double[] initial() {
double[] initial = new double[domainDimension()];
for (int i = 0; i < initial.length; i++) {
double r = Math.random();
r -= .5;
initial[i] = r;
}
return initial;
}
}