package edu.stanford.nlp.classify;
import java.util.Arrays;
import java.util.Collection;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.math.ADMath;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.math.DoubleAD;
import edu.stanford.nlp.optimization.AbstractStochasticCachingDiffUpdateFunction;
import edu.stanford.nlp.optimization.StochasticCalculateMethods;
import edu.stanford.nlp.util.Index;
/**
* Maximizes the conditional likelihood with a given prior.
*
* @author Dan Klein
* @author Galen Andrew
* @author Chris Cox (merged w/ SumConditionalObjectiveFunction, 2/16/05)
* @author Sarah Spikes (Templatization, allowing an {@code Iterable<Datum<L, F>>} to be passed in instead of a {@code GeneralDataset<L, F>})
* @author Angel Chang (support in place SGD - extend AbstractStochasticCachingDiffUpdateFunction)
* @author Christopher Manning (cleaned out the cruft and sped it up in 2014)
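*
* <p>A minimal usage sketch (assuming a {@code GeneralDataset} built elsewhere;
* {@code QNMinimizer} is one optimizer in this package that can minimize this function):
*
* <pre>{@code
* GeneralDataset<String, String> dataset = ...; // built and indexed elsewhere
* LogConditionalObjectiveFunction<String, String> objective =
*     new LogConditionalObjectiveFunction<>(dataset, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
* Minimizer<DiffFunction> minimizer = new QNMinimizer();
* double[] flatWeights = minimizer.minimize(objective, 1e-4, objective.initial());
* double[][] weights = objective.to2D(flatWeights); // feature x class
* }</pre>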
*/
public class LogConditionalObjectiveFunction<L, F> extends AbstractStochasticCachingDiffUpdateFunction {
protected final LogPrior prior;
protected final int numFeatures;
protected final int numClasses;
/** Normally, this contains the data. The first index is the datum number,
* and then there is an array of feature indices for each datum.
*/
protected final int[][] data;
/** Alternatively, the data may be available from an Iterable in not-yet-indexed
* form. (In 2014, it's not clear any code actually uses this option.)
* In that case, you need indices for both labels and features.
*/
protected final Iterable<Datum<L, F>> dataIterable;
protected final Index<L> labelIndex;
protected final Index<F> featureIndex;
/** Same size as data if the features have values; null if the features are binary. */
protected final double[][] values;
/** The label of each data index. */
protected final int[] labels;
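/** Optional per-datum weights (e.g., from a WeightedDataset); null if all data are equally weighted. */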
protected final float[] dataWeights;
protected final boolean useSummedConditionalLikelihood; // whether to use summed conditional or log conditional likelihood
/** This is used to cache the numerator in batch methods. */
protected double[] derivativeNumerator = null;
/** This exists only because the prior functions don't handle stochastic calculations yet. */
protected double [] priorDerivative = null;
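/**
* The parameter vector packs the feature-by-class weight matrix in row-major
* order: the weight for feature f and class c lives at index f * numClasses + c
* (see indexOf), so the domain has numFeatures * numClasses dimensions. For
* example, with 3 classes, (f = 2, c = 1) maps to flat index 7.
*/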
@Override
public int domainDimension() {
return numFeatures * numClasses;
}
@Override
public int dataDimension() {
return data.length;
}
/** Returns the class index encoded in a flat f(x,y) parameter index. */
private int classOf(int index) {
return index % numClasses;
}
/** Returns the (Phi) feature index encoded in a flat f(x,y) parameter index. */
private int featureOf(int index) {
return index / numClasses;
}
/** Converts a Phi feature number and class index into an f(x,y) feature index. */
// [cdm2014: Tried inline this; no big gains.]
protected int indexOf(int f, int c) {
return f * numClasses + c;
}
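/** Unpacks a flat parameter vector into a numFeatures x numClasses weight matrix. */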
public double[][] to2D(double[] x) {
double[][] x2 = new double[numFeatures][numClasses];
for (int i = 0; i < numFeatures; i++) {
for (int j = 0; j < numClasses; j++) {
x2[i][j] = x[indexOf(i, j)];
}
}
return x2;
}
/**
* Calculate the conditional likelihood.
* If {@code useSummedConditionalLikelihood} is {@code false} (the default),
* this calculates standard (product) CL; otherwise it calculates summed CL.
* What's the difference? See Klein and Manning's 2002 EMNLP paper.
*/
@Override
protected void calculate(double[] x) {
// Full-batch calculation: dispatch on the objective type
if (useSummedConditionalLikelihood) {
calculateSCL(x);
} else {
calculateCL(x);
}
}
/**
* Computes an estimate of the value / gradient based on only a small portion
* of the data (the "batch"). Here "batch" does not mean all of the data; it
* should be read as "a small batch of the data".
*/
@Override
public void calculateStochastic(double[] x, double[] v, int[] batch) {
if (method.calculatesHessianVectorProduct() && v != null) {
// This is used for stochastic methods that involve second order information (SMD for example)
if (method.equals(StochasticCalculateMethods.AlgorithmicDifferentiation)) {
calculateStochasticAlgorithmicDifferentiation(x, v, batch);
} else if (method.equals(StochasticCalculateMethods.IncorporatedFiniteDifference)) {
calculateStochasticFiniteDifference(x, v, finiteDifferenceStepSize, batch);
}
} else {
// This is used for stochastic methods that don't need anything but the gradient (SGD)
calculateStochasticGradientLocal(x, batch);
}
}
/**
* Calculate the summed conditional likelihood of this data by summing
* conditional estimates.
*/
private void calculateSCL(double[] x) {
//System.out.println("Checking at: "+x[0]+" "+x[1]+" "+x[2]);
value = 0.0;
Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
for (int d = 0; d < data.length; d++) {
int[] features = data[d];
// activation
Arrays.fill(sums, 0.0);
for (int c = 0; c < numClasses; c++) {
for (int feature : features) {
int i = indexOf(feature, c);
sums[c] += x[i];
}
}
// expectation (slower routine replaced by fast way)
// double total = Double.NEGATIVE_INFINITY;
// for (int c=0; c<numClasses; c++) {
// total = SloppyMath.logAdd(total, sums[c]);
// }
double total = ArrayMath.logSum(sums);
int ld = labels[d];
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
for (int feature : features) {
int i = indexOf(feature, c);
derivative[i] += probs[ld] * probs[c];
}
}
// observed
for (int feature : features) {
int i = indexOf(feature, labels[d]);
derivative[i] -= probs[ld];
}
value -= probs[ld];
}
// prior: this hard-codes a quadratic (Gaussian) penalty with strength k = 1.0,
// rather than delegating to the LogPrior object used by the other methods
for (int i = 0; i < x.length; i++) {
double k = 1.0;
double w = x[i];
value += k * w * w / 2.0;
derivative[i] += k * w;
}
}
/**
* Calculate the conditional likelihood of this data by multiplying
* conditional estimates. Full dataset batch estimation.
*/
private void calculateCL(double[] x) {
if (values != null) {
rvfcalculate(x);
} else if (dataIterable != null) {
calculateCLiterable(x);
} else {
calculateCLbatch(x);
}
}
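/**
* Full-batch computation for indexed binary-feature data. For each datum d with
* gold label y_d this accumulates
* value += -log P(y_d | d) and, at each active parameter index i(f, c),
* derivative += P(c | d) - [c == y_d],
* where P(c | d) is the softmax over the per-class activation sums. The observed
* count term ([c == y_d]) is cached in derivativeNumerator, since it does not
* depend on x.
*/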
private void calculateCLbatch(double[] x) {
//System.out.println("Checking at: "+x[0]+" "+x[1]+" "+x[2]);
value = 0.0;
// [cdm Mar 2014] This next bit seems unnecessary: derivative is allocated by ensure() in AbstractCachingDiffFunction
// before calculate() is called; and after the next block, derivativeNumerator is copied into it.
// if (derivative == null) {
// derivative = new double[x.length];
// } else {
// Arrays.fill(derivative, 0.0);
// }
if (derivativeNumerator == null) {
derivativeNumerator = new double[x.length];
for (int d = 0; d < data.length; d++) {
int[] features = data[d];
for (int feature : features) {
int i = indexOf(feature, labels[d]);
if (dataWeights == null) {
derivativeNumerator[i] -= 1;
} else {
derivativeNumerator[i] -= dataWeights[d];
}
}
}
}
copy(derivative, derivativeNumerator);
// Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
// double[] counts = new double[numClasses];
// Arrays.fill(counts, 0.0);
for (int d = 0; d < data.length; d++) {
// activation
Arrays.fill(sums, 0.0);
int[] featuresArr = data[d];
for (int feature : featuresArr) {
for (int c = 0; c < numClasses; c++) {
int i = indexOf(feature, c);
sums[c] += x[i];
}
}
// expectation (slower routine replaced by fast way)
// double total = Double.NEGATIVE_INFINITY;
// for (int c=0; c<numClasses; c++) {
// total = SloppyMath.logAdd(total, sums[c]);
// }
double total = ArrayMath.logSum(sums);
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
if (dataWeights != null) {
probs[c] *= dataWeights[d];
}
}
for (int feature : featuresArr) {
for (int c = 0; c < numClasses; c++) {
int i = indexOf(feature, c);
derivative[i] += probs[c];
}
}
int labelindex = labels[d];
double dV = sums[labelindex] - total;
if (dataWeights != null) {
dV *= dataWeights[d];
}
value -= dV;
}
value += prior.compute(x, derivative);
}
private void calculateCLiterable(double[] x) {
//System.out.println("Checking at: "+x[0]+" "+x[1]+" "+x[2]);
value = 0.0;
// [cdm Mar 2014] This next bit seems unnecessary: derivative is allocated by ensure() in AbstractCachingDiffFunction
// before calculate() is called; and after the next block, derivativeNumerator is copied into it.
// if (derivative == null) {
// derivative = new double[x.length];
// } else {
// Arrays.fill(derivative, 0.0);
// }
if (derivativeNumerator == null) {
derivativeNumerator = new double[x.length];
// use dataIterable when data is null, and vice versa
// TODO: make sure this works as expected!
for (Datum<L, F> datum : dataIterable) {
Collection<F> features = datum.asFeatures();
for (F feature : features) {
int i = indexOf(featureIndex.indexOf(feature), labelIndex.indexOf(datum.label()));
// dataWeights is always null on this code path; the Iterable-based
// constructor doesn't support weighted data
if (dataWeights == null) {
derivativeNumerator[i] -= 1;
}
}
}
}
copy(derivative, derivativeNumerator);
// Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
// double[] counts = new double[numClasses];
// Arrays.fill(counts, 0.0);
for (Datum<L, F> datum : dataIterable) {
// activation
Arrays.fill(sums, 0.0);
Collection<F> features = datum.asFeatures();
for (F feature : features) {
for (int c = 0; c < numClasses; c++) {
int i = indexOf(featureIndex.indexOf(feature), c);
sums[c] += x[i];
}
}
// expectation (slower routine replaced by fast way)
// double total = Double.NEGATIVE_INFINITY;
// for (int c=0; c<numClasses; c++) {
// total = SloppyMath.logAdd(total, sums[c]);
// }
double total = ArrayMath.logSum(sums);
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
}
for (F feature : features) {
for (int c = 0; c < numClasses; c++) {
int i = indexOf(featureIndex.indexOf(feature), c);
derivative[i] += probs[c];
}
}
int label = this.labelIndex.indexOf(datum.label());
double dV = sums[label] - total;
value -= dV;
}
value += prior.compute(x, derivative);
}
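/**
* Stochastic value/gradient estimate that additionally approximates the
* Hessian-vector product by finite differences: H.v is estimated as
* (grad(x + h*v) - grad(x)) / h, realized below as (probsV[c] - probs[c]) / h
* at each active parameter index.
*/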
public void calculateStochasticFiniteDifference(double[] x, double[] v, double h, int[] batch) {
// THOUGHTS:
// Does applying the renormalization (g(x+hv)-g(x)) / h at each step along the way
// introduce too much error for this method to be numerically accurate?
// akleeman Feb 23 2007
// Answer to my own question: Feb 25th
// Doesn't look like it!! With h = 1e-4 it seems like the finite difference makes almost
// exactly the same step as the exact Hessian-vector product calculated through AD.
// That said, it's probably (in the case of the log conditional objective function) logical
// to only use finite differences. Unless of course the function is somehow nearly singular,
// in which case finite differences could turn what is a convex problem into a singular problem... NOT GOOD.
if (values != null) {
rvfcalculate(x);
return;
}
value = 0.0;
if (priorDerivative == null) {
priorDerivative = new double[x.length];
}
double priorFactor = batch.length / (data.length * prior.getSigma() * prior.getSigma());
derivative = ArrayMath.multiply(x, priorFactor);
HdotV = ArrayMath.multiply(v, priorFactor);
//Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] sumsV = new double[numClasses];
double[] probs = new double[numClasses];
double[] probsV = new double[numClasses];
for (int m : batch) {
//Sets the index based on the current batch
int[] features = data[m];
// activation
Arrays.fill(sums, 0.0);
Arrays.fill(sumsV, 0.0);
for (int c = 0; c < numClasses; c++) {
for (int feature : features) {
int i = indexOf(feature, c);
sums[c] += x[i];
sumsV[c] += x[i] + h * v[i];
}
}
double total = ArrayMath.logSum(sums);
double totalV = ArrayMath.logSum(sumsV);
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
probsV[c] = Math.exp(sumsV[c] - totalV);
if (dataWeights != null) {
probs[c] *= dataWeights[m];
probsV[c] *= dataWeights[m];
}
for (int feature : features) {
int i = indexOf(feature, c);
//derivative[i] += (-1);
derivative[i] += probs[c];
HdotV[i] += (probsV[c] - probs[c]) / h;
if (c == labels[m]) {
derivative[i] -= 1;
}
}
}
double dV = sums[labels[m]] - total;
if (dataWeights != null) {
dV *= dataWeights[m];
}
value -= dV;
}
value += ((double) batch.length)/((double) data.length)*prior.compute(x,priorDerivative);
}
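/**
* Gradient-only stochastic estimate, sufficient for plain SGD: the same
* computation as the finite-difference method above, minus the Hessian-vector
* product. The prior term is scaled by batch.length / data.length so that the
* contributions over all batches sum to the full-batch prior.
*/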
public void calculateStochasticGradientLocal(double[] x, int[] batch) {
if (values != null) {
rvfcalculate(x);
return;
}
value = 0.0;
int batchSize = batch.length;
if (priorDerivative == null) {
priorDerivative = new double[x.length];
}
double priorFactor = batchSize / (data.length * prior.getSigma() * prior.getSigma());
derivative = ArrayMath.multiply(x, priorFactor);
//Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
//double[] sumsV = new double[numClasses];
double[] probs = new double[numClasses];
//double[] probsV = new double[numClasses];
for (int m : batch) {
//Sets the index based on the current batch
int[] features = data[m];
// activation
Arrays.fill(sums, 0.0);
//Arrays.fill(sumsV,0.0);
for (int c = 0; c < numClasses; c++) {
for (int feature : features) {
int i = indexOf(feature, c);
sums[c] += x[i];
}
}
double total = ArrayMath.logSum(sums);
//double totalV = ArrayMath.logSum(sumsV);
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
//probsV[c] = Math.exp(sumsV[c]- totalV);
if (dataWeights != null) {
probs[c] *= dataWeights[m];
//probsV[c] *= dataWeights[m];
}
for (int feature : features) {
int i = indexOf(feature, c);
//derivative[i] += (-1);
derivative[i] += probs[c];
if (c == labels[m]) {
derivative[i] -= 1;
}
}
}
double dV = sums[labels[m]] - total;
if (dataWeights != null) {
dV *= dataWeights[m];
}
value -= dV;
}
value += ((double) batchSize)/((double) data.length)*prior.compute(x,priorDerivative);
}
@Override
public double valueAt(double[] x, double xscale, int[] batch) {
value = 0.0;
double[] sums = new double[numClasses];
for (int m : batch) {
//Sets the index based on the current batch
int[] features = data[m];
Arrays.fill(sums, 0.0);
for (int c = 0; c < numClasses; c++) {
for (int f = 0; f < features.length; f++) {
int i = indexOf(features[f], c);
if (values != null) {
sums[c] += x[i] * xscale * values[m][f];
} else {
sums[c] += x[i] * xscale;
}
}
}
double total = ArrayMath.logSum(sums);
double dV = sums[labels[m]] - total;
if (dataWeights != null) {
dV *= dataWeights[m];
}
value -= dV;
}
return value;
}
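/**
* Performs a stochastic gradient descent update in place on x and returns the
* (unregularized) batch value. For each active index this moves x by
* gain * (observed count - expected count), i.e. a plain SGD step scaled by
* gain; xscale compensates for weight vectors stored in scaled form.
*
* <p>A hypothetical driver loop (sketch only; the package's stochastic
* minimizers handle gain schedules and scaling for real):
*
* <pre>{@code
* double[] x = objective.initial();
* for (int iter = 0; iter < numPasses; iter++) {
*   int[] batch = sampleBatchIndices(objective.dataDimension()); // hypothetical helper
*   double gain = 1.0 / (1.0 + iter);                            // illustrative schedule
*   objective.calculateStochasticUpdate(x, 1.0, batch, gain);
* }
* }</pre>
*/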
@Override
public double calculateStochasticUpdate(double[] x, double xscale, int[] batch, double gain) {
value = 0.0;
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
for (int m : batch) {
// Sets the index based on the current batch
int[] features = data[m];
// activation
Arrays.fill(sums, 0.0);
for (int c = 0; c < numClasses; c++) {
for (int f = 0; f < features.length; f++) {
int i = indexOf(features[f], c);
if (values != null) {
sums[c] += x[i] * xscale * values[m][f];
} else {
sums[c] += x[i] * xscale;
}
}
}
for (int f = 0; f < features.length; f++) {
int i = indexOf(features[f], labels[m]);
double v = (values != null) ? values[m][f] : 1;
double delta = (dataWeights != null) ? dataWeights[m] * v : v;
x[i] += delta * gain;
}
double total = ArrayMath.logSum(sums);
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
if (dataWeights != null) {
probs[c] *= dataWeights[m];
}
for (int f = 0; f < features.length; f++) {
int i = indexOf(features[f], c);
double v = (values != null) ? values[m][f] : 1;
double delta = probs[c] * v;
x[i] -= delta * gain;
}
}
double dV = sums[labels[m]] - total;
if (dataWeights != null) {
dV *= dataWeights[m];
}
value -= dV;
}
return value;
}
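/**
* Computes only the gradient over a batch, without the value or prior term.
* Note that the derivative computed here follows the summed-conditional form
* used by calculateSCL (each term weighted by probs[ld]), not the
* log-conditional form used by the other stochastic methods.
*/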
@Override
public void calculateStochasticGradient(double[] x, int[] batch) {
if (derivative == null) {
derivative = new double[domainDimension()];
}
Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
for (int d : batch) {
//Sets the index based on the current batch
int[] features = data[d];
// activation
Arrays.fill(sums, 0.0);
for (int c = 0; c < numClasses; c++) {
for (int feature : features) {
int i = indexOf(feature, c);
sums[c] += x[i];
}
}
// expectation (slower routine replaced by fast way)
// double total = Double.NEGATIVE_INFINITY;
// for (int c=0; c<numClasses; c++) {
// total = SloppyMath.logAdd(total, sums[c]);
// }
double total = ArrayMath.logSum(sums);
int ld = labels[d];
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
for (int feature : features) {
int i = indexOf(feature, c);
derivative[i] += probs[ld] * probs[c];
}
}
// observed
for (int feature : features) {
int i = indexOf(feature, labels[d]);
derivative[i] -= probs[ld];
}
}
}
protected void calculateStochasticAlgorithmicDifferentiation(double[] x, double[] v, int[] batch) {
System.err.print("*");
//Initialize
value = 0.0;
//initialize any variables
DoubleAD[] derivativeAD = new DoubleAD[x.length];
for (int i = 0; i < x.length; i++) {
derivativeAD[i] = new DoubleAD(0.0, 0.0);
}
DoubleAD[] xAD = new DoubleAD[x.length];
for (int i = 0; i < x.length; i++) {
xAD[i] = new DoubleAD(x[i], v[i]);
}
// Initialize the sums
DoubleAD[] sums = new DoubleAD[numClasses];
for (int c = 0; c < numClasses; c++) {
sums[c] = new DoubleAD(0, 0);
}
DoubleAD[] probs = new DoubleAD[numClasses];
for (int c = 0; c < numClasses; c++) {
probs[c] = new DoubleAD(0, 0);
}
//long curTime = System.currentTimeMillis();
// Copy the derivative numerator, and set up the vector v to be used for Hessian * v
// (this re-initialization appears redundant with the constructors above)
for (int i = 0; i < x.length; i++) {
xAD[i].set(x[i], v[i]);
derivativeAD[i].set(0.0, 0.0);
}
//System.err.print(System.currentTimeMillis() - curTime + " - ");
//curTime = System.currentTimeMillis();
for (int d = 0; d < batch.length; d++) {
// Sets the index based on the current batch
int m = (curElement + d) % data.length;
int[] features = data[m];
for (int c = 0; c < numClasses; c++) {
sums[c].set(0.0, 0.0);
}
for (int c = 0; c < numClasses; c++) {
for (int feature : features) {
int i = indexOf(feature, c);
sums[c] = ADMath.plus(sums[c], xAD[i]);
}
}
DoubleAD total = ADMath.logSum(sums);
for (int c = 0; c < numClasses; c++) {
probs[c] = ADMath.exp( ADMath.minus(sums[c], total) );
if (dataWeights != null) {
// index by m (the datum index), not d (the position within the batch)
probs[c] = ADMath.multConst(probs[c], dataWeights[m]);
}
for (int feature : features) {
int i = indexOf(feature, c);
if (c == labels[m]) {
derivativeAD[i].plusEqualsConst(-1.0);
}
derivativeAD[i].plusEquals(probs[c]);
}
}
double dV = sums[labels[m]].getval() - total.getval();
if (dataWeights != null) {
dV *= dataWeights[m];
}
value -= dV;
}
// DANGEROUS: possible divide by zero below if prior.getSigma() == 0.
// Need to modify the prior class to handle AD. -akleeman
//System.err.print(System.currentTimeMillis() - curTime + " - ");
//curTime = System.currentTimeMillis();
double[] tmp = new double[x.length];
for (int i = 0; i < x.length; i++) {
tmp[i] = derivativeAD[i].getval();
derivativeAD[i].plusEquals(ADMath.multConst(xAD[i], batch.length / (data.length * prior.getSigma() * prior.getSigma())));
derivative[i] = derivativeAD[i].getval();
HdotV[i] = derivativeAD[i].getdot();
}
value += ((double) batch.length)/((double) data.length)*prior.compute(x, tmp);
//System.err.print(System.currentTimeMillis() - curTime + " - ");
//System.err.println("");
}
/**
* Calculate conditional likelihood for datasets with real-valued features.
* Currently this can calculate CL only (no support for SCL).
* TODO: sum-conditional obj. fun. with RVFs.
*/
protected void rvfcalculate(double[] x) {
value = 0.0;
if (derivativeNumerator == null) {
derivativeNumerator = new double[x.length];
for (int d = 0; d < data.length; d++) {
final int[] features = data[d];
final double[] vals = values[d];
for (int f = 0; f < features.length; f++) {
int i = indexOf(features[f], labels[d]);
if (dataWeights == null) {
derivativeNumerator[i] -= vals[f];
} else {
derivativeNumerator[i] -= dataWeights[d] * vals[f];
}
}
}
}
copy(derivative, derivativeNumerator);
// Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
// double[] counts = new double[numClasses];
// Arrays.fill(counts, 0.0);
for (int d = 0; d < data.length; d++) {
final int[] features = data[d];
final double[] vals = values[d];
// activation
Arrays.fill(sums, 0.0);
for (int f = 0; f < features.length; f++) {
final int feature = features[f];
final double val = vals[f];
for (int c = 0; c < numClasses; c++) {
int i = indexOf(feature, c);
sums[c] += x[i] * val;
}
}
// expectation (slower routine replaced by fast way)
// double total = Double.NEGATIVE_INFINITY;
// for (int c=0; c<numClasses; c++) {
// total = SloppyMath.logAdd(total, sums[c]);
// }
// It is faster to split these two loops: better memory striding.
double total = ArrayMath.logSum(sums);
for (int c = 0; c < numClasses; c++) {
probs[c] = Math.exp(sums[c] - total);
if (dataWeights != null) {
probs[c] *= dataWeights[d];
}
}
for (int f = 0; f < features.length; f++) {
final int feature = features[f];
final double val = vals[f];
for (int c = 0; c < numClasses; c++) {
int i = indexOf(feature, c);
derivative[i] += probs[c] * val;
}
}
double dV = sums[labels[d]] - total;
if (dataWeights != null) {
dV *= dataWeights[d];
}
value -= dV;
}
value += prior.compute(x, derivative);
}
public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset) {
this(dataset, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
}
public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, LogPrior prior) {
this(dataset, prior, false);
}
public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, float[] dataWeights, LogPrior prior) {
this(dataset, prior, false, dataWeights);
}
public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, LogPrior prior, boolean useSumCondObjFun) {
this(dataset, prior, useSumCondObjFun, null);
}
/** Version passing in a GeneralDataset, which may be binary or real-valued features. */
public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, LogPrior prior, boolean useSumCondObjFun,
float[] dataWeights) {
this.prior = prior;
this.useSummedConditionalLikelihood = useSumCondObjFun;
this.numFeatures = dataset.numFeatures();
this.numClasses = dataset.numClasses();
this.data = dataset.getDataArray();
this.labels = dataset.getLabelsArray();
this.values = dataset.getValuesArray();
if (dataWeights != null) {
this.dataWeights = dataWeights;
} else if (dataset instanceof WeightedDataset<?,?>) {
this.dataWeights = ((WeightedDataset<L, F>)dataset).getWeights();
} else {
this.dataWeights = null;
}
this.labelIndex = null;
this.featureIndex = null;
this.dataIterable = null;
}
//TODO: test this [none of our code actually even uses it].
/** Version where an Iterable is passed in for the data. Doesn't support dataWeights. */
public LogConditionalObjectiveFunction(Iterable<Datum<L, F>> dataIterable, LogPrior logPrior, Index<F> featureIndex, Index<L> labelIndex) {
this.prior = logPrior;
this.useSummedConditionalLikelihood = false;
this.numFeatures = featureIndex.size();
this.numClasses = labelIndex.size();
this.data = null;
this.dataIterable = dataIterable;
this.labelIndex = labelIndex;
this.featureIndex = featureIndex;
this.labels = null;
this.values = null;
this.dataWeights = null;
}
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels, boolean useSumCondObjFun) {
this(numFeatures, numClasses, data, labels, null, new LogPrior(LogPrior.LogPriorType.QUADRATIC), useSumCondObjFun);
}
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels) {
this(numFeatures, numClasses, data, labels, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
}
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels, LogPrior prior) {
this(numFeatures, numClasses, data, labels, null, prior);
}
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels, float[] dataWeights) {
this(numFeatures, numClasses, data, labels, dataWeights, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
}
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels, float[] dataWeights, LogPrior prior) {
this(numFeatures, numClasses, data, labels, dataWeights, prior, false);
}
/** For binary features. Supports dataWeights. */
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels,
float[] dataWeights, LogPrior prior, boolean useSummedConditionalLikelihood) {
this.numFeatures = numFeatures;
this.numClasses = numClasses;
this.data = data;
this.values = null;
this.labels = labels;
this.prior = prior;
this.dataWeights = dataWeights;
this.labelIndex = null;
this.featureIndex = null;
this.dataIterable = null;
this.useSummedConditionalLikelihood = useSummedConditionalLikelihood;
}
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels, int intPrior, double sigma, double epsilon) {
this(numFeatures, numClasses, data, null, labels, intPrior, sigma, epsilon);
}
/** For real-valued features. Passing in processed data set. */
public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, double[][] values, int[] labels, int intPrior, double sigma, double epsilon) {
this.numFeatures = numFeatures;
this.numClasses = numClasses;
this.data = data;
this.values = values;
this.labels = labels;
this.prior = new LogPrior(intPrior, sigma, epsilon);
this.labelIndex = null;
this.featureIndex = null;
this.dataIterable = null;
this.useSummedConditionalLikelihood = false;
this.dataWeights = null;
}
}