}
@Override
public MarkovRandomField buildMRF(String[] queryTerms) throws ConfigurationException {
// This is the MRF we're building.
MarkovRandomField mrf = new MarkovRandomField(queryTerms, env);
// Construct MRF feature by feature.
NodeList children = super.getModel().getChildNodes();
// Sum of query-dependent importance weights.
float totalImportance = 0.0f;
// Cliques that have query-dependent importance weights.
Set<CascadeClique> cliquesWithImportance = new HashSet<CascadeClique>();
int cascade_stage = 0;
int cascade_stage_proper = -1;
for (int i = 0; i < children.getLength(); i++) {
Node child = children.item(i);
if ("feature".equals(child.getNodeName())) {
// Get the feature id.
String featureID = XMLTools.getAttributeValue(child, "id", "");
if (featureID.equals("")) {
throw new RetrievalException("Each feature must specify an id attribute!");
}
// Get feature weight (default = 1.0).
float weight = XMLTools.getAttributeValue(child, "weight", 1.0f);
// Concept importance model (optional).
ConceptImportanceModel importanceModel = null;
// Get concept importance source (if applicable).
String importanceSource = XMLTools.getAttributeValue(child, "importance", "");
if (!importanceSource.equals("")) {
importanceModel = env.getImportanceModel(importanceSource);
if (importanceModel == null) {
throw new RetrievalException("ImportanceModel " + importanceSource + " not found!");
}
}
// Get CliqueSet type.
String cliqueSetType = XMLTools.getAttributeValue(child, "cliqueSet", "");
// Get Cascade stage (if any)
int cascadeStage = XMLTools.getAttributeValue(child, "cascadeStage", -1);
String pruner_and_params = XMLTools.getAttributeValue(child, "prune", "null");
String thePruner = (pruner_and_params.trim().split("\\s+"))[0];
String conceptBinType = XMLTools.getAttributeValue(child, "conceptBinType", "");
String conceptBinParams = XMLTools.getAttributeValue(child, "conceptBinParams", "");
String scoreFunction = XMLTools.getAttributeValue(child, "scoreFunction", null);
int width = XMLTools.getAttributeValue(child, "width", -1);
if (cascadeStage != -1) {
RetrievalEnvironment.setIsNew(true);
} else {
RetrievalEnvironment.setIsNew(false);
}
if (cascadeStage != -1) {
if (!conceptBinType.equals("") || !conceptBinParams.equals("")) {
if (conceptBinType.equals("") || conceptBinParams.equals("")) {
throw new RetrievalException("Most specify conceptBinType || conceptBinParams");
}
importanceModel = env.getImportanceModel("wsd");
if (importanceModel == null) {
throw new RetrievalException("ImportanceModel " + importanceSource + " not found!");
}
}
}
cascade_stage_proper = cascadeStage;
if (cascadeStage != -1 && conceptBinType.equals("") && conceptBinParams.equals("")) {
cascade_stage_proper = cascade_stage;
}
// Construct the clique set.
CascadeCliqueSet cliqueSet = (CascadeCliqueSet) (CascadeCliqueSet.create(cliqueSetType,
env, queryTerms, child, cascade_stage_proper, pruner_and_params));// , approxProximity);
// Get cliques from clique set.
List<Clique> cliques = cliqueSet.getCliques();
if (cascadeStage != -1 && conceptBinType.equals("") && conceptBinParams.equals("")) {
if (cliques.size() > 0) {
cascade_stage++;
}
} else if (cascadeStage != -1 && !conceptBinType.equals("") && !conceptBinParams.equals("")) {
if (cliques.size() > 0) {
int[] order = new int[cliques.size()];
double[] conceptWeights = new double[cliques.size()];
int cntr = 0;
String all_concepts = "";
for (Clique c : cliques) {
float importance = importanceModel.getCliqueWeight(c);
order[cntr] = cntr;
conceptWeights[cntr] = importance;
cntr++;
all_concepts += c.getConcept() + " ";
}
ivory.smrf.model.constrained.ConstraintModel.Quicksort(conceptWeights, order, 0,
order.length - 1);
int[] keptCliques = getCascadeCliques(conceptBinType, conceptBinParams, conceptWeights,
order, all_concepts, featureID, thePruner, width + "", scoreFunction);
List<Clique> cliques2 = Lists.newArrayList();
for (int k = 0; k < keptCliques.length; k++) {
int index = keptCliques[k];
cliques2.add(cliques.get(index));
}
cliques = Lists.newArrayList();
for (int k = 0; k < cliques2.size(); k++) {
cliques.add(cliques2.get(k));
}
if (keptCliques.length != 0) {
for (Clique c : cliques) {
((CascadeClique) c).setCascadeStage(cascade_stage);
}
cascade_stage++;
}
}
}
for (Clique c : cliques) {
double w = weight;
c.setParameterName(featureID); // Parameter id.
c.setParameterWeight(weight); // Weight.
c.setType(cliqueSet.getType()); // Clique type.
// Get clique weight.
if (!importanceSource.equals("")) {
float importance = importanceModel.getCliqueWeight(c);
if (weight == -1.0f) { // default value.
c.setParameterWeight(1.0f);
}
c.setImportance(importance);
totalImportance += importance;
cliquesWithImportance.add((CascadeClique) c);
w = importance;
}
if (w < pruningThresholdBigram && c.getType() != Clique.Type.Term) {
// System.out.println("Not add "+c);
} else {
// Add clique to MRF.
mrf.addClique(c);
// System.out.println("Add "+c);
}
}
}
}