// If the EntitySpecPojo or DocumentPojo is null return null
if ((esp == null) || (f == null)) return null;
try
{
EntityPojo e = new EntityPojo();
// Parse creation criteria script to determine if the entity should be added
if (esp.getCreationCriteriaScript() != null && JavaScriptUtils.containsScript(esp.getCreationCriteriaScript()))
{
boolean addEntity = executeEntityAssociationValidation(esp.getCreationCriteriaScript(), field, index);
if (!addEntity) {
return null;
}
}
// Entity.disambiguous_name
String disambiguatedName = null;
if (JavaScriptUtils.containsScript(esp.getDisambiguated_name()))
{
disambiguatedName = (String)getValueFromScript(esp.getDisambiguated_name(), field, index);
}
else
{
if ((_iterator != null) && (esp.getDisambiguated_name().startsWith("$metadata.") || esp.getDisambiguated_name().startsWith("${metadata."))) {
if (_context.isStandalone()) { // (minor message, while debugging only)
_context.getHarvestStatus().logMessage("Warning: in disambiguated_name, using global $metadata when iterating", true);
}
}
// Field - passed in via simple string array from getEntities
if (field != null)
{
disambiguatedName = getFormattedTextFromField(esp.getDisambiguated_name(), field);
}
else
{
disambiguatedName = getFormattedTextFromField(esp.getDisambiguated_name(), field);
}
}
// Only proceed if disambiguousName contains a meaningful value
if (disambiguatedName != null && disambiguatedName.length() > 0)
{
e.setDisambiguatedName(disambiguatedName);
}
else // Always log failure to get a dname - to remove this, specify a creationCriteriaScript
{
_context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required disambiguated_name from: ").append(esp.getDisambiguated_name()).toString(), true);
return null;
}
// Entity.frequency (count)
String freq = "1";
if (esp.getFrequency() != null)
{
if (JavaScriptUtils.containsScript(esp.getFrequency()))
{
freq = getValueFromScript(esp.getFrequency(), field, index).toString();
}
else
{
freq = getFormattedTextFromField(esp.getFrequency(), field);
}
// Since we've specified freq, we're going to enforce it
if (null == freq) { // failed to get it
if (null == esp.getCreationCriteriaScript()) {
_context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required frequency from: ").append(esp.getFrequency()).toString(), true);
return null;
}
}
}
// Try converting the freq string value to its numeric (double) representation
Double frequency = (double) 0;
try
{
frequency = Double.parseDouble(freq);
}
catch (Exception e1)
{
this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
return null;
}
// Only proceed if frequency > 0
if (frequency > 0)
{
e.setFrequency(frequency.longValue()); // Cast to long from double
}
else
{
return null;
}
// Entity.actual_name
String actualName = null;
if (esp.getActual_name() != null)
{
if (JavaScriptUtils.containsScript(esp.getActual_name()))
{
actualName = (String)getValueFromScript(esp.getActual_name(), field, index);
}
else
{
if ((_iterator != null) && (esp.getActual_name().startsWith("$metadata.") || esp.getActual_name().startsWith("${metadata."))) {
if (_context.isStandalone()) { // (minor message, while debugging only)
_context.getHarvestStatus().logMessage("Warning: in actual_name, using global $metadata when iterating", true);
}
}
actualName = getFormattedTextFromField(esp.getActual_name(), field);
}
// Since we've specified actual name, we're going to enforce it (unless otherwise specified)
if (null == actualName) { // failed to get it
if (null == esp.getCreationCriteriaScript()) {
if (_context.isStandalone()) { // (minor message, while debugging only)
_context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required actual_name from: ").append(esp.getActual_name()).toString(), true);
}
return null;
}
}
}
// If actualName == null set it equal to disambiguousName
if (actualName == null) actualName = disambiguatedName;
e.setActual_name(actualName);
// Entity.type
String type = null;
if (esp.getType() != null)
{
if (JavaScriptUtils.containsScript(esp.getType()))
{
type = (String)getValueFromScript(esp.getType(), field, index);
}
else
{
type = getFormattedTextFromField(esp.getType(), field);
}
// Since we've specified type, we're going to enforce it (unless otherwise specified)
if (null == type) { // failed to get it
if (null == esp.getCreationCriteriaScript()) {
_context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required type from: ").append(esp.getType()).toString(), true);
return null;
}
}
}
else
{
type = "Keyword";
}
e.setType(type);
// Entity.index
String entityIndex = disambiguatedName + "/" + type;
e.setIndex(entityIndex.toLowerCase());
// Now check if we already exist, discard if so:
if (_entityMap.contains(e.getIndex())) {
return null;
}
// Entity.dimension
String dimension = null;
if (esp.getDimension() != null)
{
if (JavaScriptUtils.containsScript(esp.getDimension()))
{
dimension = (String)getValueFromScript(esp.getDimension(), field, index);
}
else
{
dimension = getFormattedTextFromField(esp.getDimension(), field);
}
// Since we've specified dimension, we're going to enforce it (unless otherwise specified)
if (null == dimension) { // failed to get it
if (null == esp.getCreationCriteriaScript()) {
_context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required dimension from: ").append(esp.getDimension()).toString(), true);
return null;
}
}
}
if (null == dimension) {
try {
e.setDimension(DimensionUtility.getDimensionByType(type));
}
catch (java.lang.IllegalArgumentException ex) {
e.setDimension(EntityPojo.Dimension.What);
}
}
else {
try {
EntityPojo.Dimension enumDimension = EntityPojo.Dimension.valueOf(dimension);
if (null == enumDimension) {
_context.getHarvestStatus().logMessage(new StringBuffer("Invalid dimension: ").append(dimension).toString(), true);
return null; // (invalid dimension)
}
else {
e.setDimension(enumDimension);
}
}
catch (Exception e2) {
_context.getHarvestStatus().logMessage(new StringBuffer("Invalid dimension: ").append(dimension).toString(), true);
return null; // (invalid dimension)
}
}
// Entity.relevance
String relevance = "0";
if (esp.getRelevance() != null)
{
if (JavaScriptUtils.containsScript(esp.getRelevance()))
{
relevance = (String)getValueFromScript(esp.getRelevance(), field, index);
}
else
{
relevance = getFormattedTextFromField(esp.getRelevance(), field);
}
// Since we've specified relevance, we're going to enforce it (unless otherwise specified)
if (null == relevance) { // failed to get it
if (null == esp.getCreationCriteriaScript()) {
_context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required relevance from: ").append(esp.getRelevance()).toString(), true);
return null;
}
}
}
try {
e.setRelevance(Double.parseDouble(relevance));
}
catch (Exception e1) {
this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
return null;
}
// Entity.sentiment (optional field)
if (esp.getSentiment() != null)
{
String sentiment;
if (JavaScriptUtils.containsScript(esp.getSentiment()))
{
sentiment = (String)getValueFromScript(esp.getSentiment(), field, index);
}
else
{
sentiment = getFormattedTextFromField(esp.getSentiment(), field);
}
// (sentiment is optional, even if specified)
if (null != sentiment) {
try {
double d = Double.parseDouble(sentiment);
e.setSentiment(d);
if (null == e.getSentiment()) {
if (_context.isStandalone()) { // (minor message, while debugging only)
_context.getHarvestStatus().logMessage(new StringBuffer("Invalid sentiment: ").append(sentiment).toString(), true);
}
}
}
catch (Exception e1) {
this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
return null;
}
}
}
// Entity Link data:
if (esp.getLinkdata() != null)
{
String linkdata = null;
if (JavaScriptUtils.containsScript(esp.getLinkdata()))
{
linkdata = (String)getValueFromScript(esp.getLinkdata(), field, index);
}
else
{
linkdata = getFormattedTextFromField(esp.getLinkdata(), field);
}
// linkdata is optional, even if specified
if (null != linkdata) {
String[] links = linkdata.split("\\s+");
e.setSemanticLinks(Arrays.asList(links));
}
}
// Extract Entity GEO or set Entity Geo equal to DocGeo if specified via useDocGeo
if (esp.getGeotag() != null)
{
GeoPojo geo = getEntityGeo(esp.getGeotag(), null, field);
if (null != geo) {
e.setGeotag(geo);
}
// (Allow this field to be intrinsically optional)
// If no ontology type is specified, derive it from getEntityGeo:
if (null == esp.getOntology_type()) {
esp.setOntology_type(esp.getGeotag().getOntology_type());
}
}
else if (esp.getUseDocGeo() == true)
{
GeoPojo geo = getEntityGeo(null, f, field);
if (null != geo) {
e.setGeotag(geo);
}
// (Allow this field to be intrinsically optional)
}
// Entity.ontological_type (
String ontology_type = null;
if (esp.getOntology_type() != null)
{
if (JavaScriptUtils.containsScript(esp.getOntology_type()))
{
ontology_type = (String)getValueFromScript(esp.getOntology_type(), field, index);
}
else
{
ontology_type = getFormattedTextFromField(esp.getOntology_type(), field);
}
// Allow this field to be intrinsically optional
}
// If ontological_type == null, go fetch it from the internal lookup
if (ontology_type == null) {
e.setOntology_type(GeoOntologyMapping.mapEntityToOntology(type));
}
else if ('p' == GeoOntologyMapping.encodeOntologyCode(ontology_type) && !ontology_type.equals("point")) {
// In this case we don't recognize the ontology type so we'll overwrite it
e.setOntology_type(GeoOntologyMapping.mapEntityToOntology(type));
}
e.setOntology_type(ontology_type);
// Add the index and geotag to geomap to get used by associations with matching indexes
if (e.getGeotag() != null)
{
_geoMap.put(e.getIndex(), e.getGeotag());
}
_entityMap.add(e.getIndex());
return e;
}
catch (Exception ex)
{