}
// Roll this entity up into the feature record kept for the document's community
// (a document without a community id contributes nothing here)
ObjectId communityId = doc.getCommunityId();
if (communityId != null) {
    EntityFeaturePojo communityFeature = entityInCommunity.get(communityId);
    if (communityFeature == null) {
        // No feature yet for this community: create one from the entity's identifying fields
        // and register it, with the disambiguated name as its first alias
        communityFeature = new EntityFeaturePojo();
        communityFeature.setCommunityId(communityId);
        communityFeature.setDimension(ent.getDimension());
        communityFeature.setDisambiguatedName(ent.getDisambiguatedName());
        communityFeature.setType(ent.getType());
        communityFeature.addAlias(ent.getDisambiguatedName());
        entityInCommunity.put(communityFeature.getCommunityId(), communityFeature);
    }
    // Geotag and ontology type are copied together, and only while the feature has no geotag
    if (null == communityFeature.getGeotag()) {
        communityFeature.setGeotag(ent.getGeotag());
        communityFeature.setOntology_type(ent.getOntology_type());
    }
    // Semantic links are merged in whenever the entity carries any
    if (ent.getSemanticLinks() != null) {
        communityFeature.addToSemanticLinks(ent.getSemanticLinks());
    }
    // The name as it appeared in this document is recorded as an alias too
    communityFeature.addAlias(ent.getActual_name());
    // Running totals: one more document, plus this entity's frequency
    communityFeature.setDoccount(1 + communityFeature.getDoccount());
    communityFeature.setTotalfreq(ent.getFrequency() + communityFeature.getTotalfreq());
}
}
}//TESTED
// Aggregate the document's associations into _aggregatedEvents, which is keyed by the
// association's feature index and then by community id.
if (null != doc.getAssociations()) {
    for (Iterator<AssociationPojo> associationIt = doc.getAssociations().iterator(); associationIt.hasNext(); ) {
        final AssociationPojo association = associationIt.next();

        if (null != deletedEntities) { // associations must not reference entities listed in deletedEntities
            // An association whose entity1 or entity2 index is listed is removed from the document outright
            boolean endpointDeleted = (null != association.getEntity1_index())
                    && deletedEntities.contains(association.getEntity1_index());
            if (!endpointDeleted) {
                endpointDeleted = (null != association.getEntity2_index())
                        && deletedEntities.contains(association.getEntity2_index());
            }
            if (endpointDeleted) {
                associationIt.remove();
                continue;
            }//TESTED
            // A listed geo index only gets blanked; the association itself is kept
            if ((null != association.getGeo_index()) && deletedEntities.contains(association.getGeo_index())) {
                association.setGeo_index(null);
            }//TESTED (trivial)
        }//TESTED

        // Nothing to aggregate when the association has neither an entity1 nor an entity2 index
        if ((null == association.getEntity1_index()) && (null == association.getEntity2_index())) {
            continue;
        }

        // Calc index (not remotely unique, but good enough for now) and stash it on the association
        // (temp save for applyAggregationToDocs)
        final String featureIndex = AssociationAggregationUtils.getEventFeatureIndex(association);
        association.setIndex(featureIndex);

        // Look up (or create) the per-community map for this index. intraDocStore records the indexes
        // already encountered, so that the doccount below is incremented only on the first encounter.
        Map<ObjectId, AssociationFeaturePojo> featuresByCommunity = _aggregatedEvents.get(featureIndex);
        final boolean indexIsNew = (null == featuresByCommunity);
        if (indexIsNew) {
            featuresByCommunity = new HashMap<ObjectId, AssociationFeaturePojo>();
            _aggregatedEvents.put(featureIndex, featuresByCommunity);
        }
        final boolean alreadyCounted = !indexIsNew && intraDocStore.contains(featureIndex);
        if (!alreadyCounted) {
            intraDocStore.add(featureIndex);
        }

        // Only documents that belong to a community contribute to the aggregated features
        final ObjectId communityId = doc.getCommunityId();
        if (null == communityId) {
            continue;
        }

        AssociationFeaturePojo assocFeature = featuresByCommunity.get(communityId);
        if (null == assocFeature) {
            // First time this index is seen for this community: seed the feature from the association
            assocFeature = new AssociationFeaturePojo();
            assocFeature.setCommunityId(communityId);
            assocFeature.setIndex(featureIndex);
            assocFeature.setEntity1_index(association.getEntity1_index());
            assocFeature.setEntity2_index(association.getEntity2_index());
            assocFeature.setVerb_category(association.getVerb_category());
            assocFeature.setAssociation_type(association.getAssociation_type());
            assocFeature.setGeo_index(association.getGeo_index());
            featuresByCommunity.put(assocFeature.getCommunityId(), assocFeature);
        }
        if (!alreadyCounted) {
            assocFeature.setDoccount(1 + assocFeature.getDoccount());
        }

        // Accumulate the entity indexes and the verb on the feature
        if (null != association.getEntity1_index()) {
            assocFeature.addEntity1(association.getEntity1_index());
        }
        if (null != association.getEntity2_index()) {
            assocFeature.addEntity2(association.getEntity2_index());
        }
        if (null != association.getVerb()) {
            assocFeature.addVerb(association.getVerb());
        }

        // Accumulate the raw entity strings, restricting their length in case they are quotations.
        // For a string longer than entity_MAXSIZE, scan backwards from index entity_MAXSIZE over at
        // most 10 characters for one below 0x30 ('0'; i.e. space and ASCII punctuation such as
        // '.' ',' '-'), and cut just after it; with no such character the cut falls at
        // entity_MAXSIZE - 9 characters.
        // NOTE(review): a break character found at index entity_MAXSIZE itself yields
        // entity_MAXSIZE + 1 characters - confirm whether that is the intended limit.
        final String entity1Text = association.getEntity1();
        if (null != entity1Text) {
            if (entity1Text.length() <= AssociationFeaturePojo.entity_MAXSIZE) {
                assocFeature.addEntity1(entity1Text);
            }
            else {
                int lastKept = AssociationFeaturePojo.entity_MAXSIZE;
                while ((lastKept > AssociationFeaturePojo.entity_MAXSIZE - 10)
                        && (entity1Text.charAt(lastKept) >= 0x30)) {
                    --lastKept;
                }
                assocFeature.addEntity1(entity1Text.substring(0, lastKept + 1));
            }//TESTED (both clauses, 2.1.4.3a)
        }
        final String entity2Text = association.getEntity2();
        if (null != entity2Text) {
            if (entity2Text.length() <= AssociationFeaturePojo.entity_MAXSIZE) {
                assocFeature.addEntity2(entity2Text);
            }
            else {
                int lastKept = AssociationFeaturePojo.entity_MAXSIZE;
                while ((lastKept > AssociationFeaturePojo.entity_MAXSIZE - 10)
                        && (entity2Text.charAt(lastKept) >= 0x30)) {
                    --lastKept;
                }
                assocFeature.addEntity2(entity2Text.substring(0, lastKept + 1));
            }//TESTED (both clauses, 2.1.4.3a)
        }
    }//(end loop over associations)
}//TESTED