Package com.ikanow.infinit.e.data_model.store.document

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo


     
      // Validate each entity of this document and fold the valid ones into
      // _aggregatedEntities (entity index -> community id -> aggregated feature).
      if (null != doc.getEntities())
      {
        // An explicit iterator is used so malformed entities can be removed in place
        Iterator<EntityPojo> entIt = doc.getEntities().iterator();
        while (entIt.hasNext())  {
          EntityPojo ent = entIt.next();
         
          // Some QA checking:
          // (reject the entity if its index, disambiguated name, type or dimension is missing,
          //  or if its index is longer than 250 characters)
          if ((null == ent.getIndex()) || (null == ent.getDisambiguatedName()) || (null == ent.getType())
              || (null == ent.getDimension()) ||  (ent.getIndex().length() > 250))
          {
            // Record the index of the rejected entity when it has one (set is created lazily)
            if (null != ent.getIndex()) {
              if (null == deletedEntities) {
                deletedEntities = new HashSet<String>();
              }
              deletedEntities.add(ent.getIndex());
            }//TESTED
           
            entIt.remove(); // malformed / Entities can't be >500B (ie 250 16b characters)
            continue;
          }//TESTED
         
          // A missing frequency is not fatal: default it to 1
          if (null == ent.getFrequency()) { // can recover
            ent.setFrequency(1L);
          }

          // Look up (or create and register) the per-community map for this entity index.
          // Note the map is registered even if the document turns out to have no community id.
          Map<ObjectId, EntityFeaturePojo> entityInCommunity = _aggregatedEntities.get(ent.getIndex());
          if (null == entityInCommunity) {
            entityInCommunity = new HashMap<ObjectId, EntityFeaturePojo>();
            _aggregatedEntities.put(ent.getIndex(), entityInCommunity);
           
          }
          // Statistics are only accumulated when the document has a community id
          ObjectId communityId = doc.getCommunityId();
          if (null != communityId)
          {
            EntityFeaturePojo feature = entityInCommunity.get(communityId);
            if (null == feature)
            {
              // First time this entity is seen for this community: seed the feature from the entity
              feature = new EntityFeaturePojo();
              feature.setCommunityId(communityId);
              feature.setDimension(ent.getDimension());
              feature.setDisambiguatedName(ent.getDisambiguatedName());
              feature.setType(ent.getType());
              feature.addAlias(ent.getDisambiguatedName());             
              entityInCommunity.put(feature.getCommunityId(), feature);
            }
            // While the feature has no geotag, copy geotag and ontology type from this entity
            // (the entity's own geotag may itself be null, in which case a later entity can still set it)
            if ( feature.getGeotag() == null )  {
              feature.setGeotag(ent.getGeotag());
              feature.setOntology_type(ent.getOntology_type());
            }
            if (null != ent.getSemanticLinks()) {
              feature.addToSemanticLinks(ent.getSemanticLinks());
            }
            // Every occurrence contributes its actual name as an alias, one to the document
            // count, and its frequency to the total frequency
            feature.addAlias(ent.getActual_name());
            feature.setDoccount(feature.getDoccount() + 1);
            feature.setTotalfreq(feature.getTotalfreq() + ent.getFrequency());
          }
        }
      }//TESTED
      if (null != doc.getAssociations()) {
        Iterator<AssociationPojo> evtIt = doc.getAssociations().iterator();
View Full Code Here


          continue; // more aggressive filtering
        }
        if (Double.isInfinite(res.metric) || Double.isNaN(res.metric)) {
          continue;
        }
        EntityPojo entity = new EntityPojo();
        entity.setDimension(EntityPojo.Dimension.What);
        entity.setType("Keyword");
        entity.setDisambiguatedName(res.value.text);
        entity.setActual_name(res.value.text);
        entity.setFrequency(1L);
        entity.setRelevance(res.metric);
        partialDoc.getEntities().add(entity);
       
        //DEBUG
        //System.out.println(res.value.text + ": " + res.metric + "/" + res.link_rank + "/" + res.count_rank);
      }
View Full Code Here

//    130] Sitting tenant with none of the above (ie default)
//    140] Anything else!
     
   
    for (Map.Entry<String, Candidate> pair: dubiousLocations.entrySet()) {
      EntityPojo ent = pair.getValue().entity;
      Candidate candidate = pair.getValue();
     
// 2.1] Let's analyse the "sitting tenant"
     
      int nPrio = 130;
      GeoFeaturePojo currLeader = null;
      int nCase = 0; // (just for debugging, 0=st, 1=large city, 2=region, 3=other)
     
      if (otherRegions.contains(candidate.state)) { // Strong direct ref, winner!
        nPrio = 10; // winner!
      }//TESTED: "san antonio, texas/city" vs "texas"
      else if (otherCountriesOrRegionsReferenced.contains(candidate.state)) {
        // Indirect ref
        nPrio = 40; // good, but beatable...
      }//TESTED: "philadelphia (village), new york/city"
      else if (otherCountries.contains("united states")) { // Weak direct ref
        nPrio = 80; // better than nothing...       
      }//TESTED: "apache, oklahoma/city"
      else if (otherCountriesOrRegionsReferenced.contains("united states")) { // Weak indirect ref
        nPrio = 80; // better than nothing...       
      }//TESTED: "washington, d.c." have DC as stateorcounty, but US in countries list
     
      // Special case: we don't like "village":
      if ((80 != nPrio) && ent.getDisambiguatedName().contains("village") && !ent.getActual_name().contains("village"))
      {       
        nPrio = 80;       
      }//TESTED: "Downvoted: Philadelphia (village), New York from Philadelphia"
     
      // Debug
      if (_nDebugLevel >= 2) {
        System.out.println(pair.getKey() + " SittingTenantScore=" + nPrio);
      }
     
      // Alternatives
      if (nPrio > 10) {
       
        LinkedList<GeoFeaturePojo> geos = pair.getValue().candidates;
        for (GeoFeaturePojo geo: geos) {
         
          int nAltPrio = 140;
          int nAltCase = -1;
          String city = (null != geo.getCity()) ? geo.getCity().toLowerCase() : null;
          String region = (null != geo.getRegion()) ? geo.getRegion().toLowerCase() : null;
          String country = (null != geo.getCountry()) ? geo.getCountry().toLowerCase() : null;
         
// 2.2] CASE 1: I'm a large city, pop >= 400K (best score 15)
//           15] Large city with strong direct   
//           30] Large city with strong indirect
//           70] Large city with weak direct
//           72] Large city with weak indirect
//           75] Large city with no reference          
         
          if ((null != city) && (geo.getPopulation() >= 400000) && (nPrio > 15)) {
            nAltCase = 1;
           
            if ((null != region) && (otherRegions.contains(region))) {
              nAltPrio = 15; // strong direct
            }//TESTED: "dallas / Texas / United States = 15"
            else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
              nAltPrio = 30; // strong indirect
            }//TESTED: "sacramento / California / United State"
            else if ((null != country) && (otherCountries.contains(country))) {
              nAltPrio = 70; // weak direct
            }//TESTED: "berlin, germany", with "germany" directly mentioned
            else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
              nAltPrio = 72; // weak indirect
            }//TESTED: "los angeles / California / United States = 72"
            else {
              nAltPrio = 75; // just for being big!
            }//TESTED: "barcelona, spain"
          }

// 2.3] CASE 2: I'm a region (best score=20, can beat current score)
//           20] Region with direct
//           50] Region with indirect
//          120] Region with no reference, if there is only 1
         
          else if ((null == city) && (nPrio > 20)) {
            nAltCase = 2;
           
            if ((null != country) && (otherCountries.contains(country))) {
              nAltPrio = 20; // strong direct
            }//TESTED: (region) "Berlin, Germany" with "Germany" mentioned
            else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
              nAltPrio = 50; // strong indirect
            }//(haven't seen, but we'll live)
            else {
              nAltPrio = 120; // (just for being there)
            }//TESTED: "null / Portland / Jamaica = 120", also "Shanghai / China"
          }
         
// 2.4] CASE 3: I'm any foreign possibility (best score=60)
//           60] Another foreign possibility with strong direct
//           78] Another foreign possibility with strong indirect (>100K population - ie not insignificant)
//           90] Another foreign possibility with strong indirect
//          100] Another foreign possibility with weak direct
//          110] Another foreign possibility with weak indirect
         
          else if (nPrio > 60) {
            nAltCase = 3;
           
            if ((null != region) && (otherRegions.contains(region))) {
              nAltPrio = 60; // strong direct
             
              // Double check we're not falling into the trap below:
              if (!geo.getCountry_code().equals("US")) {
                Matcher m = this._statesRegex.matcher(geo.getRegion());
                if (m.matches()) { // non US state matching against (probably) US state, disregard)
                  nAltPrio = 140;
                }
              }//TESTED (same clause as below)
             
            }//TESTED: lol "philadelphia / Maryland / Liberia = 60" (before above extra clause)
             
            if (nAltPrio > 60) { // (may need to re-run test)
              if ((null != country) && (otherCountries.contains(country))) {
                if (geo.getPopulation() < 100000) {
                  nAltPrio = 90; // strong indirect
                } //TESTED: "washington / Villa Clara / Cuba"
                else {
                  nAltPrio = 78; // strong indirect, with boost!               
                } //TESTED: "geneva, Geneve, Switzerland", pop 180K
              }
              else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                nAltPrio = 100; // weak direct
              }//TESTED: "lincoln / Lincolnshire / United Kingdom = 100"
              else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                nAltPrio = 110; // weak indirect
              }//(haven't seen, but we'll live)           
            }
          }
          // Debug:
          if ((_nDebugLevel >= 2) && (nAltPrio < 140)) {
            System.out.println("----Alternative: " + geo.getCity() + " / " + geo.getRegion() + " / " + geo.getCountry() + " score=" + nAltPrio);
          }
         
          // Outcome of results:
         
          if (nAltPrio < nPrio) {
            currLeader = geo;
            nPrio = nAltPrio;
            nCase = nAltCase;
          }
        } // end loop over alternativse
       
        if (null != currLeader) { // Need to change
         
          if (1 == nCase) {
            this._nMovedToLargeCity++;
           
            //(Cities are lower case in georef DB for some reason)
             String city = WordUtils.capitalize(currLeader.getCity());
           
            if (currLeader.getCountry_code().equals("US")) { // Special case: is this just the original?
             
              String region = currLeader.getRegion();
              if (region.equals("District of Columbia")) { // Special special case
                region = "D.C.";
              }
              String sCandidate = city + ", " + region;
             
              if (!sCandidate.equals(ent.getDisambiguatedName())) {
                ent.setDisambiguatedName(sCandidate);             
                ent.setIndex(ent.getDisambiguatedName() + "/city");
                ent.setSemanticLinks(null);
                bChangedAnything = true;
              }//TESTED (lots, eg "Philadelphia (village), New York" -> "Philadelphia, PA"; Wash, Ill. -> Wash DC)
              else {
                this._nMovedToLargeCity--;
                _nStayedWithOriginal++;               
              }//TESTED ("Washington DC", "San Juan, Puerto Rico")
            }//TESTED (see above)
            else {
              ent.setDisambiguatedName(city + ", " + currLeader.getCountry());             
              ent.setIndex(ent.getDisambiguatedName() + "/city");
              ent.setSemanticLinks(null);
              bChangedAnything = true;
            }//TESTED: "london, california/city to London, United Kingdom"
          }
          else if (2 == nCase) {
            this._nMovedToRegion++;
            ent.setDisambiguatedName(currLeader.getRegion() + ", " + currLeader.getCountry());
            ent.setIndex(ent.getDisambiguatedName() + "/region");
            ent.setSemanticLinks(null);
            bChangedAnything = true;
           
          }//TESTED: "Moved madrid, new york/city to Madrid, Spain" (treats Madrid as region, like Berlin see above)
          else {
            //(Cities are lower case in georef DB for some reason)
             String city = WordUtils.capitalize(currLeader.getCity());
            
            this._nMovedToForeignCity++;
            ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
            ent.setIndex(ent.getDisambiguatedName() + "/city");
            ent.setSemanticLinks(null);
            bChangedAnything = true;

          }//TESTED: "Moved geneva, new york/city to Geneva, Switzerland"
         
          if ((_nDebugLevel >= 1) && (null == ent.getSemanticLinks())) {
            System.out.println("++++ Moved " + pair.getKey() + " to " + ent.getDisambiguatedName());
          }
        }
        else {
          _nStayedWithOriginal++;
        }       
View Full Code Here

      if (null == (changingTo = changeDuplicateFromToMap.get(changingTo))) {
        changingTo = changePair.getValue(); // (change back again)       
      }

      // Make the change:
      EntityPojo toChangeEnt = toChange.entity;
      EntityPojo changingToEnt = changingTo.entity;
     
      // Preferred option, improve stats of "change to" and then delete "to change"
      EntityInfo.assimilate(changingToEnt, toChangeEnt);
      toChange.entity = changingTo.entity; // (need to support 1-word replacement below)
      doc.getEntities().remove(toChangeEnt);
      //TESTED: 1-hop and dedup-and-1-word-replace
     
      // Other option: swap the important fields over - the problem with
      // this is that you get multiple entities with the same name
      // so we'll not go with that
//      toChangeEnt.getGazateer_index() = changingToEnt.getGazateer_index();
//      toChangeEnt.getDisambiguous_name() = changingToEnt.getDisambiguous_name();
//      toChangeEnt.linkdata = changingToEnt.linkdata;
      // Leave the stats alone ... it's all a little bit confusing
     
      this._nDeduplications++;
      bChangedAnything = true;

         
    }//TESTED: "HOPPING: miss oldham/person TO frances oldham kelsey/person TO frances oldham kelsey/person"
   
// 3.1] The easiest case is one-word person entities, whether they've been
    // mapped to an actual dis-name or not....
    // If the one-word actual name maps to the first name, surname, nickname of a
    // "well qualified" entity (ie reasonable quality actual name)
    // (and there's only one option, ie ignore "chao" vs "albert chao" and "anne chao")
   
    for (EntityInfo entInfo: oneWordEntities) {
      EntityPojo ent = entInfo.entity;
      List<EntityInfo> l = possibleMatches.get(entInfo.lastName);
      if (null != l) {

        // Debug:
        if (_nDebugLevel >= 2) {             
          System.out.println("Candidate matches for " + ent.getActual_name() + " / " + ent.getDisambiguatedName() + ": ");
        }
        String disName = null;
        EntityInfo changeTo = null;
        boolean bMultipleDisNames = false;
          // (If there are multiple dis names, but one of them is mine, then I'm good to go
          //  else I'm going to delete the one-word entity...)
       
        for (EntityInfo info: l) {
         
          // Debug:
          if (_nDebugLevel >= 2) {             
            System.out.println("\tEntity3: " + info.entity.getIndex() + " - "+ info.entity.getActual_name() + " / " + info.entity.getDisambiguatedName()
              + ": " + info.entity.getRelevance() + " / " + info.entity.getFrequency() + " / " + info.entity.getTotalfrequency());
          }
         
          if (ent.getDisambiguatedName().equals(info.entity.getDisambiguatedName())) {
            // Found my man - dis name match between Alchemy and a candidate
            changeTo = info;
            bMultipleDisNames = true; // (Make debug printing easier)
            break;
          } //TESTED: (Couldn't actually find an example of this, but it's simple enough!)
         
          if (null == disName) {
            disName = info.entity.getDisambiguatedName();
            changeTo = info;
          }
          else if (!bMultipleDisNames) {
            if (!disName.equals(info.entity.getDisambiguatedName())) {
             
             
              bMultipleDisNames = true;
              changeTo = null; // Not going to be able to assign a better version
            }
          }
         
        } // end loop over 1-word candidates
       
        if (null != changeTo) {   
         
          //Debug code
          if (_nDebugLevel >= 1) {             
            System.out.println("1REPLACE/" + bMultipleDisNames + ": "+ ent.getActual_name() + " WITH " + changeTo.entity.getIndex() + " - "+ changeTo.entity.getActual_name() + " / " + changeTo.entity.getDisambiguatedName()
              + ": " + changeTo.entity.getRelevance() + " / " + changeTo.entity.getFrequency() + " / " + changeTo.entity.getTotalfrequency());
          }
          // Preferred option, improve stats of "change to" and then delete "to change"
          EntityInfo.assimilate(changeTo.entity, ent);
          doc.getEntities().remove(ent);
          //TESTED
         
          // Other option: swap the important fields over - the problem with
          // this is that you get multiple entities with the same name
          // so we'll not go with that
//          ent.getGazateer_index() = changeTo.entity.getGazateer_index();
//          ent.getDisambiguous_name() = changeTo.entity.getDisambiguous_name();
//          ent.linkdata = changeTo.entity.linkdata;
          // Leave the stats alone ... it's all a little bit confusing
         
          this._nOneWordAssignments++;
          bChangedAnything = true;
        }
        else if (null == changeTo) { // Expensive, but hopefully don't need to do that often
         
          //Debug
          if (_nDebugLevel >= 1) {             
            System.out.println("DELETE " + ent.getActual_name() + " " + ent.getDisambiguatedName());
          }
         
          this._nOneWordDeletions++;
          doc.getEntities().remove(ent);
          bChangedAnything = true;

        }
      }//TESTED: various cases, changed and unchanged
     
      else { // No candidate matches, compare against other "who"s
       
        //Debug:
        if (_nDebugLevel >= 2) {             
          System.out.println("No person candidate matches for " + ent.getActual_name());       
        }
// 3.2] also need to compare against companies ie other "who's" (if no people matches)
       
        String gazIndex = null;
        EntityPojo changeTo = null;
        Set<EntityPojo> possWhoSet = null;
       
        if (null == entInfo.firstName) { // Won't try to replace weak prefix entities with company names, obviously
         
          String stripActualName = ent.getActual_name().replaceAll("[;;.,]+$", "");
         
          possWhoSet = possibleWhoMatches.get(stripActualName.toLowerCase());
         
          if (null != possWhoSet) {
            // Check there's only 1 candidate, else nothing to be done
            for (EntityPojo possWho: possWhoSet) {
 
              //Debug
              if (_nDebugLevel >= 2) {             
                System.out.println("Candidate company match: " + possWho.getIndex());
              }
              if (null == gazIndex) {
                gazIndex = possWho.getIndex();
                changeTo = possWho;
              }
              else if (!gazIndex.equals(possWho.getIndex())) {
                gazIndex = null;
                break;
              }
            }
          }//TESTED: "gala/person" vs "buckyball discovery gala/organization"
          //TESTED: multiple companies case: "Sinclair" vs "Sinclair Technologies Inc" and "Sinclair Holdings Inc"
        } //(end if a one-word vs weak prefix)
       
        if (null != gazIndex) { // Convert
         
          //Debug code
          if (_nDebugLevel >= 1) {             
            System.out.println("COREPLACE " + ent.getActual_name() + " WITH "+ changeTo.getActual_name() + " / " + changeTo.getDisambiguatedName()
              + ": " + changeTo.getRelevance() + " / " + changeTo.getFrequency() + " / " + changeTo.getTotalfrequency());         
          }
          // Preferred option, improve stats of "change to" and then delete "to change"
          EntityInfo.assimilate(changeTo, ent);
          doc.getEntities().remove(ent);
          //TESTED
View Full Code Here

    List<EntityPojo> ents = new ArrayList<EntityPojo>();
    if ( sc.entities != null)
    {
      for ( AlchemyEntityPojo ae : sc.entities)
      {
        EntityPojo ent = convertAlchemyEntToEntPojo(ae);
        if ( ent != null )
          ents.add(ent);
      }
    }
    return ents; 
View Full Code Here

 
  private static EntityPojo convertAlchemyEntToEntPojo(AlchemyEntityPojo pojoToConvert)
  {
    try
    {
      EntityPojo ent = new EntityPojo();
      ent.setActual_name(pojoToConvert.text);
      ent.setType(pojoToConvert.type);
      ent.setRelevance(Double.parseDouble(pojoToConvert.relevance));
      ent.setFrequency(Long.parseLong(pojoToConvert.count));
      if (null != pojoToConvert.sentiment) {
        if (null != pojoToConvert.sentiment.score) {
          ent.setSentiment(Double.parseDouble(pojoToConvert.sentiment.score));
        }
        else { // neutral
          ent.setSentiment(0.0);
        }
      }
      // (else no sentiment present)
     
      if ( pojoToConvert.disambiguated != null )
      {
        ent.setSemanticLinks(new ArrayList<String>());
        ent.setDisambiguatedName(pojoToConvert.disambiguated.name);
        if ( pojoToConvert.disambiguated.geo != null )
        {
          GeoPojo geo = new GeoPojo();
          String[] geocords = pojoToConvert.disambiguated.geo.split(" ");
          geo.lat = Double.parseDouble(geocords[0]);
          geo.lon = Double.parseDouble(geocords[1]);
          ent.setGeotag(geo);
        }
        //Add link data if applicable
        if ( pojoToConvert.disambiguated.census != null)
          ent.getSemanticLinks().add(pojoToConvert.disambiguated.census);
        if ( pojoToConvert.disambiguated.ciaFactbook != null)
          ent.getSemanticLinks().add(pojoToConvert.disambiguated.ciaFactbook);
        if ( pojoToConvert.disambiguated.dbpedia != null)
          ent.getSemanticLinks().add(pojoToConvert.disambiguated.dbpedia);
        if ( pojoToConvert.disambiguated.freebase != null)
          ent.getSemanticLinks().add(pojoToConvert.disambiguated.freebase);
        if ( pojoToConvert.disambiguated.opencyc != null)
          ent.getSemanticLinks().add(pojoToConvert.disambiguated.opencyc);
        if ( pojoToConvert.disambiguated.umbel != null)
          ent.getSemanticLinks().add(pojoToConvert.disambiguated.umbel);
        if ( pojoToConvert.disambiguated.yago != null)
          ent.getSemanticLinks().add(pojoToConvert.disambiguated.yago);
       
        if ( ent.getSemanticLinks().size() == 0)
          ent.setSemanticLinks(null); //If no links got added, remove the list
      }
      else
      {
        //sets the disambig name to actual name if
        //there was no disambig name for this ent
        //that way all entities have a disambig name
        ent.setDisambiguatedName(ent.getActual_name());
      }
      //Calculate Dimension based on ent type
      try {
        ent.setDimension(DimensionUtility.getDimensionByType(ent.getType()));
      }
      catch (java.lang.IllegalArgumentException e) {
        ent.setDimension(EntityPojo.Dimension.What);                 
      }
      return ent;
    }
    catch (Exception ex)
    {
View Full Code Here

          for (Object entryObj: dbo.toMap().entrySet()) {
            @SuppressWarnings("unchecked")
            Map.Entry<String, Object> entry = (Map.Entry<String, Object>)entryObj;
           
            String masterAlias = entry.getKey();
            EntityPojo masterAliasEntity = new EntityPojo();
            masterAliasEntity.setIndex(masterAlias);
           
            BasicDBObject entityFeatureObj = (BasicDBObject) entry.getValue();
            EntityFeaturePojo aliasInfo = null;
            try {
              aliasInfo = EntityFeaturePojo.fromDb(entityFeatureObj, EntityFeaturePojo.class);
View Full Code Here

            //check typegroup to see if it is an entity
            if ( typeGroup.equals("entities") )
            {
              try
              {
                EntityPojo ep = new EntityPojo();
                //get what fields we can         
                ep.setType(currNode.get("_type").getTextValue());
                try {
                  ep.setDimension(DimensionUtility.getDimensionByType(ep.getType()));
                }
                catch (java.lang.IllegalArgumentException e) {
                  ep.setDimension(EntityPojo.Dimension.What);                 
                }
                String name = "";
                JsonNode nameNode = null;
                try
                {
                  nameNode = currNode.get("name");
                  name = nameNode.getTextValue();
                }
                catch (Exception ex )
                {                 
                  logger.debug("Error parsing name node: " + currNode.toString());
                  continue;
                }
                ep.setActual_name(name);
                ep.setRelevance(Double.parseDouble(currNode.get("relevance").getValueAsText()));
                ep.setFrequency((long)currNode.get("instances").size());
                //attempt to get resolutions if they exist
                JsonNode resolutionNode = currNode.get("resolutions");
                if ( null != resolutionNode )
                {
                  //resolution nodes are arrays
                  JsonNode resolutionFirst = resolutionNode.get(0);
                  ep.setSemanticLinks(new ArrayList<String>());
                  ep.getSemanticLinks().add(resolutionFirst.get("id").getTextValue()); //this is a link to an alchemy page
                  ep.setDisambiguatedName(resolutionFirst.get("name").getTextValue());
                  //check if we need to create a geo object
                  if ( null != resolutionFirst.get("latitude") )
                  {
                    GeoPojo gp = new GeoPojo();
                    String lat = resolutionFirst.get("latitude").getValueAsText();
                    String lon = resolutionFirst.get("longitude").getValueAsText();
                    gp.lat = Double.parseDouble(lat);
                    gp.lon = Double.parseDouble(lon);
                    ep.setGeotag(gp);
                 
                }
                else {
                  ep.setDisambiguatedName(name); // use actual name)                 
                }
                entityNameMap.put(currNodeName.toLowerCase(), ep);
                entities.add(ep);
              }
              catch (Exception ex)
              {
                logger.error("Error creating event pojo from OpenCalaisNode: " + ex.getMessage(), ex);
              }
            }
            else if ( typeGroup.equals("relations") )
            {             
              eventNodes.add(currNode);           
            }
          }         
        }
        //handle events
        if (bAddRawEventsToMetadata) {
          // For now just re-process these into DB objects since we know that works...
          rawEventObjects = new BasicDBList();
        }
        for ( JsonNode eventNode : eventNodes )
        {         
          AssociationPojo event = parseEvent(eventNode);
          //remove useless events (an event is useless if it only has a verb (guessing currently)
          if ( null != event )
          {
            event = removeUselessEvents(event);
            if ( null != event )
            {
              events.add(event);
            }
          }
          if (bAddRawEventsToMetadata) {
            BasicDBObject eventDbo = (BasicDBObject) com.mongodb.util.JSON.parse(eventNode.toString());
            if (null != eventDbo) {
              BasicDBObject transformObj = new BasicDBObject();
              for (Map.Entry<String, Object> entries: eventDbo.entrySet()) {
                if (entries.getValue() instanceof String) {
                  String val = (String) entries.getValue();
                  EntityPojo transformVal = findMappedEntityName(val);
                  if (null != transformVal) {
                    transformObj.put(entries.getKey(), transformVal.getIndex());                   
                    transformObj.put(entries.getKey() + "__hash", val);                   
                  }
                  else {
                    transformObj.put(entries.getKey(), val);                   
                  }
View Full Code Here

        JsonNode ent1node = current_node.get(esp.entity1column);
        if ( ent1node.isArray() )
        {
          Iterator<JsonNode> entiter = ent1node.getElements();
          curr_ent = entiter.next().getTextValue().toLowerCase();
          EntityPojo matchEnt1 = findMappedEntityName(curr_ent);
          if ( null != matchEnt1)
          {
            ep.setEntity1(matchEnt1.getActual_name());
            ep.setEntity1_index(createEntityIndex(matchEnt1));
            if ( ep.getGeotag() == null && matchEnt1.getGeotag() != null) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt1.getGeotag().deepCopy());
          }
          else
            ep.setEntity1(curr_ent);         
         
          if ( entiter.hasNext())
          {
            curr_ent = entiter.next().getTextValue().toLowerCase();
            EntityPojo matchEnt12 = findMappedEntityName(curr_ent);
            if ( null != matchEnt12 )
            {
              ep.setEntity2(matchEnt12.getActual_name());
              ep.setEntity2_index(createEntityIndex(matchEnt12));
              if ( ep.getGeotag() == null && matchEnt12.getGeotag() != null) //try to set geotag if it already hasn't been
                ep.setGeotag(matchEnt12.getGeotag().deepCopy());
            }
            else
              ep.setEntity2(curr_ent);           
          }
        }
        else
        {
          curr_ent = current_node.get(esp.entity1column).getTextValue().toLowerCase();
          EntityPojo matchEnt1Only = findMappedEntityName(curr_ent);
          if ( null != matchEnt1Only )
          {
            ep.setEntity1(matchEnt1Only.getActual_name());
            ep.setEntity1_index(createEntityIndex(matchEnt1Only));
            if ( ep.getGeotag() == null && matchEnt1Only.getGeotag() != null ) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt1Only.getGeotag().deepCopy());
          }
          else
            ep.setEntity1(curr_ent);         
        }
      }     
      //entity 2     
      if ( null != esp.entity2column && null != current_node.get(esp.entity2column)  )
      {
        JsonNode ent2node = current_node.get(esp.entity2column);
        if ( ent2node.isTextual() )
        {
          curr_ent = current_node.get(esp.entity2column).getTextValue().toLowerCase();
          EntityPojo matchEnt2 = findMappedEntityName(curr_ent);
          if ( null != matchEnt2 )
          {
            ep.setEntity2(matchEnt2.getActual_name());
            ep.setEntity2_index(createEntityIndex(matchEnt2));
            if ( ep.getGeotag() == null && matchEnt2.getGeotag() != null ) //try to set geotag if it already hasn't been
              ep.setGeotag(matchEnt2.getGeotag().deepCopy());
          }
          else
            ep.setEntity2(curr_ent);
        }
      }
      //verb and verb category (if there is a verb cat, assign that and then get column value)
      if ( null != esp.verbcategory )
      {
        ep.setVerb_category(esp.verbcategory);
       
        if ( null != esp.verbcolumn && null != current_node.get(esp.verbcolumn) )
        {
          JsonNode verbnode = current_node.get(esp.verbcolumn);
          if ( verbnode.isTextual() )
          {
            ep.setVerb(current_node.get(esp.verbcolumn).getTextValue().toLowerCase());
            EntityPojo verbent = findMappedEntityName(ep.getVerb());
            if ( verbent != null )
              ep.setVerb(verbent.getActual_name());
          }
        }
      }
      else if ( null != esp.verbcolumn && null != current_node.get(esp.verbcolumn) )
      {
        ep.setVerb(current_node.get(esp.verbcolumn).getTextValue().toLowerCase());
      }
      //location
      if ( null != esp.locationcolumn && null != current_node.get(esp.locationcolumn) )
      {
        curr_ent = current_node.get(esp.locationcolumn).getTextValue().toLowerCase();
        EntityPojo geoEnt = findMappedEntityName(curr_ent);
        if ( geoEnt != null && geoEnt.getGeotag() != null )
        {
          ep.setGeo_index(createEntityIndex(geoEnt));       
          ep.setGeotag(geoEnt.getGeotag().deepCopy()); //location always over-rides geotag location
        }
      }     
      //time
      if ( null != esp.timecolumnstart && null != current_node.get(esp.timecolumnstart) )
      {
View Full Code Here

    List<EntityPojo> ents = new ArrayList<EntityPojo>();
    if ( sc.keywords != null)
    {
      for ( AlchemyKeywordPojo ae : sc.keywords)
      {
        EntityPojo ent = convertAlchemyKeywordToEntPojo(ae);
        if ( ent != null )
          ents.add(ent);
      }
    }
    return ents; 
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.store.document.EntityPojo

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.