Package com.ikanow.infinit.e.data_model.store.document

Examples of com.ikanow.infinit.e.data_model.store.document.EntityPojo


 
  private static EntityPojo convertAlchemyKeywordToEntPojo(AlchemyKeywordPojo pojoToConvert)
  {
    try
    {
      EntityPojo ent = new EntityPojo();
      ent.setActual_name(pojoToConvert.text);
      ent.setType("Keyword");
      ent.setRelevance(Double.parseDouble(pojoToConvert.relevance));
      ent.setFrequency(1L);
      if (null != pojoToConvert.sentiment) {
        if (null != pojoToConvert.sentiment.score) {
          ent.setSentiment(Double.parseDouble(pojoToConvert.sentiment.score));
        }
        else { // neutral
          ent.setSentiment(0.0);
        }
      }
      // (else no sentiment present)
     
      ent.setDisambiguatedName(pojoToConvert.text);
      ent.setActual_name(pojoToConvert.text);
     
      ent.setDimension(EntityPojo.Dimension.What);
      return ent;
    }
    catch (Exception ex)
    {
      logger.error("Line: [" + ex.getStackTrace()[2].getLineNumber() + "] " + ex.getMessage());
View Full Code Here


          {             
            String field = entityRecords.getString(i);
            long nIndex = Long.valueOf(i);
           
            if (null != esp.getType()) { // (else cannot be a valid entity, must just be a list)
              EntityPojo entity = getEntity(esp, field, String.valueOf(i), f);
              if (entity != null) entities.add(entity)
            }
           
            // Does the association break out into multiple associations?
            if (esp.getEntities() != null)
            {
              // Iterate over the associations and call getAssociations recursively
              for (EntitySpecPojo subEsp : esp.getEntities())
              { 
                if (null != subEsp.getIterateOver()) {
                  if (null == subEsp.getCreationCriteriaScript()) {
                    _context.getHarvestStatus().logMessage(new StringBuffer("In iterator ").
                        append(esp.getIterateOver()).append(", trying to loop over field '").
                        append(subEsp.getIterateOver()).append("' in array of primitives.").toString(), true);
                  }
                  else {
                    this.executeEntityAssociationValidation(subEsp.getCreationCriteriaScript(), field, Long.toString(nIndex));
                  }
                  // (any creation criteria script indicates user accepts it can be either)
                }
                if (null != subEsp.getDisambiguated_name()) {
                  EntityPojo entity = getEntity(subEsp, field, String.valueOf(i), f);
                  if (entity != null) entities.add(entity)
                }
              }                   
            }//TESTED (error case, mixed object)
          }
        }

        /*
         *  EntityRecords is a JSONArray
         */
        else if (objType.equalsIgnoreCase("class org.json.JSONObject"))
        {
          // Iterate over array elements and extract entities
          for (int i = 0; i < entityRecords.length(); ++i)
          {
            // Get JSONObject containing entity fields and pass entityElement
            // into the script engine so scripts can access it
            JSONObject savedIterator = null;
            if (_scriptEngine != null)
            {
              _iterator = savedIterator = entityRecords.getJSONObject(i);
            }

            if (null != esp.getType()) { // (else cannot be a valid entity, must just be a list)
              EntityPojo entity = getEntity(esp, null, String.valueOf(i), f);
              if (entity != null) entities.add(entity);
            }
           
            // Does the entity break out into multiple entities?
            if (esp.getEntities() != null)
            {
              // Iterate over the entities and call getEntities recursively
              for (EntitySpecPojo subEsp : esp.getEntities())
              { 
                _iterator = savedIterator; // (reset this)
               
                List<EntityPojo> subEntities = getEntities(subEsp, f, _iterator);
                for (EntityPojo e : subEntities)
                {
                  entities.add(e);
                }
              }
            }
          }
        }

        if (_iterator != currObj) { // (ie at the top level)
          _iterator = null;
        }
      }
      catch (Exception e)
      {
        //e.printStackTrace();
        //System.out.println(e.getMessage());
        //logger.error("Exception: " + e.getMessage());
      }
    }
   
    // Single entity
    else
    {
      // Does the entity break out into multiple entities?
      if (esp.getEntities() != null)
      {
        // Iterate over the entities and call getEntities recursively
        for (EntitySpecPojo subEsp : esp.getEntities())
        { 
          List<EntityPojo> subEntities = getEntities(subEsp, f, currObj);
          for (EntityPojo e : subEntities)
          {
            entities.add(e);
          }
        }
      }
      else
      {
        EntityPojo entity = getEntity(esp, null, null, f);
        if (entity != null) entities.add(entity)
      }
    }
   
    return entities;
View Full Code Here

      Matcher m = currKeywordRegex.matcher(batchedDoc.fullText);

      while (m.find()) {
       
        String name = m.group().toLowerCase();
        EntityPojo ent = currKeywordMap.get(name);
       
        if ((null != ent) && (nDoc != ent.getDoccount())) { // (see below)
          if (null == batchedDoc.doc.getEntities()) {
            batchedDoc.doc.setEntities(new ArrayList<EntityPojo>());
          }
          batchedDoc.doc.getEntities().add(ent);
          ent.setDoccount(nDoc);
            // use this as an efficient check to only add each entity once per doc
            // doccount gets overwritten by the generic processing module so fine to abuse this
        }
        // (else probably an internal logic error ie shouldn't happen)
       
View Full Code Here

    // If the EntitySpecPojo or DocumentPojo is null return null
    if ((esp == null) || (f == null)) return null;
   
    try
    {
      EntityPojo e = new EntityPojo();
     
      // Parse creation criteria script to determine if the entity should be added
      if (esp.getCreationCriteriaScript() != null && JavaScriptUtils.containsScript(esp.getCreationCriteriaScript()))
      {
        boolean addEntity = executeEntityAssociationValidation(esp.getCreationCriteriaScript(), field, index);
        if (!addEntity) {
          return null;
        }
      }
     
      // Entity.disambiguous_name
      String disambiguatedName = null;
      if (JavaScriptUtils.containsScript(esp.getDisambiguated_name()))
      {
        disambiguatedName = (String)getValueFromScript(esp.getDisambiguated_name(), field, index);
      }
      else
      {
        if ((_iterator != null) && (esp.getDisambiguated_name().startsWith("$metadata.") || esp.getDisambiguated_name().startsWith("${metadata."))) {
          if (_context.isStandalone()) { // (minor message, while debugging only)
            _context.getHarvestStatus().logMessage("Warning: in disambiguated_name, using global $metadata when iterating", true);
          }
        }
        // Field - passed in via simple string array from getEntities
        if (field != null)
        {
          disambiguatedName = getFormattedTextFromField(esp.getDisambiguated_name(), field);
        }
        else
        {
          disambiguatedName = getFormattedTextFromField(esp.getDisambiguated_name(), field);
        }
      }
     
      // Only proceed if disambiguousName contains a meaningful value
      if (disambiguatedName != null && disambiguatedName.length() > 0)
      {
        e.setDisambiguatedName(disambiguatedName);
      }
      else // Always log failure to get a dname - to remove this, specify a creationCriteriaScript
      {
        _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required disambiguated_name from: ").append(esp.getDisambiguated_name()).toString(), true);
        return null;
      }
     
      // Entity.frequency (count)
      String freq = "1";
      if (esp.getFrequency() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getFrequency()))
        {
          freq = getValueFromScript(esp.getFrequency(), field, index).toString();
        }
        else
        {
          freq = getFormattedTextFromField(esp.getFrequency(), field);
        }
        // Since we've specified freq, we're going to enforce it
        if (null == freq) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required frequency from: ").append(esp.getFrequency()).toString(), true);
            return null;
          }
        }
      }

      // Try converting the freq string value to its numeric (double) representation
      Double frequency = (double) 0;
      try 
      {
        frequency = Double.parseDouble(freq);
      }
      catch (Exception e1)
      {
        this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
        return null;
      }
     
      // Only proceed if frequency > 0
      if (frequency > 0)
      {
        e.setFrequency(frequency.longValue()); // Cast to long from double
      }
      else
      {
        return null;  
     
     
      // Entity.actual_name
      String actualName = null;
      if (esp.getActual_name() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getActual_name()))
        {
          actualName = (String)getValueFromScript(esp.getActual_name(), field, index);
        }
        else
        {
          if ((_iterator != null) && (esp.getActual_name().startsWith("$metadata.") || esp.getActual_name().startsWith("${metadata."))) {
            if (_context.isStandalone()) { // (minor message, while debugging only)
              _context.getHarvestStatus().logMessage("Warning: in actual_name, using global $metadata when iterating", true);
            }
          }
          actualName = getFormattedTextFromField(esp.getActual_name(), field);
        }
        // Since we've specified actual name, we're going to enforce it (unless otherwise specified)
        if (null == actualName) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            if (_context.isStandalone()) { // (minor message, while debugging only)
              _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required actual_name from: ").append(esp.getActual_name()).toString(), true);
            }
            return null;
          }
        }
      }
      // If actualName == null set it equal to disambiguousName
      if (actualName == null) actualName = disambiguatedName;
      e.setActual_name(actualName);
     
      // Entity.type
      String type = null;
      if (esp.getType() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getType()))
        {
          type = (String)getValueFromScript(esp.getType(), field, index);
        }
        else
        {
          type = getFormattedTextFromField(esp.getType(), field);
        }
        // Since we've specified type, we're going to enforce it (unless otherwise specified)
        if (null == type) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required type from: ").append(esp.getType()).toString(), true);
            return null;
          }
        }
      }
      else
      {
        type = "Keyword";
      }
      e.setType(type);
     
      // Entity.index
      String entityIndex = disambiguatedName + "/" + type;
      e.setIndex(entityIndex.toLowerCase());
     
      // Now check if we already exist, discard if so:
      if (_entityMap.contains(e.getIndex())) {
        return null;
      }

      // Entity.dimension
      String dimension = null;
      if (esp.getDimension() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getDimension()))
        {
          dimension = (String)getValueFromScript(esp.getDimension(), field, index);
        }
        else
        {
          dimension = getFormattedTextFromField(esp.getDimension(), field);
        }
        // Since we've specified dimension, we're going to enforce it (unless otherwise specified)
        if (null == dimension) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required dimension from: ").append(esp.getDimension()).toString(), true);
            return null;
          }
        }
      }
      if (null == dimension) {
        try {
          e.setDimension(DimensionUtility.getDimensionByType(type));
        }
        catch (java.lang.IllegalArgumentException ex) {
          e.setDimension(EntityPojo.Dimension.What);                 
        }
      }
      else {
        try {
          EntityPojo.Dimension enumDimension = EntityPojo.Dimension.valueOf(dimension);
          if (null == enumDimension) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Invalid dimension: ").append(dimension).toString(), true);
            return null; // (invalid dimension)
          }
          else {
            e.setDimension(enumDimension);
          }
        }
        catch (Exception e2) {
          _context.getHarvestStatus().logMessage(new StringBuffer("Invalid dimension: ").append(dimension).toString(), true);
          return null; // (invalid dimension)         
        }
      }
     
      // Entity.relevance
      String relevance = "0";
      if (esp.getRelevance() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getRelevance()))
        {
          relevance = (String)getValueFromScript(esp.getRelevance(), field, index);
        }
        else
        {
          relevance = getFormattedTextFromField(esp.getRelevance(), field);
        }
        // Since we've specified relevance, we're going to enforce it (unless otherwise specified)
        if (null == relevance) { // failed to get it
          if (null == esp.getCreationCriteriaScript()) {
            _context.getHarvestStatus().logMessage(new StringBuffer("Failed to get required relevance from: ").append(esp.getRelevance()).toString(), true);
            return null;
          }
        }
      }
      try {
        e.setRelevance(Double.parseDouble(relevance));
      }
      catch (Exception e1) {
        this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
        return null;       
      }

      // Entity.sentiment (optional field)
      if (esp.getSentiment() != null)
      {
        String sentiment;
        if (JavaScriptUtils.containsScript(esp.getSentiment()))
        {
          sentiment = (String)getValueFromScript(esp.getSentiment(), field, index);
        }
        else
        {
          sentiment = getFormattedTextFromField(esp.getSentiment(), field);
        }
        // (sentiment is optional, even if specified)
        if (null != sentiment) {
          try {
            double d = Double.parseDouble(sentiment);
            e.setSentiment(d);
            if (null == e.getSentiment()) {
              if (_context.isStandalone()) { // (minor message, while debugging only)
                _context.getHarvestStatus().logMessage(new StringBuffer("Invalid sentiment: ").append(sentiment).toString(), true);
              }             
            }
          }
          catch (Exception e1) {
            this._context.getHarvestStatus().logMessage(e1.getMessage(), true);
            return null;       
          }
        }
      }

      // Entity Link data:
     
      if (esp.getLinkdata() != null)
      {
       
        String linkdata = null;
        if (JavaScriptUtils.containsScript(esp.getLinkdata()))
        {
          linkdata = (String)getValueFromScript(esp.getLinkdata(), field, index);
        }
        else
        {
          linkdata = getFormattedTextFromField(esp.getLinkdata(), field);
        }
        // linkdata is optional, even if specified
        if (null != linkdata) {
          String[] links = linkdata.split("\\s+");
          e.setSemanticLinks(Arrays.asList(links));
        }
      }
     
     
      // Extract Entity GEO or set Entity Geo equal to DocGeo if specified via useDocGeo
      if (esp.getGeotag() != null)
      { 
        GeoPojo geo = getEntityGeo(esp.getGeotag(), null, field);
        if (null != geo) {
          e.setGeotag(geo);
        }
        // (Allow this field to be intrinsically optional)
       
        // If no ontology type is specified, derive it from getEntityGeo:
        if (null == esp.getOntology_type()) {
          esp.setOntology_type(esp.getGeotag().getOntology_type());
        }
      }
      else if (esp.getUseDocGeo() == true)
      {
        GeoPojo geo = getEntityGeo(null, f, field);
        if (null != geo) {
          e.setGeotag(geo);
        }
        // (Allow this field to be intrinsically optional)
      }

      // Entity.ontological_type (
      String ontology_type = null;
      if (esp.getOntology_type() != null)
      {
        if (JavaScriptUtils.containsScript(esp.getOntology_type()))
        {
          ontology_type = (String)getValueFromScript(esp.getOntology_type(), field, index);
        }
        else
        {
          ontology_type = getFormattedTextFromField(esp.getOntology_type(), field);
        }
        // Allow this field to be intrinsically optional
      }
      // If ontological_type == null, go fetch it from the internal lookup
      if (ontology_type == null) {
        e.setOntology_type(GeoOntologyMapping.mapEntityToOntology(type));
      }
      else if ('p' == GeoOntologyMapping.encodeOntologyCode(ontology_type) && !ontology_type.equals("point")) {
        // In this case we don't recognize the ontology type so we'll overwrite it
        e.setOntology_type(GeoOntologyMapping.mapEntityToOntology(type));       
      }
      e.setOntology_type(ontology_type);     
           
      // Add the index and geotag to geomap to get used by associations with matching indexes
      if (e.getGeotag() != null)
      {
        _geoMap.put(e.getIndex(), e.getGeotag());
      }
      _entityMap.add(e.getIndex());
     
      return e;
    }
    catch (Exception ex)
    {
View Full Code Here

      if ((null != filter) && (null != doc.getEntities()) &&
          ((null != filter.entityFilter)||(null != filter.entityGeoFilter)))
      {
        Iterator<EntityPojo> it = doc.getEntities().iterator();
        while (it.hasNext()) {
          EntityPojo ent = it.next();
          Pattern whichRegex = null;
          String whichPattern = null;
          if (null != ent.getGeotag() && (null != filter.entityGeoFilterRegex)) {
            whichRegex = filter.entityGeoFilterRegex;
            whichPattern = filter.entityGeoFilter;
          }
          else {
            whichRegex = filter.entityFilterRegex;
            whichPattern = filter.entityFilter;               
          } // (end which regex to pick)
          if (null != whichRegex) {
            if (whichPattern.startsWith("-")) {
              if (whichRegex.matcher(ent.getIndex()).find()) {
                it.remove();
                continue;
              }
            }
            else if (!whichRegex.matcher(ent.getIndex()).find()) {
              it.remove();
              continue;
            }         
          } // (end if regex exists)
        }//TESTED positive and negative geo and normal entities
View Full Code Here

    DocumentPojoIndexMap docMap = new DocumentPojoIndexMap();
    System.out.println("DOC_INDEX=" + IndexManager.mapToIndex(doc, docMap));
   
    ////////////////////////////////////////////////
    // Check use of enums in Entity pojo works
    EntityPojo testEnt = new EntityPojo();
    testEnt.setDimension(EntityPojo.Dimension.Where);
    System.out.println("ENT1=" + new GsonBuilder().setPrettyPrinting().create().toJson(testEnt));   
    System.out.println("DIM=" + testEnt.getDimension());
    BasicDBObject testEntDb = new BasicDBObject("dimension", "Who");
    testEnt = new Gson().fromJson(testEntDb.toString(), EntityPojo.class);
    System.out.println("ENT2=" + new GsonBuilder().setPrettyPrinting().create().toJson(testEnt));
    try {
      testEntDb = new BasicDBObject("dimension", "what");
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.store.document.EntityPojo

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.