Package com.ikanow.infinit.e.data_model.store.document

Examples of com.ikanow.infinit.e.data_model.store.document.DocumentPojo


    }
    return newDoc;   
  }//TESTED

  private static LinkedList<DocumentPojo> enrichDocByCloning(List<DocumentPojo> docsToReplace) {
    DocumentPojo newDoc = null;
    BasicDBObject dbo = null;
    String sContent = null;
    LinkedList<DocumentPojo> newDocs = new LinkedList<DocumentPojo>();
    for (DocumentPojo docToReplace: docsToReplace) {
View Full Code Here


    }   
    return null;
  }//TESTED

  private static DocumentPojo duplicateDocument(DocumentPojo docToReplace, BasicDBObject dbo, String content, boolean bClone) {
    DocumentPojo newDoc = DocumentPojo.fromDb(dbo, DocumentPojo.class);
    newDoc.setFullText(content);
    newDoc.setId(null); // (ie ensure it's unique)

    if (bClone) { // Cloned docs have special source key formats (and also need to update their community)
      ObjectId docCommunity = docToReplace.getCommunityId();
      newDoc.setSourceKey(docToReplace.getSourceKey());
      newDoc.setCommunityId(docCommunity);
      newDoc.setIndex(new StringBuffer("doc_").append(docCommunity).toString());     
    }   
    else { // For cloned documents, published etc can be taken from the master document, ie newDoc is already accurate
      // Copy over timing details from new document (set by the harvesters)
      newDoc.setPublishedDate(docToReplace.getPublishedDate());
      newDoc.setCreated(docToReplace.getCreated());
      newDoc.setModified(docToReplace.getModified());     
    }
    return newDoc;
  }//TESTED
View Full Code Here

    // Remove any docs from update list that didn't get updated
    if ( updateDocs != null )
    {
      Iterator<DocumentPojo> it = updateDocs.iterator();
      while (it.hasNext()) {
        DocumentPojo d = it.next();
        if (null == d.getTempSource()) { //this doc got deleted
          it.remove();
        }
      }
    }
  }
View Full Code Here

//    docApiDbo.remove("source");
//    docApiDbo.remove("mediaType");
    // (sourceKey in <key>#<community> format)
//    docApiDbo.put("sourceKey", docApiDbo.getString("sourceKey")+"#doc_api_test");
    // (display results of API mappings)
    DocumentPojo docApi = DocumentPojo.fromDb(docApiDbo, DocumentPojo.class);
    ResponsePojo rp3 = new ResponsePojo(null, docApi, new DocumentPojoApiMap());

    System.out.println("TIME_DOC_API1_CREATED="+docApi.getCreated());
    System.out.println("DOC_API1=" + rp3.toApi());
    DocumentPojoApiMap.mapToApi(docApiDbo);
    System.out.println("DOC_API2=" + BaseApiPojo.getDefaultBuilder().setPrettyPrinting().create().toJson(docApiDbo));
    DocumentPojo docFromApi = ApiManager.mapFromApi(ApiManager.mapToApi(docApi, null), DocumentPojo.class, null);
    System.out.println("TIME_DOC_API1_CREATED_INV="+docFromApi.getCreated());
   
/////////////////////////////////////////////////////////////////////////////
   
// DB testing:
    System.out.println("Open Community DB collection");
    //OLD:
    //CollectionManager cm = new CollectionManager();
    //DBCollection communityDb = cm.getCommunities();
    //NEW:
    DBCollection communityDb = DbManager.getSocial().getCommunity();
   
    //DB: read/write community object
    ////////////////////////////////////////////////
    //CANONICAL EXAMPLE:
    CommunityPojo cp = CommunityPojo.fromDb(communityDb.findOne(), CommunityPojo.class);
    System.out.println("CP1=" + cp.toDb()); // (converts DBObject to string ie BSON->JSON - should have { $oid } and { $date } objectid/date formats)
    ////////////////////////////////////////////////
    System.out.println("CP2=" + new Gson().toJson(cp)); // (will have complex object id format and string dates)
    //DB: read/write list of community objects
    ////////////////////////////////////////////////
    //CANONICAL EXAMPLE:
    List<CommunityPojo> cpl = CommunityPojo.listFromDb(communityDb.find().limit(3), CommunityPojo.listType());
    System.out.println("CPL1=" + CommunityPojo.listToDb(cpl, CommunityPojo.listType()));
    ////////////////////////////////////////////////
    System.out.println("CPL2=" + BaseDbPojo.getDefaultBuilder().create().toJson(cpl)); // (will have complex object id format and string dates)

    //Expect to see another delay here with the old method, new method should roll on...
    System.out.println("Open Document DB collection");
    //OLD:
    //CollectionManager cm2 = new CollectionManager();
    //DBCollection documentDb = cm2.getFeeds();
    //NEW:
    DBCollection documentDb = DbManager.getDocument().getMetadata();
   
    //DB: Read/write feed with metadata
    BasicDBObject query = new BasicDBObject("metadata", new BasicDBObject("$exists", true)); // (complex query so can't represent using pojos)
    query.put("entities", new BasicDBObject("$size", 3));
    ////////////////////////////////////////////////
    //CANONICAL EXAMPLE:
    DocumentPojo doc = DocumentPojo.fromDb(documentDb.findOne(query), DocumentPojo.class);
    System.out.println("DOC1="+doc.toDb());
    BasicDBList dblTest = (BasicDBList) doc.toDb().get("entities");
    BasicDBObject dboTest = (BasicDBObject) dblTest.get(0);
    if (!dboTest.get("doccount").getClass().toString().equals("class java.lang.Long")) {
      throw new RuntimeException(dboTest.get("doccount").getClass().toString() + " SHOULD BE LONG");
    }
    ////////////////////////////////////////////////
    System.out.println("DOC2="+new Gson().toJson(doc));
    doc = DocumentPojo.fromDb(documentDb.findOne(query), new TypeToken<DocumentPojo>(){}); // (alternative to the prettier DocumentPojo.class, needed for container classes)
    System.out.println("DOC3="+doc.toDb());
    //DB: list example for doc
    Set<DocumentPojo> docset = DocumentPojo.listFromDb(documentDb.find(query).limit(3), new TypeToken<Set<DocumentPojo>>(){});
    System.out.println("DOCSET="+DocumentPojo.listToDb(docset, new TypeToken<Set<DocumentPojo>>(){}));
   
    // Shares - demonstrate mapping of _ids across to the API (and that binary data is discarded):
    List<SharePojo> shares = SharePojo.listFromDb(DbManager.getSocial().getShare().find().limit(10), SharePojo.listType());
    System.out.println("SHARE="+ApiManager.mapListToApi(shares, SharePojo.listType(), new SharePojoApiMap(null)));
   
// Index testing:
   
    ////////////////////////////////////////////////
    //CANONICAL EXAMPLE:
    DocumentPojoIndexMap docMap = new DocumentPojoIndexMap();
    System.out.println("DOC_INDEX=" + IndexManager.mapToIndex(doc, docMap));
   
    ////////////////////////////////////////////////
    // Check use of enums in Entity pojo works
    EntityPojo testEnt = new EntityPojo();
    testEnt.setDimension(EntityPojo.Dimension.Where);
    System.out.println("ENT1=" + new GsonBuilder().setPrettyPrinting().create().toJson(testEnt));   
    System.out.println("DIM=" + testEnt.getDimension());
    BasicDBObject testEntDb = new BasicDBObject("dimension", "Who");
    testEnt = new Gson().fromJson(testEntDb.toString(), EntityPojo.class);
    System.out.println("ENT2=" + new GsonBuilder().setPrettyPrinting().create().toJson(testEnt));
    try {
      testEntDb = new BasicDBObject("dimension", "what");
      testEnt = BaseDbPojo.getDefaultBuilder().create().fromJson(testEntDb.toString(), EntityPojo.class);
      System.out.println("***FAIL=" + BaseDbPojo.getDefaultBuilder().setPrettyPrinting().create().toJson(testEnt));
    }
    catch (Exception e) {
      System.out.println("ENT3: Correctly failed with illegal dimension type");
    }
   
    ////////////////////////////////////////////////
    // Metadata transformations based on type:
   
    String metadataObjStr = "{ 'test__long': 3, 'test_long': '3', 'error__long': { 'field1': 'no'}, "+
      "'test_arrayObj': [ { 'field1': 'test' } ], 'test_nestedArrayObj': [ [ { 'field1': 'test' } ]  ], "+
      "'test_array': [ 'val' ], 'test_nestedArray': [ [ 'val' ] ], "+
      "'test_obj': { 'field1': 'string' }, 'test_nestedObj': { 'field1': 'string', 'field2': { 'field3': 'string' }},"+
      "'test_null1': {}, test_null2: null"+
      "}";
   
    BasicDBObject metadataObj = (BasicDBObject) com.mongodb.util.JSON.parse(metadataObjStr);
   
    doc.addToMetadata("TestMeta", metadataObj);
    System.out.println("DOC_META=" + docMap.extendBuilder(BaseApiPojo.getDefaultBuilder()).setPrettyPrinting().create().toJson(doc));
   
// Changes to new ElasticSearch construct (particularly for bulk add)
   
    ElasticSearchManager indexManager = IndexManager.createIndex("test", null, false, null, null, ImmutableSettings.settingsBuilder());
View Full Code Here

      tmp = json.getAsJsonObject().get(DocumentPojo.metadata_);
      if (null != tmp) {
        json.getAsJsonObject().remove(DocumentPojo.metadata_);
      }
     
      DocumentPojo doc = BaseApiPojo.getDefaultBuilder().create().fromJson(json, DocumentPojo.class);
     
      // ...And add metadata back again...
      if (null != tmp) {
        JsonObject tmpMeta = tmp.getAsJsonObject();
        for (Entry<String, JsonElement> entry: tmpMeta.entrySet()) {
          if (entry.getValue().isJsonArray()) {
            doc.addToMetadata(entry.getKey(), MongoDbUtil.encodeArray(entry.getValue().getAsJsonArray()));
          }
          else {
            BasicDBList dbl = new BasicDBList();
            dbl.add(MongoDbUtil.encodeUnknown(entry.getValue()));
            doc.addToMetadata(entry.getKey(), dbl);
          }
        }//TOTEST       
      }
     
      // Finally handle updateId/_id swap
      ObjectId updateId = doc.getUpdateId();
      if (null != updateId) {
        doc.setUpdateId(doc.getId()); // (this is now the immutable _id)
        doc.setId(updateId); // this points to the _id in the DB
      }
     
      return doc;
    }//TESTED (by hand only, no formal record)   
View Full Code Here

    }
    completeEnrichmentProcess(source, toAdd, toUpdate, toRemove);

    // (Now we've completed enrichment either normally or by cloning, add the dups back to the normal documents for generic processing)
    LinkedList<DocumentPojo> groupedDups = new LinkedList<DocumentPojo>(); // (ie clones)
    DocumentPojo masterDoc = null; // (just looking for simple pointer matching here)

    for (DocumentPojo dupDoc: toDuplicate) {
      if (null == dupDoc.getCloneFrom()) {
        toAdd.add(dupDoc);       
      }
      else if (null != dupDoc.getCloneFrom().getTempSource()) { //(Else doc was removed from toAdd list due to extraction errors)
        if (null == masterDoc) { // First time through
          masterDoc = dupDoc.getCloneFrom();
        }
        else if (!masterDoc.getUrl().equals(dupDoc.getUrl())) { // New group!
          groupedDups = enrichDocByCloning(groupedDups);
          if (null != groupedDups) {
            toAdd.addAll(groupedDups);
            groupedDups.clear();
          }
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.data_model.store.document.DocumentPojo

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.