Package org.solbase.indexer

Examples of org.solbase.indexer.ParsedDoc

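Taken together, the usage in these excerpts implies the following shape for ParsedDoc. This is a reconstruction from the observed calls only, not Solbase's actual source: the field types (in particular the fieldCache entry type) are guesses, and the Solbase-internal types (TermDocMetadata, SolbaseIndexUtil, IndexWriter) are assumed to be on the classpath.

    import java.util.List;
    import java.util.Map.Entry;
    import java.util.Set;

    import org.apache.hadoop.hbase.client.Put;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.Term;

    // Reconstructed from observed usage only; NOT Solbase's actual source.
    public class ParsedDoc {
      private Document document;
      private List<TermDocMetadata> termDocMetadatas;
      private List<Term> allTerms;
      private Put documentPut;
      private Set<Entry<String, byte[]>> fieldsMap; // entry type is a guess

      private String indexName;
      private SolbaseIndexUtil indexUtil;
      private IndexWriter writer;                   // Solbase's IndexWriter, not Lucene's
      private boolean updateStore;

      public ParsedDoc() { }                        // no-arg form, seen on the delete path

      public ParsedDoc(Document doc) {
        this.document = doc;
      }

      public ParsedDoc(List<TermDocMetadata> metadatas, Document doc, Put documentPut,
                       Set<Entry<String, byte[]>> fieldCache, List<Term> allTerms) {
        this.termDocMetadatas = metadatas;
        this.document = doc;
        this.documentPut = documentPut;
        this.fieldsMap = fieldCache;
        this.allTerms = allTerms;
      }

      public List<TermDocMetadata> getTermDocMetadatas() { return termDocMetadatas; }
      public List<Term> getAllTerms() { return allTerms; }
      public Document getDocument() { return document; }
      public Set<Entry<String, byte[]>> getFieldsMap() { return fieldsMap; }

      public void setIndexName(String indexName) { this.indexName = indexName; }
      public void setIndexUtil(SolbaseIndexUtil indexUtil) { this.indexUtil = indexUtil; }
      public void setIndexWriter(IndexWriter writer) { this.writer = writer; }
      public void setUpdateStore(boolean updateStore) { this.updateStore = updateStore; }
    }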

        }
      } else {
        try {
          logger.info("adding doc: " + docNumber);
         
          ParsedDoc parsedDoc = writer.parseDoc(doc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());
          List<TermDocMetadata> termDocMetas = parsedDoc.getTermDocMetadatas();
          // TODO: possible consistency problem:
          // the doc is not in the cache and this node isn't responsible for updating the store,
          // so the doc never gets written to HBase or the cache; the loop below will still
          // update the term vectors with this new doc, and a later search will then hit a
          // NullPointerException on it.


      if(cachedObj == null || cachedObj.getValue() == null) {
        // document doesn't exist, so let's just bail out here
        return true;
      }
     
      ParsedDoc parsedDoc = new ParsedDoc(newDoc);
      parsedDoc.setIndexName(indexName);
      parsedDoc.setIndexUtil(indexUtil);
      parsedDoc.setIndexWriter(writer);
      parsedDoc.setUpdateStore(updateStore);
     
      int shardNum = SolbaseShardUtil.getShardNum(indexName);
      int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
      int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
     
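Several of the excerpts on this page repeat the same shard-range preamble before touching the cache. Here is a minimal sketch of what that preamble is presumably for; the helper name and the inclusive bounds are assumptions, not part of SolbaseShardUtil's documented contract:

    // Hypothetical helper; ownsDocId and the inclusive range check are assumptions.
    static boolean ownsDocId(String indexName, int docId) {
      int shardNum = SolbaseShardUtil.getShardNum(indexName);
      int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
      int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
      return docId >= startDocId && docId <= endDocId;
    }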

    try {
      CachedObjectWrapper<Document, Long> wrapper = ReaderCache.getDocument(docId, null, indexName, 0, 0);
     
      boolean updateStore = cmd.getUpdateStore();
     
      ParsedDoc parsedDoc = new ParsedDoc();
      parsedDoc.setIndexName(indexName);
      parsedDoc.setIndexUtil(indexUtil);
      parsedDoc.setIndexWriter(writer);
      parsedDoc.setUpdateStore(updateStore);

      int shardNum = SolbaseShardUtil.getShardNum(indexName);
      int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
      int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
     

    }
   
    // Finally, store metadata alongside the document so we can delete it later
    documentPut.add(Bytes.toBytes("allTerms"), Bytes.toBytes("allTerms"), SolbaseUtil.toBytes(allIndexedTerms).array());

    ParsedDoc parsedDoc = new ParsedDoc(metadatas, doc, documentPut, fieldCache.entrySet(), allIndexedTerms);
    return parsedDoc;

  }
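The "allTerms" column written above is what later makes deletes possible: the document's full indexed-term list travels with its row. Below is a sketch of reading that column back with the stock HBase client; the "docs" table name and docId-as-row-key layout are hypothetical, and decoding would use the inverse of SolbaseUtil.toBytes, which these excerpts don't show:

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.util.Bytes;

    // Hypothetical read-back of the "allTerms" column; the table name and
    // row-key layout are assumptions, not taken from Solbase.
    static byte[] readAllTerms(Configuration conf, int docId) throws IOException {
      HTable table = new HTable(conf, "docs");
      try {
        Get get = new Get(Bytes.toBytes(docId)); // assumes docId is the row key
        get.addColumn(Bytes.toBytes("allTerms"), Bytes.toBytes("allTerms"));
        Result result = table.get(get);
        return result.getValue(Bytes.toBytes("allTerms"), Bytes.toBytes("allTerms"));
      } finally {
        table.close();
      }
    }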

    // increment the chunking sequence (the Lucene doc id)
    this.idCounter++;

    try {
      ParsedDoc parsedDoc = indexerUtil.getIndexWriter().parseDoc(doc, indexerUtil.getAnalyzer(), "", docId, indexerUtil.getSortFieldNames());

      List<TermDocMetadata> metadatas = parsedDoc.getTermDocMetadatas();
     
      MapWritable mapWritable = new MapWritable();
      DocumentPutWritable docWritable = new DocumentPutWritable(parsedDoc.getFieldsMap(), parsedDoc.getAllTerms(), docId, globalId);
      mapWritable.put(new BytesWritable(Bytes.toBytes("doc")), docWritable);

      for (TermDocMetadata metadata : metadatas) {
        byte[] key = metadata.getFieldTermKey();
        ByteBuffer buf = metadata.serialize();
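The loop body is cut off by the excerpt. Purely as an illustration of the stock Hadoop types involved, and not Solbase's actual code, each serialized metadata could be stored under its field-term key like this:

      // Illustrative only; the real loop body is not shown in this excerpt.
      byte[] value = new byte[buf.remaining()];
      buf.get(value); // copy the serialized metadata out of the ByteBuffer
      mapWritable.put(new BytesWritable(key), new BytesWritable(value));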

  private ParsedDoc deleteDocument(Document oldDoc, int docId, String indexName, IndexWriter writer, SolbaseIndexUtil indexUtil, boolean updateStore, int startDocId, int endDocId){
    try {
      // clone the doc so concurrent reads won't see our modifications
      oldDoc = new Document(oldDoc);
      oldDoc.removeField("docId");
      ParsedDoc parsedDoc = writer.parseDoc(oldDoc, schema.getAnalyzer(), indexName, docId, indexUtil.getSortFieldNames());

      List<TermDocMetadata> metadatas = parsedDoc.getTermDocMetadatas();

      // TODO: doing duplicate work here - once from updateObject and again from updateObjectStore
      for (TermDocMetadata metadata : metadatas) {
        ReaderCache.updateTermDocsMetadata(metadata.getTerm(), metadata, indexName, writer, LayeredCache.ModificationType.DELETE, updateStore, startDocId, endDocId);
      }

        oldDoc = new Document(oldDoc); // clone so the cached copy isn't mutated
        oldDoc.removeField("docId");

        // parse the old doc to collect all of its terms and term-doc metadata
        try {
          ParsedDoc oldParsedDoc = writer.parseDoc(oldDoc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());

          List<Term> oldTerms = oldParsedDoc.getAllTerms();
          List<TermDocMetadata> oldTermDocMetas = oldParsedDoc.getTermDocMetadatas();

          Document mergedDoc = mergeOldAndNew(oldDoc, newDoc);
          ParsedDoc parsedDoc = writer.parseDoc(mergedDoc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());

          List<TermDocMetadata> newTermDocMetas = parsedDoc.getTermDocMetadatas();
          List<Term> newTerms = parsedDoc.getAllTerms();

          // build working term lists; new ArrayList(c) already copies its
          // argument, so the redundant Collections.copy calls are dropped
          List<Term> updateList = new ArrayList<Term>(oldTerms);
          List<Term> deleteList = new ArrayList<Term>(oldTerms);
          List<Term> addList = new ArrayList<Term>(newTerms);

          updateList.retainAll(newTerms); // terms in both old and new doc -> UPDATE
          deleteList.removeAll(newTerms); // terms only in the old doc     -> DELETE
          addList.removeAll(oldTerms);    // terms only in the new doc     -> ADD
          int shardNum = SolbaseShardUtil.getShardNum(indexName);
          int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
          int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
          // update the term vectors first
          for (TermDocMetadata termDocMeta : newTermDocMetas) {
            Term term = termDocMeta.getTerm();
            if (updateList.contains(term)) {
              logger.debug("updating this term: " + term.toString());
              ReaderCache.updateTermDocsMetadata(term, termDocMeta, indexName, writer, LayeredCache.ModificationType.UPDATE, updateStore, startDocId, endDocId);
            } else if (addList.contains(term)) {
              ReaderCache.updateTermDocsMetadata(term, termDocMeta, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
            }
          }

          // clean up deletes
          if (!deleteList.isEmpty()) {
            for (TermDocMetadata termDocMeta : oldTermDocMetas) {
              Term term = termDocMeta.getTerm();

              if (deleteList.contains(term)) {
                ReaderCache.updateTermDocsMetadata(term, termDocMeta, indexName, writer, LayeredCache.ModificationType.DELETE, updateStore, startDocId, endDocId);

              }
            }
          }

          parsedDoc.getDocument().add(docIdField);

          return parsedDoc;
        } catch (NullPointerException e) {
          // treat an NPE anywhere in the parse/update path as an unparseable doc
          return null;
        }
      } else {
        Document mergedDoc = mergeOldAndNew(oldDoc, newDoc);

        ParsedDoc parsedDoc = writer.parseDoc(mergedDoc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());

        return parsedDoc;
      }
    } catch (IOException e) {
      // assumption: log the failure and fall through to a null return, matching
      // the NullPointerException handling above (the excerpt ends here)
      logger.error("error parsing document", e);
      return null;
    }
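The update/delete/add bookkeeping above is plain set algebra over term lists. Here is a self-contained demo of the same retainAll/removeAll calls, using strings in place of Lucene Terms:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class TermDiffDemo {
      public static void main(String[] args) {
        List<String> oldTerms = new ArrayList<String>(Arrays.asList("a", "b", "c"));
        List<String> newTerms = new ArrayList<String>(Arrays.asList("b", "c", "d"));

        List<String> updateList = new ArrayList<String>(oldTerms);
        List<String> deleteList = new ArrayList<String>(oldTerms);
        List<String> addList = new ArrayList<String>(newTerms);

        updateList.retainAll(newTerms); // [b, c] -> UPDATE
        deleteList.removeAll(newTerms); // [a]    -> DELETE
        addList.removeAll(oldTerms);    // [d]    -> ADD

        System.out.println(updateList + " " + deleteList + " " + addList);
      }
    }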
