Package org.solbase

Source Code of org.solbase.SolbaseIndexWriter

/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.solbase;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;

import net.rubyeye.xmemcached.exception.MemcachedException;

import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.DeleteUpdateCommand;
import org.apache.solr.update.MergeIndexesCommand;
import org.apache.solr.update.RollbackUpdateCommand;
import org.apache.solr.update.UpdateHandler;
import org.solbase.cache.CachedObjectWrapper;
import org.solbase.cache.LayeredCache;
import org.solbase.common.SolbaseException;
import org.solbase.indexer.ParsedDoc;
import org.solbase.indexer.SolbaseIndexUtil;
import org.solbase.lucenehbase.ReaderCache;
import org.solbase.lucenehbase.TermDocMetadata;

public class SolbaseIndexWriter extends UpdateHandler {
  // Queue of core names whose schema-info cache marker should be flushed (see the monitor thread in the constructor)
  private static final LinkedBlockingQueue<String> flushQueue = new LinkedBlockingQueue<String>();
  private final ExecutorService flushMonitor = Executors.newSingleThreadExecutor();

  private final org.solbase.lucenehbase.IndexWriter writer;
  private final static Logger logger = Logger.getLogger(SolbaseIndexWriter.class);

  private static SolbaseIndexUtil indexUtil;

  // stats
  AtomicLong addCommands = new AtomicLong();
  AtomicLong addCommandsCumulative = new AtomicLong();
  AtomicLong deleteByIdCommands = new AtomicLong();
  AtomicLong deleteByIdCommandsCumulative = new AtomicLong();
  AtomicLong deleteByQueryCommands = new AtomicLong();
  AtomicLong deleteByQueryCommandsCumulative = new AtomicLong();
  AtomicLong expungeDeleteCommands = new AtomicLong();
  AtomicLong mergeIndexesCommands = new AtomicLong();
  AtomicLong commitCommands = new AtomicLong();
  AtomicLong optimizeCommands = new AtomicLong();
  AtomicLong rollbackCommands = new AtomicLong();
  AtomicLong numDocsPending = new AtomicLong();
  AtomicLong numErrors = new AtomicLong();
  AtomicLong numErrorsCumulative = new AtomicLong();

  public SolbaseIndexWriter(SolrCore core) {
    super(core);

    // load the SolbaseIndexUtil implementation named in the "solbase" resource bundle
    try {
      String className = ResourceBundle.getBundle("solbase").getString("class.solbaseIndexUtil");
      indexUtil = (SolbaseIndexUtil) Class.forName(className).newInstance();
    } catch (MissingResourceException e) {
      // no "solbase" bundle or key on the classpath; indexUtil can still be set via setIndexUtil()
    } catch (InstantiationException e) {
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    }
   
    try {
      writer = new org.solbase.lucenehbase.IndexWriter();

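      // Background flush monitor: drains core names from flushQueue and, at most once per
      // cacheInvalidationInterval per core, touches the schema-info "cache" row in HBase,
      // presumably so other nodes see the new timestamp and invalidate their caches.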
      flushMonitor.execute(new Runnable() {

        public void run() {
          Map<String, Long> lastCoreFlush = new HashMap<String, Long>();

          while (true) {
            try {
              String core = flushQueue.take();

              Long lastFlush = lastCoreFlush.get(core);
              if (lastFlush == null || lastFlush <= (System.currentTimeMillis() - SolbaseUtil.cacheInvalidationInterval)) {
                flush(core);
                lastCoreFlush.put(core, System.currentTimeMillis());
                logger.info("Flushed cache: " + core);
              }
            } catch (InterruptedException e) {
              continue;
            } catch (IOException e) {
              logger.error(e);
            }
          }
        }

        private void flush(String core) throws IOException {
          HTableInterface table = SolbaseUtil.getSchemaInfoTable();
          try {
            Put schemaPut = new Put(Bytes.toBytes("cache"));

            schemaPut.add(Bytes.toBytes("info"), Bytes.toBytes("schema"), Bytes.toBytes(""));

            table.put(schemaPut);
          } finally {
            SolbaseUtil.releaseTable(table);
          }
        }

      });

    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  public void setIndexUtil(SolbaseIndexUtil util) {
    indexUtil = util;
  }

  public int addDoc(AddUpdateCommand cmd) throws IOException {
    addCommands.incrementAndGet();
    addCommandsCumulative.incrementAndGet();
    int rc = -1;

    // no duplicates allowed
    SchemaField uniqueField = core.getSchema().getUniqueKeyField();

    if (uniqueField == null)
      throw new IOException("Solbase requires a unique field");

    // Solbase also requires the schema's id field to be defined
    if (idField == null) {
      throw new IOException("Solbase requires a unique field");
    }

    try {
      String indexName = core.getName();
      writer.setIndexName(indexName);

      Document doc = cmd.getLuceneDocument(schema);
     
      String idFieldName = idTerm.field();
     
      // Solbase-specific pseudo-fields; read here, then stripped below before indexing
      boolean updateStore = false;
      String updateVal = doc.get("updateStore");
      if(updateVal != null){
        // updating hbase after cache is updated
        updateStore = true;
      }
     
      int docNumber = Integer.parseInt(doc.get(idFieldName));
     
      // if edit field is present, it's for modification instead of blind add
      String editVal = doc.get("edit");
     
      // strip the pseudo-fields; they are only used by the update API
      doc.removeField("docId");
      doc.removeField("edit");
      doc.removeField("updateStore");
   
      // set indexutil to writer
      writer.setIndexUtil(indexUtil);
     
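      // Pull the global id, then resolve this shard's docId range; ReaderCache takes
      // [startDocId, endDocId], presumably to scope updates to docs this shard owns.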
      String globalId = doc.getField("global_uniq_id").stringValue();
      int shardNum = SolbaseShardUtil.getShardNum(indexName);
      int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
      int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
     
      if(editVal != null){
        logger.info("updating doc: " + docNumber);
        if(editDoc(doc, indexName, docNumber, updateStore)){
          rc = 1;
        }
      } else {
        try {
          logger.info("adding doc: " + docNumber);
         
          ParsedDoc parsedDoc = writer.parseDoc(doc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());
          List<TermDocMetadata> termDocMetas = parsedDoc.getTermDocMetadatas();
          // TODO: possible problem
          // If the doc is not in cache and this cluster isn't responsible for updating the store,
          // the doc never gets written to HBase or the cache, yet the loop below still adds it to
          // the term vectors. A search would then hit a NullPointerException on this doc, so
          // update the store first when adding (replication can still cause this if it's backed up).
          ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);

          for (TermDocMetadata termDocMeta : termDocMetas) {
            ReaderCache.updateTermDocsMetadata(termDocMeta.getTerm(), termDocMeta, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
          }

          rc = 1;
          logger.info("adding doc: " + docNumber);
         
        } catch (NumberFormatException e) {
          logger.info("adding doc failed: " + docNumber);
          logger.info(e.toString());
        } catch (InterruptedException e) {
          logger.info("adding doc failed: " + docNumber);
          logger.info(e.toString());
        } catch (MemcachedException e) {
          logger.info("adding doc failed: " + docNumber);
          logger.info(e.toString());
        } catch (TimeoutException e) {
          logger.info("adding doc failed: " + docNumber);
          logger.info(e.toString());
        } catch (SolbaseException e) {
          logger.info("adding doc failed: " + docNumber);
          logger.info(e.toString());
        }
      }
    } finally {
      if (rc != 1) {
        numErrors.incrementAndGet();
        numErrorsCumulative.incrementAndGet();
      }
    }
 
    return rc;

  }

  /**
   * Edit logic: instead of blindly inserting, compare the new doc with the old doc and apply the
   * appropriate modifications to the term vectors and the stored doc.
   * @param newDoc the replacement document
   * @param indexName the Solbase index (core) name
   * @param docNumber the numeric id of the doc being edited
   * @param updateStore whether to persist the change to HBase as well as the cache
   * @return true if the edit was applied (or the doc no longer exists), false on failure
   */
  public boolean editDoc(Document newDoc, String indexName, int docNumber, boolean updateStore){
   
    try {
      CachedObjectWrapper<Document, Long> cachedObj = ReaderCache.getDocument(docNumber, null, indexName, 0, 0);
      if(cachedObj == null || cachedObj.getValue() == null) {
        // document doesn't exist, so let's just bail out here
        return true;
      }
     
      ParsedDoc parsedDoc = new ParsedDoc(newDoc);
      parsedDoc.setIndexName(indexName);
      parsedDoc.setIndexUtil(indexUtil);
      parsedDoc.setIndexWriter(writer);
      parsedDoc.setUpdateStore(updateStore);
     
      int shardNum = SolbaseShardUtil.getShardNum(indexName);
      int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
      int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
     
      ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.UPDATE, updateStore, startDocId, endDocId);
       
      return true;
    } catch (IOException e) {
      logger.info("edit doc failed: " + docNumber);
      logger.info(e.toString());
    } catch (InterruptedException e) {
      logger.info("edit doc failed: " + docNumber);
      logger.info(e.toString());
    } catch (MemcachedException e) {
      logger.info("edit doc failed: " + docNumber);
      logger.info(e.toString());
    } catch (TimeoutException e) {
      logger.info("edit doc failed: " + docNumber);
      logger.info(e.toString());
    } catch (SolbaseException e) {
      logger.info("edit doc failed: " + docNumber);
      logger.info(e.toString());
    }
   
    return false;
  }
 
  public void close() throws IOException {
    // no-op
  }

  public void commit(CommitUpdateCommand cmd) throws IOException {
    // no-op
  }

  public void delete(DeleteUpdateCommand cmd) throws IOException {
    deleteByIdCommands.incrementAndGet();
    deleteByIdCommandsCumulative.incrementAndGet();

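    // These two checks appear to mirror Solr's legacy DirectUpdateHandler semantics: only a
    // delete that is both fromPending and fromCommitted is accepted; anything else is rejected.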
    if (!cmd.fromPending && !cmd.fromCommitted) {
      numErrors.incrementAndGet();
      numErrorsCumulative.incrementAndGet();
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "meaningless command: " + cmd);
    }
    if (!cmd.fromPending || !cmd.fromCommitted) {
      numErrors.incrementAndGet();
      numErrorsCumulative.incrementAndGet();
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "operation not supported: " + cmd);
    }

    // Delete all terms/fields/etc
    String indexName = core.getName();
    writer.setIndexName(indexName);
    writer.setIndexUtil(indexUtil);
   
    int docId = Integer.parseInt(cmd.id);
   
    logger.info("deleting doc: " + docId);
   
    try {
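      // Read the doc first (the result itself is unused); this appears to prime the cache entry
      // before issuing the delete.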
      CachedObjectWrapper<Document, Long> wrapper = ReaderCache.getDocument(docId, null, indexName, 0, 0);
     
      boolean updateStore = cmd.getUpdateStore();
     
      ParsedDoc parsedDoc = new ParsedDoc();
      parsedDoc.setIndexName(indexName);
      parsedDoc.setIndexUtil(indexUtil);
      parsedDoc.setIndexWriter(writer);
      parsedDoc.setUpdateStore(updateStore);

      int shardNum = SolbaseShardUtil.getShardNum(indexName);
      int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
      int endDocId = SolbaseShardUtil.getEndDocId(shardNum);
     
      ReaderCache.updateDocument(docId, parsedDoc, indexName, writer, LayeredCache.ModificationType.DELETE, updateStore, startDocId, endDocId);
   
    } catch (InterruptedException e) {
      logger.info("delete doc failed: " + docId);
      logger.info(e.toString());
    } catch (MemcachedException e) {
      logger.info("delete doc failed: " + docId);
      logger.info(e.toString());
    } catch (TimeoutException e) {
      logger.info("delete doc failed: " + docId);
      logger.info(e.toString());
    } catch (SolbaseException e) {
      logger.info("delete doc failed: " + docId);
      logger.info(e.toString());
    }
  }

  public void deleteByQuery(DeleteUpdateCommand cmd) throws IOException {
    // deleteByQuery is not supported by Solbase; log it rather than silently swallowing it
    logger.warn("deleteByQuery is not supported: " + cmd);
  }

  public int mergeIndexes(MergeIndexesCommand cmd) throws IOException {
    return 0;
  }

  public void rollback(RollbackUpdateCommand cmd) throws IOException {
    // TODO - implement rollback in case of failure
  }

  public Category getCategory() {
    return Category.UPDATEHANDLER;
  }

  public String getDescription() {
    return "Update handler for Solbase";
  }

  public URL[] getDocs() {
    return null;
  }

  public String getName() {
    return SolbaseIndexWriter.class.getName();
  }

  public String getSource() {
    return null;
  }

  public String getSourceId() {
    return null;
  }

  public NamedList<Long> getStatistics() {
    NamedList<Long> lst = new SimpleOrderedMap<Long>();

    lst.add("rollbacks", rollbackCommands.get());
    lst.add("adds", addCommands.get());
    lst.add("deletesById", deleteByIdCommands.get());
    lst.add("deletesByQuery", deleteByQueryCommands.get());
    lst.add("errors", numErrors.get());
    lst.add("cumulative_adds", addCommandsCumulative.get());
    lst.add("cumulative_deletesById", deleteByIdCommandsCumulative.get());
    lst.add("cumulative_deletesByQuery", deleteByQueryCommandsCumulative.get());
    lst.add("cumulative_errors", numErrorsCumulative.get());
    return lst;
  }

  public String getVersion() {
    return core.getVersion();
  }

  @SuppressWarnings("unused")
  private void clearCache(String core) {
    SolbaseIndexWriter.flushQueue.add(core);
  }

 
  public static void main(String[] args){
    try {
      @SuppressWarnings("deprecation")
      /*
      HBaseConfiguration conf = new HBaseConfiguration();
      //conf.set("hbase.zookeeper.quorum", "den2zksb001");
      conf.set("hbase.zookeeper.quorum", "den3dhdptk01.int.photobucket.com");
      conf.set("hbase.zookeeper.property.clientPort", "2181");
      conf.setInt("hbase.client.retries.number", 7);
      conf.setInt("ipc.client.connect.max.retries", 3);

     
     
      HTablePool hTablePool = new HTablePool(conf, 10);
     
      HTableInterface seq = hTablePool.getTable("DocKeyIdMap");     

      String globalId = "0089210673:0000540572:309AB023-orig.jpg";
      Get get = new Get(Bytes.toBytes(globalId));
     
      Result result = seq.get(get);
     
      byte[] docId = result.getValue(Bytes.toBytes("docId"), Bytes.toBytes(""));

      int docNumber = 384900472;
     
      SolrInputDocument doc = new SolrInputDocument();     
      if(docId != null) {
        // we've indexed this doc, so it is edit
        System.out.println(Bytes.toInt(docId));
        docNumber = Bytes.toInt(docId);
        doc.addField("edit", true);
      }
      */
      // use a separate connector to leverage a different HTTP thread pool for updates
      CommonsHttpSolrServer solbaseServer = new CommonsHttpSolrServer("http://den2sch21:8080/solbase/pbimages~4");

      /*
      doc.addField("docId", docNumber);
      doc.addField("global_uniq_id", globalId);
      doc.addField("title", "tom");
      doc.addField("description", "Uploaded with Snapbucket");
      doc.addField("tags", "Snapbucket");
      doc.addField("path", "/albums/tt262/koh_tester/309AB021-orig.jpg");
      doc.addField("subdomain", "i618");
      doc.addField("lastModified", new Integer(SolbaseUtil.getEpochSinceSolbase(System.currentTimeMillis() / 60000)).toString());
      doc.addField("media_type", new Integer(1).toString());
      doc.addField("total_view_count", new Long(10).toString());
      doc.addField("sevendays_view_count", new Integer(5).toString());
      doc.addField("total_likes_count", new Long(5).toString());
      doc.addField("sevendays_likes_count", new Integer(1).toString());
      doc.addField("total_comments_count", new Long(5).toString());
      doc.addField("sevendays_comments_count", new Integer(1).toString());
      doc.addField("contents", "audi tom solbase Uploaded with Snapbucket ");
      // whether we want to store to hbase or not
      doc.addField("updateStore", true);
     
      solbaseServer.add(doc);

  */
      // for delete only
     
      List<String> ids = new ArrayList<String>();
      ids.add(127995479 + ""); // term vector didn't get deleted doc id
      ids.add(134876977 + ""); // term vector did get deleted doc id
      solbaseServer.deleteById(ids, true);
    } catch (MalformedURLException e) {
      e.printStackTrace();
    } catch (SolrServerException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
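A minimal client-side sketch (not part of the original source): issuing an edit through this handler with SolrJ, using the pseudo-fields that addDoc() reads and strips (docId, edit, updateStore, global_uniq_id). The core URL and field values here are hypothetical; field names are taken from addDoc() and the commented-out example in main() above.

import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

public class SolbaseEditExample {
  public static void main(String[] args) throws Exception {
    // hypothetical Solbase core URL; substitute a real endpoint
    CommonsHttpSolrServer server = new CommonsHttpSolrServer("http://localhost:8080/solbase/mycore");

    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("docId", "12345");                  // numeric doc id; addDoc parses it via the unique key field
    doc.addField("global_uniq_id", "12345-example"); // read by addDoc before pseudo-fields are stripped
    doc.addField("edit", "true");                    // presence of "edit" routes through editDoc instead of a blind add
    doc.addField("updateStore", "true");             // presence of "updateStore" also persists to HBase, not just the cache
    doc.addField("title", "updated title");

    server.add(doc); // docId/edit/updateStore are removed by addDoc before indexing
  }
}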