Package com.google.enterprise.connector.traversal

Source Code of com.google.enterprise.connector.traversal.QueryTraverser

// Copyright 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.traversal;

import com.google.enterprise.connector.pusher.FeedException;
import com.google.enterprise.connector.pusher.PushException;
import com.google.enterprise.connector.pusher.Pusher;
import com.google.enterprise.connector.pusher.Pusher.PusherStatus;
import com.google.enterprise.connector.pusher.PusherFactory;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.DocumentList;
import com.google.enterprise.connector.spi.RepositoryDocumentException;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SkippedDocumentException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.TraversalContext;
import com.google.enterprise.connector.spi.TraversalManager;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.util.Clock;

import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Traverser for a repository implemented using a TraversalManager
*/
public class QueryTraverser implements Traverser {
  private static final Logger LOGGER =
      Logger.getLogger(QueryTraverser.class.getName());

  private final PusherFactory pusherFactory;
  private final TraversalManager queryTraversalManager;
  private final TraversalStateStore stateStore;
  private final String connectorName;
  private final TraversalContext traversalContext;
  private final Clock clock;

  // Synchronize access to cancelWork.
  private final Object cancelLock = new Object();
  private boolean cancelWork = false;

  /**
   * The {@code DocumentStore} parameter is ignored and may be null.
   *
   * @deprecated Use the overload without the {@code DocumentStore} parameter
   */
  @SuppressWarnings("deprecation")
  @Deprecated
  public QueryTraverser(PusherFactory pusherFactory,
      TraversalManager traversalManager, TraversalStateStore stateStore,
      String connectorName, TraversalContext traversalContext, Clock clock,
      com.google.enterprise.connector.database.DocumentStore documentStore) {
    this(pusherFactory, traversalManager, stateStore, connectorName,
        traversalContext, clock);
  }

  public QueryTraverser(PusherFactory pusherFactory,
      TraversalManager traversalManager, TraversalStateStore stateStore,
      String connectorName, TraversalContext traversalContext, Clock clock) {
    this.pusherFactory = pusherFactory;
    this.queryTraversalManager = traversalManager;
    this.stateStore = stateStore;
    this.connectorName = connectorName;
    this.traversalContext = traversalContext;
    this.clock = clock;
  }

  @Override
  public void cancelBatch() {
    synchronized(cancelLock) {
      cancelWork = true;
    }
    LOGGER.fine("Cancelling traversal for connector " + connectorName);
  }

  public boolean isCancelled() {
    synchronized(cancelLock) {
      return cancelWork;
    }
  }

  @Override
  public BatchResult runBatch(BatchSize batchSize) {
    final long startTime = clock.getTimeMillis();
    final long timeoutTime = startTime
      + traversalContext.traversalTimeLimitSeconds() * 1000;

    if (isCancelled()) {
        LOGGER.warning("Attempting to run a cancelled QueryTraverser");
      return new BatchResult(TraversalDelayPolicy.ERROR);
    }
    try {
      queryTraversalManager.setBatchHint(batchSize.getHint());
    } catch (RepositoryException e) {
      LOGGER.log(Level.WARNING, "Unable to set batch hint", e);
    }

    String connectorState;
    try {
      if (stateStore != null) {
        connectorState = stateStore.getTraversalState();
      } else {
        throw new IllegalStateException("null TraversalStateStore");
      }
    } catch (IllegalStateException ise) {
      // We get here if the store for the connector is disabled.
      // That happens if the connector was deleted while we were asleep.
      // Our connector seems to have been deleted.  Don't process a batch.
      LOGGER.fine("Halting traversal for connector " + connectorName
                  + ": " + ise.getMessage());
      return new BatchResult(TraversalDelayPolicy.ERROR);
    }

    DocumentList resultSet = null;
    if (connectorState == null) {
      try {
        LOGGER.fine("START TRAVERSAL: Starting traversal for connector "
                    + connectorName);
        resultSet = queryTraversalManager.startTraversal();
      } catch (Exception e) {
        LOGGER.log(Level.WARNING, "startTraversal threw exception: ", e);
        return new BatchResult(TraversalDelayPolicy.ERROR);
      }
    } else {
      try {
        LOGGER.fine("RESUME TRAVERSAL: Resuming traversal for connector "
            + connectorName + " from checkpoint " + connectorState);
        resultSet = queryTraversalManager.resumeTraversal(connectorState);
      } catch (Exception e) {
        LOGGER.log(Level.WARNING, "resumeTraversal threw exception: ", e);
        return new BatchResult(TraversalDelayPolicy.ERROR);
      }
    }

    // If the traversal returns null, that means that the repository has
    // no new content to traverse.
    if (resultSet == null) {
      LOGGER.fine("Result set from connector " + connectorName
                  + " is NULL, no documents returned for traversal.");
      return new BatchResult(TraversalDelayPolicy.POLL, 0);
    }

    Pusher pusher = null;
    BatchResult result = null;
    int counter = 0;
    try {
      // Get a Pusher for feeding the returned Documents.
      pusher = pusherFactory.newPusher(connectorName);

      while (true) {
        if (Thread.currentThread().isInterrupted() || isCancelled()) {
          LOGGER.fine("Traversal for connector " + connectorName
                      + " has been interrupted; breaking out of batch run.");
          break;
        }
        if (clock.getTimeMillis() >= timeoutTime) {
          LOGGER.fine("Traversal batch for connector " + connectorName
              + " is completing due to time limit.");
          break;
        }

        String docid = null;
        try {
          LOGGER.finer("Pulling next document from connector " + connectorName);
          Document nextDocument = resultSet.nextDocument();
          if (nextDocument == null) {
            LOGGER.finer("Traversal batch for connector " + connectorName
                + " at end after processing " + counter + " documents.");

            break;
          } else {
            // Since there are a couple of places below that could throw
            // exceptions but not exit the while loop, the counter should be
            // incremented here to insure it represents documents returned from
            // the list.  Note the call to nextDocument() could also throw a
            // RepositoryDocumentException signaling a skipped document in which
            // case the call will not be counted against the batch maximum.
            counter++;
            // Fetch DocId to use in messages.
            try {
              docid = Value.getSingleValueString(nextDocument,
                                                 SpiConstants.PROPNAME_DOCID);
            } catch (IllegalArgumentException e1) {
                LOGGER.finer("Unable to get document id for document ("
                             + nextDocument + "): " + e1.getMessage());
            } catch (RepositoryException e1) {
                LOGGER.finer("Unable to get document id for document ("
                             + nextDocument + "): " + e1.getMessage());
            }
          }
          LOGGER.finer("Sending document (" + docid + ") from connector "
              + connectorName + " to Pusher");

          if (pusher.take(nextDocument) != PusherStatus.OK) {
            LOGGER.fine("Traversal batch for connector " + connectorName
                + " is completing at the request of the Pusher,"
                + " after processing " + counter + " documents.");
            break;
          }
        } catch (SkippedDocumentException e) {
          /* TODO (bmj): This is a temporary solution and should be replaced.
           * It uses Exceptions for non-exceptional cases.
           */
          // Skip this document.  Proceed on to the next one.
          logSkippedDocument(docid, e);
        } catch (RepositoryDocumentException e) {
          // Skip individual documents that fail.  Proceed on to the next one.
          logSkippedDocument(docid, e);
        } catch (RuntimeException e) {
          // Skip individual documents that fail.  Proceed on to the next one.
          logSkippedDocument(docid, e);
        }
      }
      // No more documents. Wrap up any accumulated feed data and send it off.
      if (!isCancelled()) {
        pusher.flush();
      }
    } catch (OutOfMemoryError e) {
      pusher.cancel();
      System.runFinalization();
      System.gc();
      result = new BatchResult(TraversalDelayPolicy.ERROR);
      try {
        LOGGER.severe("Out of JVM Heap Space.  Will retry later.");
        LOGGER.log(Level.FINEST, e.getMessage(), e);
      } catch (Throwable t) {
        // OutOfMemory state may prevent us from logging the error.
        // Don't make matters worse by rethrowing something meaningless.
      }
    } catch (RepositoryException e) {
      // Drop the entire batch on the floor.  Do not call checkpoint
      // (as there is a discrepancy between what the Connector thinks
      // it has fed, and what actually has been pushed).
      LOGGER.log(Level.SEVERE, "Repository Exception during traversal.", e);
      result = new BatchResult(TraversalDelayPolicy.ERROR);
    } catch (PushException e) {
      LOGGER.log(Level.SEVERE, "Push Exception during traversal.", e);
      // Drop the entire batch on the floor.  Do not call checkpoint
      // (as there is a discrepancy between what the Connector thinks
      // it has fed, and what actually has been pushed).
      result = new BatchResult(TraversalDelayPolicy.ERROR);
    } catch (FeedException e) {
      LOGGER.log(Level.SEVERE, "Feed Exception during traversal.", e);
      // Drop the entire batch on the floor.  Do not call checkpoint
      // (as there is a discrepancy between what the Connector thinks
      // it has fed, and what actually has been pushed).
      result = new BatchResult(TraversalDelayPolicy.ERROR);
    } catch (Throwable t) {
      LOGGER.log(Level.SEVERE, "Uncaught Exception during traversal.", t);
      // Drop the entire batch on the floor.  Do not call checkpoint
      // (as there is a discrepancy between what the Connector thinks
      // it has fed, and what actually has been pushed).
      result = new BatchResult(TraversalDelayPolicy.ERROR);
   } finally {
      // If we have cancelled the work, abandon the batch.
      if (isCancelled()) {
        result = new BatchResult(TraversalDelayPolicy.ERROR);
      }

      // Checkpoint completed work as well as skip past troublesome documents
      // (e.g. documents that are too large and will always fail).
      if ((result == null) && (checkpointAndSave(resultSet) == null)) {
        // Unable to get a checkpoint, so wait a while, then retry batch.
        result = new BatchResult(TraversalDelayPolicy.ERROR);
      }
    }
    if (result == null) {
      result = new BatchResult(TraversalDelayPolicy.IMMEDIATE, counter,
                               startTime, clock.getTimeMillis());
    } else if (pusher != null) {
      // We are returning an error from this batch. Cancel any feed that
      // might be in progress.
      pusher.cancel();
    }
    return result;
  }

  private String checkpointAndSave(DocumentList pm) {
    String connectorState = null;
    LOGGER.fine("CHECKPOINT: Generating checkpoint for connector "
                + connectorName);
    try {
      connectorState = pm.checkpoint();
    } catch (RepositoryException re) {
      // If checkpoint() throws RepositoryException, it means there is no
      // new checkpoint.
      LOGGER.log(Level.FINE, "Failed to obtain checkpoint for connector "
                 + connectorName, re);
      return null;
    } catch (Exception e) {
      LOGGER.log(Level.INFO, "Failed to obtain checkpoint for connector "
                 + connectorName, e);
      return null;
    }
    try {
      if (connectorState != null) {
        if (stateStore != null) {
          stateStore.storeTraversalState(connectorState);
        } else {
          throw new IllegalStateException("null TraversalStateStore");
        }
        LOGGER.fine("CHECKPOINT: " + connectorState);
      }
      return connectorState;
    } catch (IllegalStateException ise) {
      // We get here if the store for the connector is disabled.
      // That happens if the connector was deleted while we were working.
      // Our connector seems to have been deleted.  Don't save a checkpoint.
      LOGGER.fine("Checkpoint discarded: " + connectorState);
    }
    return null;
  }

  private void logSkippedDocument(String docid, Exception e) {
    if (LOGGER.isLoggable(Level.FINER)) {
      LOGGER.log(Level.FINER, "Skipping document (" + docid
          + ") from connector " + connectorName + ": " + e.getMessage());
    }
  }
}
TOP

Related Classes of com.google.enterprise.connector.traversal.QueryTraverser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.