Package com.google.enterprise.connector.filesystem

Source Code of com.google.enterprise.connector.filesystem.FileLister$Traverser

// Copyright 2010 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.filesystem;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.enterprise.connector.filesystem.AclBuilder.AclProperties;
import com.google.enterprise.connector.logging.NDC;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.DocumentAcceptor;
import com.google.enterprise.connector.spi.DocumentAcceptorException;
import com.google.enterprise.connector.spi.Lister;
import com.google.enterprise.connector.spi.Principal;
import com.google.enterprise.connector.spi.RepositoryDocumentException;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SecureDocument;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.SpiConstants.FeedType;
import com.google.enterprise.connector.spi.TraversalContext;
import com.google.enterprise.connector.spi.TraversalContextAware;
import com.google.enterprise.connector.spi.TraversalSchedule;
import com.google.enterprise.connector.spi.TraversalScheduleAware;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.util.Clock;
import com.google.enterprise.connector.util.SystemClock;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Implementation of {@link Lister} that feeds files from local
* and network filesystems.  This Lister traverses each directory
* tree rooted at a {@code startPath} in a separate thread.
*
* Initially, a full traversal is performed - all appropriate files
* and directories are fed to the {@link DocumentAcceptor}.  If this
* succeeds, subsequent traversals of the same filesystem will be
* incremental - only files modified since the previous traversal
* will be fed. (If sending ACLs, directories will always be fed.)
* Periodically, a forced full traversal will be done to ensure
* that the GSA's view of the filesystem does not drift too far
* from reality.
*/
class FileLister implements Lister, TraversalContextAware,
                            TraversalScheduleAware {
  private static final Logger LOGGER =
      Logger.getLogger(FileLister.class.getName());

  private final PathParser pathParser;
  private final DocumentContext context;
  private final AtomicReference<TraversalService> traversalService =
      new AtomicReference<TraversalService>();;

  private Clock clock = new SystemClock();

  /**
   * How often to force a full re-traversal.
   * If less than 0, always try to perform incremental traversal.
   * If equal to 0, always perform full traversals.
   * If greater than 0, perform a full traversal if the
   * fullTraversalInterval time milliseconds has passed
   * since the last full traversal, otherwise perform
   * incremental traversal.
   */
  private long fullTraversalInterval = 24 * 60 * 60 * 1000L;

  /** Cushion for inaccurate timestamps in ifModifiedSince calculations. */
  private long ifModifiedSinceCushion = 60 * 60 * 1000L;;

  private DocumentAcceptor documentAcceptor;
  private TraversalSchedule schedule;
  private TraversalContext traversalContext;

  private static enum Sleep {
    RETRY_DELAY,     // Wait Schedule.retryDelay at end of traversal.
    SCHEDULE_DELAY,  // Wait for Schedule traversal interval.
    ERROR_DELAY      // 5 min wait after general error.
  }

  /**
   * Constructs a {@link FileLister} from
   * parameters which may be obtained using {@link FileConnectorType}.
   *
   * @throws RepositoryDocumentException if this fails due to an environmental
   * issue or an invalid configuration.
   */
  public FileLister(PathParser pathParser, DocumentContext context)
      throws RepositoryException {
    this.pathParser = pathParser;
    this.context = context;
    setIfModifiedSinceCushion(
        context.getPropertyManager().getIfModifiedSinceCushion());
  }

  @Override
  public void setTraversalContext(TraversalContext traversalContext) {
    this.traversalContext = traversalContext;
    context.setTraversalContext(traversalContext);
  }

  @Override
  public void setDocumentAcceptor(DocumentAcceptor documentAcceptor) {
    this.documentAcceptor = documentAcceptor;
  }

  @Override
  public synchronized void setTraversalSchedule(
        TraversalSchedule traversalSchedule) {
    this.schedule = traversalSchedule;
    TraversalService service = traversalService.get();
    if (service != null) {
      // Wake thread from sleep() to notice the change.
      service.interrupt();
    }
  }

  /** Allows tests to set adjustable clock. */
  @VisibleForTesting
  synchronized void setClock(Clock clock) {
    this.clock = clock;
  }

  /** Settable via Spring. */
  public void setFullTraversalIntervalDays(int days) {
    setFullTraversalInterval(
        (days > 0) ? days * 24 * 60 * 60 * 1000L : days);
  }

  @VisibleForTesting
  synchronized void setFullTraversalInterval(long interval) {
    this.fullTraversalInterval = interval;
  }

  @VisibleForTesting
  synchronized void setIfModifiedSinceCushion(long cushion) {
    this.ifModifiedSinceCushion = cushion;
  }

  @VisibleForTesting
  Traverser newTraverser(String startPath) {
    return new Traverser(startPath, documentAcceptor,
                         newTraversalService(false));
  }

  /**
   * Runs the lister loop on the calling thread until the traversal service
   * is shut down.
   *
   * Each pass sleeps until the schedule allows a traversal, runs one
   * traverser per start path on the service and waits for all of them,
   * then sleeps again: the error delay if any traverser failed, the
   * schedule's retry delay otherwise. An interrupt while the service is
   * still running is treated as a schedule change and restarts the
   * traversers on a new service.
   *
   * On the way out the document acceptor is cancelled, the service's
   * reference to this thread is dropped and the thread's interrupt status
   * is cleared.
   *
   * @throws RepositoryException declared by the interface; the body shown
   *         here catches and logs every {@link Exception} itself
   */
  @Override
  public void start() throws RepositoryException {
    // Creating the service shuts down any previous one and, because of the
    // 'true' argument, interrupts the previous service's lister thread.
    TraversalService service = newTraversalService(true);
    Collection<Callable<Void>> traversers = newTraversers(service);

    LOGGER.fine("Starting File Lister");
    try {
      while (!service.isShutdown()) {
        try {
          sleep(Sleep.SCHEDULE_DELAY);
          boolean gotError = false;
          // invokeAll returns once every traverser has completed.
          for (Future<Void> future : service.invokeAll(traversers)) {
            try {
              future.get();
            } catch (ExecutionException e) {
              // Already logged in child thread context.
              gotError = true;
            }
          }
          if (!service.isShutdown()) {
            sleep(gotError ? Sleep.ERROR_DELAY : Sleep.RETRY_DELAY);
          }
        } catch (InterruptedException ie) {
          // Awoken from sleep. If not shutdown, then there was a schedule
          // change.  Terminate the traversers, then restart them with the
          // new schedule.
          if (!service.isShutdown()) {
            // 'false': the lister thread is this thread, so do not
            // interrupt it again.
            service = newTraversalService(false);
            traversers = newTraversers(service);
          }
        }
      }
    } catch (RejectedExecutionException e) {
      // A rejection after shutdown is expected; only report it otherwise.
      if (!service.isShutdown()) {
        LOGGER.log(Level.WARNING, "Lister execution failed.", e);
      }
    } catch (Exception e) {
      LOGGER.log(Level.WARNING, "Lister feed failed.", e);
    } finally {
      LOGGER.fine("Halting File Lister");
      try {
        documentAcceptor.cancel();
      } catch (DocumentAcceptorException e) {
        LOGGER.log(Level.WARNING, "Error shutting down Lister", e);
      } finally {
        // Stop later interrupt() calls on this service from reaching this
        // thread, then clear any interrupt status that is still pending.
        service.clearListerThread();
        Thread.interrupted();
      }       
    }
  }

  /** Returns true if we are in a shutdown scenario. */
  @VisibleForTesting
  boolean isShutdown() {
    TraversalService service = traversalService.get();
    return (service == null || service.isShutdown());
  }

  @Override
  public void shutdown() throws RepositoryException {
    TraversalService service = traversalService.get();
    if (service != null) {
      service.shutdownNow();
      service.interrupt();
    }
  }

  private void sleep(Sleep delay) throws InterruptedException {
    int seconds = 0;
    synchronized (this) {
      if (schedule.isDisabled()) {
        seconds = Integer.MAX_VALUE;
      } else {
        switch (delay) {
        case ERROR_DELAY:
          seconds = 5 * 60;
          break;
        case RETRY_DELAY:
          seconds = schedule.getRetryDelay();
          if (seconds < 0) {
            seconds = Integer.MAX_VALUE;
          }
          break;
        case SCHEDULE_DELAY:
          seconds = schedule.nextScheduledInterval();
          if (seconds == 0) {
            return; // Don't sleep at all.
          } else if (seconds < 0) {
            seconds = Integer.MAX_VALUE;
          }
          break;
        }
      }
    }

    try {
      LOGGER.finest("Sleeping for " + seconds + " seconds.");
      Thread.sleep(1000L * seconds);
    } finally {
      LOGGER.finest("Awake from sleep.");
    }
  }

  /**
   * Returns a new TraversalService. Waits for an existing one to terminate.
   *
   * @param interruptLister if true, interrupt the old service lister thread.
   */
  private TraversalService newTraversalService(boolean interruptLister) {
    TraversalService service = new TraversalService(Thread.currentThread(),
        context.getPropertyManager().getThreadPoolSize());
    TraversalService oldService = traversalService.getAndSet(service);

    // If already running, shut it down and wait for all threads to exit.
    if (oldService != null) {
      oldService.shutdownNow();
      oldService.awaitTermination();
      if (interruptLister) {
        oldService.interrupt();
      }
    }
   
    return service;
  }

  /**
   * Returns a Collection of Traversers targeted for the TraversalService.
   */
  private Collection<Callable<Void>> newTraversers(TraversalService service) {
    Collection<Callable<Void>> traversers = Lists.newArrayList();
    for (String startPath : context.getStartPaths()) {
      traversers.add(new Traverser(startPath, documentAcceptor, service));
    }
    return traversers;
  }

  private class TraversalService extends ThreadPoolExecutor {
    private Thread listerThread;

    TraversalService(Thread listerThread, int numThreads) {
      super(numThreads, numThreads, 60, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>());
      this.listerThread = listerThread;
    }

    synchronized void clearListerThread() {
      listerThread = null;
    }
     
    /** Wake listerThread from sleep() to notice a change. */
    synchronized void interrupt() {
      if (listerThread != null) {
        listerThread.interrupt();
      }
    }

    /** Waits for the service to terminate. */
    void awaitTermination() {
      try {
        super.awaitTermination(5 * 60, TimeUnit.SECONDS);
      } catch (InterruptedException e) {
        // Fall through to check for successful termination.
      }
      if (!super.isTerminated()) {
        LOGGER.warning("File Lister did not shut down in a timely fashion.");
      }
    }
  }

  @VisibleForTesting
  class Traverser implements Callable<Void> {
    private final String startPath;
    private final DocumentAcceptor documentAcceptor;
    private final TraversalService service;
    private final String ndc;
    private long lastFullTraversal = 0L;
    private long lastTraversal = 0L;

    public Traverser(String startPath, DocumentAcceptor documentAcceptor,
                     TraversalService service) {
      this.startPath = startPath;
      this.documentAcceptor = documentAcceptor;
      this.service = service;
      this.ndc = NDC.peek();
    }

    @Override
    public Void call() throws Exception {
      NDC.clear();
      NDC.push(ndc);
      NDC.pushAppend(Thread.currentThread().getName());
      try {
        traverse();
      } catch (DocumentAcceptorException e) {
        LOGGER.log(Level.WARNING, "Lister feed error.", e);
        throw e;
      } catch (RepositoryException e) {
        LOGGER.log(Level.WARNING, "Failed to traverse: " + startPath, e);
        throw e;
      } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Failed to traverse: " + startPath, e);
        throw e;
      } finally {
        NDC.remove();
      }
      return null;
    }

    /**
     * Calculate an appropriate ifModifiedSince value based on the
     * start traversal time, and the time of the last full traversal.
     */
    @VisibleForTesting
    synchronized long getIfModifiedSince(long startTime) {
      if (fullTraversalInterval >= 0 &&
          (startTime - lastFullTraversal) >= fullTraversalInterval) {
        // Force a full traversal.
        lastFullTraversal = 0L;
        return 0L;
      } else {
        return Math.max(0L, lastTraversal - ifModifiedSinceCushion);
      }
    }

    /** Record the time that the last successful traversal started. */
    @VisibleForTesting
    void finishedTraversal(long startTime) {
      if (lastFullTraversal == 0L) {
        lastFullTraversal = startTime;
      }
      lastTraversal = startTime;
    }

    /** Returns true if we are in a shutdown scenario. */
    private boolean isShutdown() {
      return service.isShutdown();
    }

    private void traverse() throws DocumentAcceptorException,
        RepositoryException {
      LOGGER.fine("Start traversal: " + startPath);
      ReadonlyFile<?> root =
          pathParser.getFile(startPath, context.getCredentials());
      if (root == null) {
        LOGGER.warning("Failed to open start path: " + startPath);
        return;
      }
      long startTime = clock.getTimeMillis();
      AclProperties aclProps = context.getPropertyManager();
      boolean returnDirectories = root.getFileSystemType().supportsAcls()
        && aclProps.isPushAcls() && aclProps.supportsInheritedAcls()
        && !aclProps.isMarkAllDocumentsPublic();

      try {
        FileIterator iter = new FileIterator(root, context,
            getIfModifiedSince(startTime), returnDirectories);

        if (returnDirectories) {
          try {         
            Document rootShareAclDoc = createRootShareAcl(root);
            if (rootShareAclDoc != null) {
              documentAcceptor.take(rootShareAclDoc);
            }
          } catch (RepositoryException e) {
            LOGGER.log(Level.WARNING,
                "Failed to feed root share ACL document " + root.getPath(), e);
            throw e;
          }
        }
        while (!isShutdown()) {
          String path = "";
          ReadonlyFile<?> file = null;
          try {
            file = iter.next();
            if (file == null) {
              break// No more files.
            }         
            path = file.getPath();
            for (Document document :
                 FileDocument.getDocuments(file, context, root)) {
              documentAcceptor.take(document);
            }
          } catch (RepositoryDocumentException rde) {
            LOGGER.log(Level.WARNING, "Failed to feed document " + path, rde);
          } catch (RepositoryException e) {
            // TODO (bmj): Ideally we should retry the failed file a few times
            // after increasing delays (1, 2, 4, 8 minutes to see if we can
            // overcome apparently transient errors) before skipping over it.
            LOGGER.log(Level.WARNING, "Encountered an error traversing "
                       + startPath + " at document " + path, e);
            if (!isShutdown()) {
              iter.pushBack(file);
              try {
                sleep(Sleep.ERROR_DELAY);
              } catch (InterruptedException ie) {
                // Awake early from sleep.
              }
            }
          }
        }
        // If we succeeded, remember the last completed pass.
        finishedTraversal(startTime);
      } finally {
        LOGGER.fine("End traversal: " + startPath);
        documentAcceptor.flush();
      }
    }

    /*
     * Create and return share ACL as secure document for the root.
     *
     * @throws RepositoryException
     */
    private Document createRootShareAcl(ReadonlyFile<?> root)
        throws RepositoryException {
      try {
        Acl shareAcl = root.getShareAcl();
        if (shareAcl != null && !shareAcl.equals(Acl.USE_HEAD_REQUEST)) {
          Map<String, List<Value>> aclValues = Maps.newHashMap();
          putPrincipalValues(aclValues, SpiConstants.PROPNAME_ACLUSERS,
              shareAcl.getUsers());
          putPrincipalValues(aclValues, SpiConstants.PROPNAME_ACLGROUPS,
              shareAcl.getGroups());
          putPrincipalValues(aclValues, SpiConstants.PROPNAME_ACLDENYUSERS,
              shareAcl.getDenyUsers());
          putPrincipalValues(aclValues, SpiConstants.PROPNAME_ACLDENYGROUPS,
              shareAcl.getDenyGroups());
          putStringValue(aclValues, SpiConstants.PROPNAME_DOCID,
              FileDocument.getRootShareAclId(root));
          putStringValue(aclValues, SpiConstants.PROPNAME_FEEDTYPE,
              FeedType.CONTENTURL.toString());
          putStringValue(aclValues, SpiConstants.PROPNAME_ACLINHERITANCETYPE,
              SpiConstants.AclInheritanceType.AND_BOTH_PERMIT.toString());
          return SecureDocument.createAcl(aclValues);
        } else {
          return null;
        }
      } catch (IOException e) {
        throw new RepositoryDocumentException("Failed to create share ACL for "
            + root.getPath(), e);
      }
    }

    /** Adds an optional ACL Property of Principal values to the map. */
    private void putPrincipalValues(Map<String, List<Value>> aclValues,
        String key, Collection<Principal> principals) {
      if (principals != null) {
        List<Value> valueList =
            Lists.newArrayListWithCapacity(principals.size());
        for (Principal principal : principals) {
          valueList.add(Value.getPrincipalValue(principal));
        }
        aclValues.put(key, valueList);
      }
    }

    /** Adds an optional String Property to the map. */
    private void putStringValue(Map<String, List<Value>> aclValues,
        String key, String value) {
      if (value != null) {
        aclValues.put(key,
            Collections.singletonList(Value.getStringValue(value)));
      }
    }
  }
}
TOP

Related Classes of com.google.enterprise.connector.filesystem.FileLister$Traverser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.