Source Code of com.google.enterprise.connector.filesystem.FileRetriever

// Copyright 2012 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.google.enterprise.connector.filesystem;


import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.DocumentNotFoundException;
import com.google.enterprise.connector.spi.RepositoryDocumentException;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.Retriever;
import com.google.enterprise.connector.spi.SkippedDocumentException;
import com.google.enterprise.connector.spi.TraversalContext;
import com.google.enterprise.connector.spi.TraversalContextAware;
import com.google.enterprise.connector.util.MimeTypeDetector;


import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Level;
import java.util.logging.Logger;


class FileRetriever implements Retriever, TraversalContextAware {
  private static final Logger LOGGER =
      Logger.getLogger(FileRetriever.class.getName());


  private final PathParser pathParser;
  private final DocumentContext context;
  private final MimeTypeDetector mimeTypeDetector;
  private TraversalContext traversalContext;


  public FileRetriever(PathParser pathParser, DocumentContext context) {
    this.pathParser = pathParser;
    this.context = context;
    this.mimeTypeDetector = context.getMimeTypeDetector();
  }


  @Override
  public void setTraversalContext(TraversalContext traversalContext) {
    this.traversalContext = traversalContext;
    context.setTraversalContext(traversalContext);
  }


  @Override
  public InputStream getContent(String docid) throws RepositoryException {
    LOGGER.log(Level.FINEST, "Retrieving content for {0}", docid);
    ReadonlyFile<?> file = getFile(docid);
    if (file.isRegularFile()) {
      try {
        String mimeType = mimeTypeDetector.getMimeType(file.getName(), file);
        int supportLevel = traversalContext.mimeTypeSupportLevel(mimeType);
        long len = file.length();
        if (supportLevel > 0 && len > 0 &&
            len <= traversalContext.maxDocumentSize()) {
          return file.getInputStream();
        } else {
          if (supportLevel < 0) {
            throw new SkippedDocumentException("Skipping file " + docid
                + " - excluded MIME type: " + mimeType);
          } else if (supportLevel == 0) {
            LOGGER.log(Level.FINER, "Returning no content for file {0}"
                + " - unsupported MIME type: {1}",
                new Object[] { docid, mimeType });
          } else if (len <= 0) {
            LOGGER.log(Level.FINER, "Returning no content for file {0}"
                 + " - empty file", docid);
          } else if (len > traversalContext.maxDocumentSize()) {
            LOGGER.log(Level.FINER, "Returning no content for file {0}"
                 + " - too large", docid);
          }
          return null;
        }
      } catch (IOException e) {
        throw new RepositoryDocumentException("Failed to open file: " + docid,
                                              e);
      }
    } else {
      LOGGER.log(Level.FINER, "Returning no content for file {0}"
          + " - not a regular file", docid);
    }
    return null;
  }


  @Override
  public Document getMetaData(String docid) throws RepositoryException {
    LOGGER.log(Level.FINEST, "Retrieving meta-data for {0}", docid);
    ReadonlyFile<?> file = getFile(docid);
    return new FileDocument(file, context, getRoot(file));
  }


  private ReadonlyFile<?> getFile(String docid) throws RepositoryException {
    ReadonlyFile<?> file = pathParser.getFile(docid, context.getCredentials());
    if (file == null) {
      // Not one of our registered filesystems.
      throw new DocumentNotFoundException("Failed to open file: " + docid);
    }
    if (!file.exists()) {
      // File actually does not exist.
      throw new DocumentNotFoundException("File not found: " + docid);
    }
    // Verify that we would have actually fed this document.
    if (!isQualifiedFile(file)) {
      // File may or may-not exist, but it is not available to us.
      throw new SkippedDocumentException("Access denied: " + docid);
    }
    return file;
  }


  /**
   * Verify that we would have actually fed this file, and that it would still
   * be a valid file to feed. First check that it is located under one of our
   * startpaths.  Then verify the file is readable and not hidden or located
   * under a hidden directory. Finally, does it (and all its ancesters),
   * pass the PatternMatcher?
   *
   * @param file the ReadonlyFile in question
   * @return true if file is qualified for retrieval; false otherwise
   */
  private boolean isQualifiedFile(ReadonlyFile<?> file)
      throws RepositoryException {
    if (!file.canRead()) {
      return false;
    }


    // Check to see if the pathname is under one of our start points.
    // The start paths are normalized (have trailing slashes), so we should
    // not have false positives on partial matches. The start paths are sorted
    // by decreasing length of pathname, so look for the longest startpath that
    // matches our file's pathname.
    String pathName = file.getPath();
    String startPath = getStartPath(pathName);


    // No match, not a file we care about.
    if (startPath == null) {
      return false;
    }    
    
    // Next, does it (and all its ancesters), pass the PatternMatcher?
    FilePatternMatcher matcher = context.getFilePatternMatcher();
    Credentials credentials = context.getCredentials();
    FileSystemType<?> fileSystemType = file.getFileSystemType();
    while (pathName.length() >= startPath.length()) {
      // SMB Administrative Shares are marked as "hidden".  We will allow
      // their contents to be retrieved, but we don't want to permit any
      // other hidden files or hidden directories to be returned.
      if (file.isHidden() && !pathName.equals(startPath)) {
        return false;
      }
      if (!matcher.acceptName(pathName)) {
        return false;
      }
      String parentPath = file.getParent();
      if (parentPath == null || pathName.equals(parentPath)) {
        // We tried to walk past the root of the filesystem.
        // That means the startPoint was the root and we are done.
        break;
      }
      file = fileSystemType.getFile(parentPath, credentials);
      pathName = file.getPath();
    }
    return true;
  }


  /**
   * Returns the ReadonlyFile root under which this file resides, or null
   * if the file does not appear to reside under any of our startpaths.
   */
  private ReadonlyFile<?> getRoot(ReadonlyFile<?> file)
      throws RepositoryException {
    String startPath = getStartPath(file.getPath());
    return (startPath == null) ? null :
        file.getFileSystemType().getFile(startPath, context.getCredentials());
  }


  /**
   * Returns the startpath under which this file resides, or null
   * if the file does not appear to reside under any of our startpaths.
   */
  private String getStartPath(String pathName) {
    for (String path : context.getStartPaths()) {
      if (pathName.startsWith(path)) {
        return path;
      }
    }
    return null;
  }
}
Source Code of com.google.enterprise.connector.filesystem.FileRetriever

Related Classes of com.google.enterprise.connector.filesystem.FileRetriever