Source Code of org.apache.lucene.index.IndexReader

package org.apache.lucene.index;


/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


import java.io.IOException;
import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.document.Document;


/** IndexReader is an abstract class, providing an interface for accessing an
  index.  Search of an index is done entirely through this abstract interface,
  so that any subclass which implements it is searchable.


  <p> Concrete subclasses of IndexReader are usually constructed with a call to
  the static method {@link #open}.


  <p> For efficiency, in this API documents are often referred to via
  <i>document numbers</i>, non-negative integers which each name a unique
  document in the index.  These document numbers are ephemeral--they may change
  as documents are added to and deleted from an index.  Clients should thus not
  rely on a given document having the same number between sessions. */


abstract public class IndexReader {
  protected IndexReader(Directory directory) {
    this.directory = directory;
  }


  Directory directory;
  private Lock writeLock;


  /** Returns an IndexReader reading the index in an FSDirectory in the named
  path. */
  public static IndexReader open(String path) throws IOException {
    return open(FSDirectory.getDirectory(path, false));
  }


  /** Returns an IndexReader reading the index in an FSDirectory in the named
  path. */
  public static IndexReader open(File path) throws IOException {
    return open(FSDirectory.getDirectory(path, false));
  }


  /** Returns an IndexReader reading the index in the given Directory. */
  public static IndexReader open(final Directory directory) throws IOException{
    synchronized (directory) {        // in- & inter-process sync
      return (IndexReader)new Lock.With(directory.makeLock("commit.lock")) {
    public Object doBody() throws IOException {
      SegmentInfos infos = new SegmentInfos();
      infos.read(directory);
      if (infos.size() == 1)      // index is optimized
        return new SegmentReader(infos.info(0), true);


      SegmentReader[] readers = new SegmentReader[infos.size()];
      for (int i = 0; i < infos.size(); i++)
        readers[i] = new SegmentReader(infos.info(i), i==infos.size()-1);
      return new SegmentsReader(directory, readers);
    }
  }.run();
    }
  }


  /** Returns the time the index in the named directory was last modified. */
  public static long lastModified(String directory) throws IOException {
    return lastModified(new File(directory));
  }


  /** Returns the time the index in the named directory was last modified. */
  public static long lastModified(File directory) throws IOException {
    return FSDirectory.fileModified(directory, "segments");
  }


  /** Returns the time the index in this directory was last modified. */
  public static long lastModified(Directory directory) throws IOException {
    return directory.fileModified("segments");
  }


  /**
   * Returns <code>true</code> if an index exists at the specified directory.
   * If the directory does not exist or if there is no index in it.
   * <code>false</code> is returned.
   * @param  directory the directory to check for an index
   * @return <code>true</code> if an index exists; <code>false</code> otherwise
   */
  public static boolean indexExists(String directory) {
    return (new File(directory, "segments")).exists();
  }


  /**
   * Returns <code>true</code> if an index exists at the specified directory.
   * If the directory does not exist or if there is no index in it.
   * @param  directory the directory to check for an index
   * @return <code>true</code> if an index exists; <code>false</code> otherwise
   */
  public static boolean indexExists(File directory) {
    return (new File(directory, "segments")).exists();
  }


  /**
   * Returns <code>true</code> if an index exists at the specified directory.
   * If the directory does not exist or if there is no index in it.
   * @param  directory the directory to check for an index
   * @return <code>true</code> if an index exists; <code>false</code> otherwise
   * @throws IOException if there is a problem with accessing the index
   */
  public static boolean indexExists(Directory directory) throws IOException {
    return directory.fileExists("segments");
  }


  /** Returns the number of documents in this index. */
  abstract public int numDocs();


  /** Returns one greater than the largest possible document number.
    This may be used to, e.g., determine how big to allocate an array which
    will have an element for every document number in an index.
   */
  abstract public int maxDoc();


  /** Returns the stored fields of the <code>n</code><sup>th</sup>
      <code>Document</code> in this index. */
  abstract public Document document(int n) throws IOException;


  /** Returns true if document <i>n</i> has been deleted */
  abstract public boolean isDeleted(int n);


  /** Returns the byte-encoded normalization factor for the named field of
    every document.  This is used by the search code to score documents.
    @see org.apache.lucene.search.Similarity#norm
    */
  abstract public byte[] norms(String field) throws IOException;


  /** Returns an enumeration of all the terms in the index.
    The enumeration is ordered by Term.compareTo().  Each term
    is greater than all that precede it in the enumeration.
   */
  abstract public TermEnum terms() throws IOException;


  /** Returns an enumeration of all terms after a given term.
    The enumeration is ordered by Term.compareTo().  Each term
    is greater than all that precede it in the enumeration.
   */
  abstract public TermEnum terms(Term t) throws IOException;


  /** Returns the number of documents containing the term <code>t</code>. */
  abstract public int docFreq(Term t) throws IOException;


  /** Returns an enumeration of all the documents which contain
    <code>term</code>. For each document, the document number, the frequency of
    the term in that document is also provided, for use in search scoring.
    Thus, this method implements the mapping:
    <p><ul>
    Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
    </ul>
    <p>The enumeration is ordered by document number.  Each document number
    is greater than all that precede it in the enumeration. */
  public TermDocs termDocs(Term term) throws IOException {
    TermDocs termDocs = termDocs();
    termDocs.seek(term);
    return termDocs;
  }


  /** Returns an unpositioned {@link TermDocs} enumerator. */
  abstract public TermDocs termDocs() throws IOException;


  /** Returns an enumeration of all the documents which contain
    <code>term</code>.  For each document, in addition to the document number
    and frequency of the term in that document, a list of all of the ordinal
    positions of the term in the document is available.  Thus, this method
    implements the mapping:


    <p><ul>
    Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
          &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
    pos<sub>freq-1</sub>&gt;
  &gt;<sup>*</sup>
    </ul>
    <p> This positional information faciliates phrase and proximity searching.
    <p>The enumeration is ordered by document number.  Each document number is
    greater than all that precede it in the enumeration. */
  public TermPositions termPositions(Term term) throws IOException {
    TermPositions termPositions = termPositions();
    termPositions.seek(term);
    return termPositions;
  }


  /** Returns an unpositioned {@link TermPositions} enumerator. */
  abstract public TermPositions termPositions() throws IOException;


  /** Deletes the document numbered <code>docNum</code>.  Once a document is
    deleted it will not appear in TermDocs or TermPostitions enumerations.
    Attempts to read its field with the {@link #document}
    method will result in an error.  The presence of this document may still be
    reflected in the {@link #docFreq} statistic, though
    this will be corrected eventually as the index is further modified.  */
  public synchronized final void delete(int docNum) throws IOException {
    if (writeLock == null) {
      Lock writeLock = directory.makeLock("write.lock");
      if (!writeLock.obtain())        // obtain write lock
        throw new IOException("Index locked for write: " + writeLock);
      this.writeLock = writeLock;
    }
    doDelete(docNum);
  }
  abstract void doDelete(int docNum) throws IOException;


  /** Deletes all documents containing <code>term</code>.
    This is useful if one uses a document field to hold a unique ID string for
    the document.  Then to delete such a document, one merely constructs a
    term with the appropriate field and the unique ID string as its text and
    passes it to this method.  Returns the number of documents deleted. */
  public final int delete(Term term) throws IOException {
    TermDocs docs = termDocs(term);
    if ( docs == null ) return 0;
    int n = 0;
    try {
      while (docs.next()) {
  delete(docs.doc());
  n++;
      }
    } finally {
      docs.close();
    }
    return n;
  }


  /**
   * Closes files associated with this index.
   * Also saves any new deletions to disk.
   * No other methods should be called after this has been called.
   */
  public final synchronized void close() throws IOException {
    doClose();
    if (writeLock != null) {
      writeLock.release();  // release write lock
      writeLock = null;
    }
  }


  /** Implements close. */
  abstract void doClose() throws IOException;


  /** Release the write lock, if needed. */
  protected final void finalize() throws IOException {
    if (writeLock != null) {
      writeLock.release();                        // release write lock
      writeLock = null;
    }
  }


  /**
   * Returns <code>true</code> iff the index in the named directory is
   * currently locked.
   * @param directory the directory to check for a lock
   * @throws IOException if there is a problem with accessing the index
   */
    public static boolean isLocked(Directory directory) throws IOException {
  return directory.fileExists("write.lock");
    }


  /**
   * Returns <code>true</code> iff the index in the named directory is
   * currently locked.
   * @param directory the directory to check for a lock
   * @throws IOException if there is a problem with accessing the index
   */
    public static boolean isLocked(String directory) throws IOException {
  return (new File(directory, "write.lock")).exists();
    }


   /**
    * Forcibly unlocks the index in the named directory.
    * <P>
    * Caution: this should only be used by failure recovery code,
    * when it is known that no other process nor thread is in fact
    * currently accessing this index.
    */
    public static void unlock(Directory directory) throws IOException {
  directory.deleteFile("write.lock");
  directory.deleteFile("commit.lock");
    }
}
Source Code of org.apache.lucene.index.IndexReader

Related Classes of org.apache.lucene.index.IndexReader