Package it.unimi.dsi.mg4j.document

Source Code of it.unimi.dsi.mg4j.document.JdbcDocumentCollection$JdbcDocumentIterator

package it.unimi.dsi.mg4j.document;

/*    
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2005-2010 Sebastiano Vigna
*
*  This library is free software; you can redistribute it and/or modify it
*  under the terms of the GNU Lesser General Public License as published by the Free
*  Software Foundation; either version 3 of the License, or (at your option)
*  any later version.
*
*  This library is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
*  for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/

import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.io.MultipleInputStream;
import it.unimi.dsi.io.NullInputStream;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory.MetadataKeys;
import it.unimi.dsi.mg4j.util.MG4JClassParser;

import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;

/** A {@link it.unimi.dsi.mg4j.document.DocumentCollection} corresponding to
*  the result of a query in a relational database.
*
* <p>An instance of this class is based on a query. The query should produce two
* fixed columns: the first, named <samp>id</samp>, must be an increasing integer
* which act as an identifier (i.e., as a key); the second, named <samp>title</samp>, must be a text field and will
* be used as a title. The remaining columns will be indexed, and the name of the corresponding
* field will be the name of the column (use judiciously <samp>AS</samp>).
*
* <p>In complex queries, the specification <samp>id</samp> for the first column
* could be ambiguous; in that case, you can provide an alternate (and hopefully more precise) specification.
*
* <p>At construction time, the query is executed, obtaining a bijection between
* the values of the identifier and document indices. The bijection is exposed by the
* methods {@link #id2doc(int)} and {@link #doc2id(int)}. The class tolerates
* additions to the database (and they will be skipped), but deletions will cause errors.
*
* <P>This class provides a main method with a flexible syntax that serialises
* a query into a document collection.
*/
public class JdbcDocumentCollection extends AbstractDocumentCollection implements Serializable {

  private static final long serialVersionUID = 1L;
  /** The map from database identifiers to documents. */
  final protected Int2IntMap id2doc;
  /** The map (as an array) from documents to database identifiers. */
  final protected int[] doc2id;
  /** The URI pointing at the database. */
  final protected String dbUri;
  /** Optionally, the driver. */
  transient Class<?> jdbcDriver;
  /** Optionally, the driver name. */
  final String jdbcDriverName;
  /** The factory to be used by this collection. */
  final protected DocumentFactory factory;
  /** The query generating the collection (without the <samp>SELECT</samp> keyword). */
  final protected String select;
  /** The spec for the id field; by default it is <samp>id</samp>, but in complex query it could be ambiguous. */
  final protected String idSpec;
  /** The <samp>WHERE</samp> part of the query generating
   * the collection (without the <samp>WHERE</samp> keyword), or <code>null</code>. */
  final protected String where;
  /** The currently open connection, if any. */
  protected transient Connection connection;
 
  /** Creates a document collection based on the result set of an SQL query using <samp>id</samp> as id specifier.
   *
   * <p><strong>Beware.</strong> This class is not guaranteed to work if the database is
   * deleted or modified after creation!
   *
   * @param dbUri a JDBC URI pointing at the database.
   * @param jdbcDriverName the name of a JDBC driver, or <code>null</code> if you do not want to load a driver.
   * @param select the SQL query generating the collection (without the <samp>SELECT</samp> keyword), except for the <samp>WHERE</samp> part.
   * @param where the <samp>WHERE</samp> part (without the <samp>WHERE</samp> keyword) of the SQL query generating the collection, or <code>null</code>.
   * @param factory the factory that will be used to create documents.
   */
  public JdbcDocumentCollection( final String dbUri, final String jdbcDriverName, final String select, final String where, final DocumentFactory factory ) throws SQLException, ClassNotFoundException {
    this( dbUri, jdbcDriverName, select, "id", where, factory );
  }

  /** Creates a document collection based on the result set of an SQL query.
   *
   * <p><strong>Beware.</strong> This class is not guaranteed to work if the database is
   * deleted or modified after creation!
   *
   * @param dbUri a JDBC URI pointing at the database.
   * @param jdbcDriverName the name of a JDBC driver, or <code>null</code> if you do not want to load a driver.
   * @param select the SQL query generating the collection (without the <samp>SELECT</samp> keyword), except for the <samp>WHERE</samp> part.
   * @param idSpec the complete SQL spec for the <samp>id</samp> (necessary for complex queries with multiple tables).
   * @param where the <samp>WHERE</samp> part (without the <samp>WHERE</samp> keyword) of the SQL query generating the collection, or <code>null</code>.
   * @param factory the factory that will be used to create documents.
   */
  public JdbcDocumentCollection( final String dbUri, final String jdbcDriverName, final String select, final String idSpec, final String where, final DocumentFactory factory ) throws SQLException, ClassNotFoundException {
    this.dbUri = dbUri;
    this.jdbcDriverName = jdbcDriverName;
    this.select = select;
    this.idSpec = idSpec;
    this.where = where;
    this.factory = factory;
   
    initDriver();
    Connection connection = DriverManager.getConnection( dbUri );
    Statement s = connection.createStatement();
    ResultSet rs = s.executeQuery( buildQuery( null ) );
   
    id2doc = new Int2IntOpenHashMap();
    id2doc.defaultReturnValue( -1 );
    final IntArrayList ids = new IntArrayList();
    int id;
    for( int i = 0; rs.next(); i++ ) {
      id = rs.getInt( 1 );
      ids.add( id );
      id2doc.put( i, id );
    }
   
    doc2id = ids.toIntArray();
    rs.close();
    s.close();
    connection.close();
  }

  protected void ensureConnection() throws SQLException {
    if ( connection == null ) connection = DriverManager.getConnection( dbUri );
  }
 
  public void close() throws IOException {
    super.close();
    if ( connection != null ) {
      try {
        connection.close();
        connection = null;
      }
      catch ( SQLException e ) {
        throw new IOException( e.toString() );
      }
    }
  }
 
  public JdbcDocumentCollection copy() {
    try {
      return new JdbcDocumentCollection( dbUri, jdbcDriverName, select, idSpec, where, factory.copy() );
    }
    catch ( Exception e ) {
      throw new RuntimeException( e );
    }
  }
 
  /** Creates a complete query using instance data and possibly an additional <samp>WHERE</samp> clause.
   *
   * @param additionalWhere an additional condition for the <samp>WHERE</samp> clause.
   * @return a complete query based on instance data and <code>additionalWhere</code>,
   */
 
  private String buildQuery( final String additionalWhere ) {
    final MutableString query = new MutableString();
    query.append( "SELECT " ).append( select );
    if ( where == null && additionalWhere != null ) query.append( " WHERE (" ).append( additionalWhere ).append( ")" );
    if ( where != null && additionalWhere == null ) query.append( " WHERE (" ).append( where ).append( ")" );
    if ( where != null && additionalWhere != null ) query.append( " WHERE (" ).append( where ).append( ") AND (" ).append( additionalWhere ).append( ")" );
    query.append( " ORDER BY 1" );
    return query.toString();
  }
 
  private void initDriver() throws ClassNotFoundException {
    jdbcDriver = jdbcDriverName != null ? Class.forName( jdbcDriverName ) : null;
  }
 
  public DocumentFactory factory() {
    return factory;
  }
 
  public int size() {
    return doc2id.length;
  }

  public Document document( final int index ) throws IOException {
    final MutableString title = new MutableString();
    return factory.getDocument( stream( index, title ), metadata( index, title ) );
  }
 
  /** Returns the document associated with a given database identifier.
   *
   * @param id a database identifier.
   * @return the associated document.
   */

  public int id2doc( final int id ) {
    return id2doc.get( id );
  }
 
  /** Returns the database identifier associated with a given document.
   *
   * @param doc a document index.
   * @return the associated database identifier.
   */

  public int doc2id( final int doc ) {
    ensureDocumentIndex( doc );
    return doc2id[ doc ];
  }

  /** Creates metadata with the given title; if the title is not available, it is fetched from the database.
   *
   * @param index a document index.
   * @param title a suggested title, or <code>null</code>.
   * @return the metadata for the document <code>index</code>.
   */
  protected Reference2ObjectMap<Enum<?>,Object> metadata( final int index, CharSequence title ) {
    final Reference2ObjectArrayMap<Enum<?>,Object> metadata = new Reference2ObjectArrayMap<Enum<?>,Object>( 2 );
    if ( title == null ) {
      try {
        ensureConnection();
        final Statement s = connection.createStatement();
        final ResultSet rs = s.executeQuery( buildQuery( idSpec + "=" + doc2id[ index ] ) );
        if ( ! rs.next() ) throw new IllegalStateException( "Id " + doc2id[ index ] + " is no longer in the database" );
        title = rs.getString( 2 );
        rs.close();
        s.close();
      }
      catch ( SQLException e ) {
        throw new RuntimeException( e );
      }
    }
    metadata.put( MetadataKeys.TITLE, title );
    metadata.put( MetadataKeys.URI, Integer.toString( doc2id[ index ] ) );
    return metadata;
  }

  public Reference2ObjectMap<Enum<?>,Object> metadata( final int index ) {
    ensureDocumentIndex( index );
    return metadata( index, null );
  }
   
  public InputStream stream( final int index ) throws IOException {
    return stream( index, null );
  }

  private InputStream getStreamFromResultSet( final ResultSet rs, final MutableString title ) throws SQLException {
    final InputStream[] a = new InputStream[ rs.getMetaData().getColumnCount() - 2 ]; // -2 for id and title
    for( int i = 0; i < a.length; i++ ) {
      a[ i ] = rs.getBinaryStream( i + 3 );
      if ( a[ i ] ==  null ) a[ i ] = NullInputStream.getInstance();
    }
    if ( title != null ) title.replace( rs.getString( 2 ) );
    return MultipleInputStream.getStream( a );
  }

 
  private InputStream stream( final int index, final MutableString title ) throws IOException {
    ensureDocumentIndex( index );
    try {
      ensureConnection();
      Statement s = connection.createStatement();
      ResultSet rs = s.executeQuery( buildQuery( idSpec + "=" + doc2id[ index ] ) );
      if ( ! rs.next() ) throw new IllegalStateException( "Id " + doc2id[ index ] + " is no longer in the database" );
      return getStreamFromResultSet( rs, title );
    }
    catch ( SQLException e ) {
      throw new IOException( e.toString() );
    }
  }

  /** An iterator over the whole collection that performs a single DBMS transaction. */
 
  // ALERT: this is actually VERY inefficient, as metadata() makes a query.
  protected class JdbcDocumentIterator extends AbstractDocumentIterator {
    private final Statement s;
    private final ResultSet rs;
    private final MutableString title = new MutableString();
    private int index = 0;

    private JdbcDocumentIterator() throws SQLException {
      ensureConnection();
      s = connection.createStatement();
      rs = s.executeQuery( buildQuery( null ) );
    }

    public Document nextDocument() throws IOException {
      try {
        if ( ! rs.next() ) return null;
        while( rs.getInt( 1 ) < doc2id[ index ] ) rs.next();
        if ( rs.getInt( 1 ) > doc2id[ index ] ) throw new IllegalStateException( "Row with id " + doc2id[ index ] + " is missing" );
        return factory.getDocument( getStreamFromResultSet( rs, title ), metadata( index++, title ) );
      }
      catch ( SQLException e ) {
        throw new IOException( e.toString() );
      }
    }

    public void close() throws IOException {
      super.close();
      try {
        rs.close();
        s.close();
      }
      catch ( SQLException e ) {
        throw new IOException( e.toString() );
      }
    }
  }
 
  public DocumentIterator iterator() throws IOException {

    try {
      return new JdbcDocumentIterator();
    }
    catch ( SQLException e ) {
      throw new IOException( e.toString() );
    }
  }

  private void readObject( final ObjectInputStream s ) throws IOException, ClassNotFoundException {
    s.defaultReadObject();
    initDriver();
  }
 
  public static void main( final String[] arg ) throws JSAPException, InvocationTargetException, NoSuchMethodException, IllegalAccessException, IOException, SQLException, ClassNotFoundException, InstantiationException {

    SimpleJSAP jsap = new SimpleJSAP( JdbcDocumentCollection.class.getName(), "Saves a serialised document collection based on a set of database rows. The first column of the query is used as an integer id, and the second column for titles. Each remaining column is used to build a segmented input stream, which is passed to a ComposedDocumentFactory made of the specified factories.",
        new Parameter[] {
          new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),
          new FlaggedOption( "jdbcDriver", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "driver", "The JDBC driver. You can omit it if it is already loaded." ),
          new FlaggedOption( "factory", MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), JSAP.NOT_REQUIRED, 'f', "factory", "One document factory for each indexed field." ).setAllowMultipleDeclarations( true ),
          new FlaggedOption( "fieldName", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'n', "field-name", "One field name for each field in the composed document factory. If all specified factories have just one field, the name of the SQL column will be used as a default field name." ).setAllowMultipleDeclarations( true ),
          new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),
          new UnflaggedOption( "dburi", JSAP.STRING_PARSER, JSAP.REQUIRED, "The JDBC URI defining the database." ),
          new UnflaggedOption( "select", JSAP.STRING_PARSER, JSAP.REQUIRED, "A SQL query generating the collection, except for the WHERE part." ),
          new FlaggedOption( "idSpec", JSAP.STRING_PARSER, "id", JSAP.NOT_REQUIRED, 'i', "id-spec", "An optional, more precise specification for the id field (the first column)." ),
          new FlaggedOption( "where", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'w', "where", "The the WHERE part (without the WHERE keyword) of the SQL query generating the collection." )       

        }
    );
   
    JSAPResult jsapResult = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;

    // We run the query to get meta-information about the columns.
    @SuppressWarnings("unused") Class<?> jdbcDriver = Class.forName( jsapResult.getString( "jdbcDriver" ) );
    Connection connection = DriverManager.getConnection( jsapResult.getString( "dburi" ) );
    Statement s = connection.createStatement();
    ResultSet rs = s.executeQuery( "SELECT " + jsapResult.getString( "select" ) );
    ResultSetMetaData metaData = rs.getMetaData();   
    String[] column = new String[ metaData.getColumnCount() - 2 ];
    for( int i = 3; i <= metaData.getColumnCount(); i++ ) column[ i - 3 ] = metaData.getColumnName( i );
    rs.close();
    s.close();
    connection.close();
   
    final DocumentFactory[] factory = new DocumentFactory[ column.length ];
    final Class<?>[] factoryClass = jsapResult.getClassArray( "factory" );
    final String[] property = jsapResult.getStringArray( "property" );
    for( int i = 0; i < factory.length; i++ ) {
      factory[ i ] = PropertyBasedDocumentFactory.getInstance( factoryClass[ Math.min( i, factoryClass.length - 1 ) ], property );
      if ( factory[ i ].numberOfFields() > 1 && ! jsapResult.userSpecified( "fieldName" ) ) throw new IllegalArgumentException( "For factories with more than one field you must specify the name of each field of the composed factory" );
    }

    if ( jsapResult.userSpecified"fieldName" ) ) column = jsapResult.getStringArray( "fieldName" );
   
    BinIO.storeObject( new JdbcDocumentCollection(
                jsapResult.getString( "dburi" ),
                jsapResult.getString( "jdbcDriver" ),
                jsapResult.getString( "select" ),
                jsapResult.getString( "idSpec" ),
                jsapResult.getString( "where" ),
                CompositeDocumentFactory.getFactory( factory, column )
              ), jsapResult.getString( "collection" ) );
  }
}
TOP

Related Classes of it.unimi.dsi.mg4j.document.JdbcDocumentCollection$JdbcDocumentIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.