/*
Weave (Web-based Analysis and Visualization Environment)
Copyright (C) 2008-2011 University of Massachusetts Lowell
This file is a part of Weave.
Weave is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, Version 3,
as published by the Free Software Foundation.
Weave is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Weave. If not, see <http://www.gnu.org/licenses/>.
*/
package weave.utils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import org.geotools.data.shapefile.dbf.DbaseFileHeader;
import org.geotools.data.shapefile.dbf.DbaseFileReader;
/**
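* Utility methods for reading DBF (dBase) files and importing their attribute data into SQL tables.
*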
* @author skolman
* @author adufilie
*/
public class DBFUtils
{
/**
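* Reads the column names from the header of a DBF file.
* <p>
* A minimal usage sketch (the file name is hypothetical):
* <pre>{@code
* List<String> names = DBFUtils.getAttributeNames(new File("states.dbf"));
* for (String name : names)
*     System.out.println(name);
* }</pre>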
* @param dbfFile A DBF file
* @return A list of attribute names in the DBF file
*/
public static List<String> getAttributeNames(File dbfFile) throws IOException
{
FileInputStream fis = new FileInputStream(dbfFile);
DbaseFileReader dbfReader = new DbaseFileReader(fis.getChannel(), false, Charset.forName("ISO-8859-1"));
List<String> names = new Vector<String>();
try
{
// the header lists the column names
DbaseFileHeader dbfHeader = dbfReader.getHeader();
int n = dbfHeader.getNumFields();
for (int i = 0; i < n; i++)
names.add(dbfHeader.getFieldName(i));
}
finally
{
// close the reader and its underlying stream so file handles are not leaked
dbfReader.close();
fis.close();
}
return names;
}
/**
* Tests whether the combined values of several columns uniquely identify each row across a set of DBF files.
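* <p>
* A minimal usage sketch (file and column names are hypothetical):
* <pre>{@code
* File[] files = { new File("states.dbf"), new File("counties.dbf") };
* boolean unique = DBFUtils.isColumnUnique(files, new String[]{ "FIPS" });
* }</pre>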
* @param dbfFiles The DBF files to check.
* @param columnNames The names of the columns whose combined values form the candidate key.
* @return A value of true if the combined column values uniquely identify every row across all the DBF files.
* @throws IOException
*/
public static boolean isColumnUnique(File[] dbfFiles, String[] columnNames) throws IOException
{
Set<Object> set = new HashSet<Object>();
for (File file : dbfFiles)
{
Object[][] rows = getDBFData(file, columnNames);
for (int i = 0; i < rows.length; i++)
{
// concatenate all values into a string, using a separator character
// so that ("ab","c") and ("a","bc") do not produce the same key
StringBuilder sb = new StringBuilder();
for (Object str : rows[i])
sb.append(str).append('\0');
String value = sb.toString();
// Set.add() returns false if the value has been seen before
if (!set.add(value))
return false;
}
}
return true;
}
/**
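* Reads attribute data from a DBF file.
* <p>
* A minimal usage sketch (file and field names are hypothetical):
* <pre>{@code
* Object[][] rows = DBFUtils.getDBFData(new File("states.dbf"), new String[]{ "STATE_NAME", "POP2000" });
* for (Object[] row : rows)
*     System.out.println(row[0] + ", " + row[1]);
* }</pre>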
* @param dbfFile A DBF file
* @param fieldNames A list of field names to retrieve, or null for all columns
* @return A table of attribute values from the DBF file, one row per record.
*/
public static Object[][] getDBFData(File dbfFile, String[] fieldNames) throws IOException
{
List<String> allFields = getAttributeNames(dbfFile);
FileInputStream fis = new FileInputStream(dbfFile);
DbaseFileReader dbfReader = new DbaseFileReader(fis.getChannel(), false, Charset.forName("ISO-8859-1"));
List<Object[]> rowsList = new Vector<Object[]>();
try
{
while (dbfReader.hasNext())
{
Object[] row;
if (fieldNames != null)
{
// advance to the next record, then read only the requested fields
dbfReader.read();
row = new Object[fieldNames.length];
for (int i = 0; i < fieldNames.length; i++)
{
int index = allFields.indexOf(fieldNames[i]);
// a field missing from this file becomes an empty string
if (index < 0)
row[i] = "";
else
row[i] = dbfReader.readField(index);
}
}
else
{
row = dbfReader.readEntry();
}
rowsList.add(row);
}
}
finally
{
// close the reader and its underlying stream so file handles are not leaked
dbfReader.close();
fis.close();
}
return rowsList.toArray(new Object[rowsList.size()][]);
}
/**
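* Imports the records of one or more DBF files into a single SQL table, adding a "the_geom_id" column that numbers the records within each file.
* <p>
* A minimal usage sketch (connection details, file names, and table names are hypothetical):
* <pre>{@code
* Connection conn = DriverManager.getConnection("jdbc:postgresql://localhost/geo", "user", "pass");
* File[] files = { new File("states.dbf"), new File("counties.dbf") };
* DBFUtils.storeAttributes(files, conn, "public", "state_attributes", true, new String[]{ "", "NULL" });
* }</pre>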
* @param dbfFiles A list of DBF files to merge
* @param conn A database connection.
* @param sqlSchema The schema in which to create the table.
* @param sqlTable The name of the table in which to store the data.
* @param overwriteTables Set this to true to overwrite an existing SQL table.
* @param nullValues A list of Strings to interpret as null values.
* @throws IOException
* @throws SQLException
*/
public static void storeAttributes(File[] dbfFiles, Connection conn, String sqlSchema, String sqlTable, boolean overwriteTables, String[] nullValues) throws IOException, SQLException
{
if (!overwriteTables && SQLUtils.tableExists(conn, sqlSchema, sqlTable))
throw new SQLException("SQL table already exists and overwriteTables is false.");
// read records from each file
List<String> fieldNames = new Vector<String>(); // order corresponds to fieldTypes order
List<String> fieldTypes = new Vector<String>(); // order corresponds to fieldNames order
FileInputStream[] inputStreams = new FileInputStream[dbfFiles.length];
DbaseFileHeader[] headers = new DbaseFileHeader[dbfFiles.length];
DbaseFileReader[] readers = new DbaseFileReader[dbfFiles.length];
// open each file, read each header, get the complete list of field names and types
for (int i = 0; i < dbfFiles.length; i++)
{
inputStreams[i] = new FileInputStream(dbfFiles[i]);
readers[i] = new DbaseFileReader(inputStreams[i].getChannel(), false, Charset.forName("ISO-8859-1"));
headers[i] = readers[i].getHeader();
int numFields = headers[i].getNumFields();
// keep track of the full set of field names
for (int col = 0; col < numFields; col++)
{
String newFieldName = headers[i].getFieldName(col);
if (ListUtils.findString(newFieldName, fieldNames) < 0)
{
fieldNames.add(newFieldName);
fieldTypes.add(getSQLDataType(conn, headers[i], col));
}
}
}
// begin SQL code
try
{
conn.setAutoCommit(false);
// create the table
if (overwriteTables)
SQLUtils.dropTableIfExists(conn, sqlSchema, sqlTable);
final String ID_FIELD = "the_geom_id";
fieldNames.add(0, ID_FIELD);
fieldTypes.add(0, SQLUtils.getBigIntTypeString(conn) + " NOT NULL");
SQLUtils.createTable(conn, sqlSchema, sqlTable, fieldNames, fieldTypes, null);
// import data from each file
for (int f = 0; f < dbfFiles.length; f++)
{
int numFields = headers[f].getNumFields();
int numRecords = headers[f].getNumRecords();
// insert records from this file
for (int r = 0; r < numRecords; r++)
{
Map<String,Object> record = new HashMap<String, Object>();
record.put(ID_FIELD, r + 1);
Object[] entry = readers[f].readEntry();
for (int c = 0; c < numFields; c++)
{
if (entry[c] != null && ListUtils.findIgnoreCase(entry[c].toString(), nullValues) < 0)
record.put(headers[f].getFieldName(c), entry[c]);
}
// insert the record in the table
try
{
SQLUtils.insertRow(conn, sqlSchema, sqlTable, record);
}
catch (SQLException e)
{
String str = String.format("Insert failed on row %s of %s: %s", r, dbfFiles[f].getName(), record);
throw new SQLException(str, e);
}
}
// close the file
readers[f].close();
inputStreams[f].close();
// clean up pointers
readers[f] = null;
inputStreams[f] = null;
headers[f] = null;
}
}
finally
{
// restoring auto-commit implicitly commits the open transaction
conn.setAutoCommit(true);
// close any files that are still open (e.g., if an insert failed)
for (int i = 0; i < dbfFiles.length; i++)
{
try
{
if (readers[i] != null)
readers[i].close();
if (inputStreams[i] != null)
inputStreams[i].close();
}
catch (IOException e)
{
// ignore close errors so they do not mask an earlier exception
}
}
}
}
/**
* Returns the SQL data type string for a DBF column, based on the field type reported by DbaseFileHeader:
* 'C' maps to a varchar type; 'N' and 'F' map to a big-integer type when the field has zero decimal places, otherwise a double-precision type; 'D' maps to a date-time type.
*/
private static String getSQLDataType(Connection conn, DbaseFileHeader dbfHeader, int index)
{
char dataType = dbfHeader.getFieldType(index);
String sqlDataType = "";
if (dataType == 'C')
sqlDataType = SQLUtils.getVarcharTypeString(conn, dbfHeader.getFieldLength(index));
else if (dataType == 'N' || dataType == 'F')
{
// if the field has zero decimal places, use an integer type; otherwise use double precision
if (dbfHeader.getFieldDecimalCount(index) == 0)
sqlDataType = SQLUtils.getBigIntTypeString(conn);
else
sqlDataType = SQLUtils.getDoubleTypeString(conn);
}
else if (dataType == 'D')
{
sqlDataType = SQLUtils.getDateTimeTypeString(conn);
}
else
{
throw new RuntimeException("Unknown DBF data type: "+dataType+" in column "+dbfHeader.getFieldName(index));
}
return sqlDataType;
}
}