Package de.chris_soft.nanoarchive

Source Code of de.chris_soft.nanoarchive.DerbyArchive

/**
* NanoDoA - File based document archive
*
* Copyright (C) 2011-2012 Christian Packenius, christian.packenius@googlemail.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package de.chris_soft.nanoarchive;

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Properties;
import org.apache.lucene.index.CorruptIndexException;
import de.chris_soft.utilities.DateUtils;
import de.chris_soft.utilities.FileUtils;
import de.chris_soft.utilities.FulltextIndexAndSearchUtils;
import de.chris_soft.utilities.IdUtils;
import de.chris_soft.utilities.LogUtils;
import de.chris_soft.utilities.Pair;
import de.chris_soft.utilities.swing.labellist.LabelStore;

/**
* Archive system that stores documentes in a derby database.
* @author Christian Packenius.
*/
public class DerbyArchive implements LabelStore {
  /**
   * Root directory for fulltext database and document database.
   */
  public final File rootDirectory;

  /**
   * Full text index system.
   */
  final FulltextIndexAndSearchUtils fulltextIndex;

  private final File dbPath;

  /**
   * Database for the document archive.
   */
  public final DB db;

  /**
   * Constructor.
   * @param rootDirectory Root directory for archiving files.
   * @throws IOException
   * @throws SQLException
   * @throws ClassNotFoundException
   */
  public DerbyArchive(File rootDirectory) throws IOException, ClassNotFoundException, SQLException {
    File fulltextIndexDir = new File(rootDirectory, "fulltextIndex");
    checkRootDirectory(rootDirectory, fulltextIndexDir);
    this.rootDirectory = rootDirectory;
    fulltextIndex = new FulltextIndexAndSearchUtils(fulltextIndexDir);
    dbPath = new File(rootDirectory, "db");
    db = new DB(dbPath.getCanonicalPath());
  }

  private void checkRootDirectory(File rootDirectory, File fulltextIndexDir) throws IOException {
    rootDirectory.mkdirs();
    fulltextIndexDir.mkdirs();
    if (!rootDirectory.exists()) {
      throw new IllegalArgumentException("Can't create " + rootDirectory.getCanonicalPath() + " as directory!");
    }
    if (!rootDirectory.isDirectory()) {
      throw new IllegalArgumentException("No directory: " + rootDirectory.getCanonicalPath() + "!");
    }
  }

  /**
   * Stores a document in the archive.
   * @param documentFile The document to archive.
   * @param fulltext Full document text from OCR or something else source.
   * @param metadata Any meta data like tags, creation date and so on that belongs to the document.
   * @return Document ID in archive.
   * @throws Exception
   */
  public long store(File documentFile, String fulltext, Properties metadata) throws Exception {
    long documentID = getDocumentIdForDocumentStore(documentFile);
    documentID = storeInDocumentDatabase(documentFile, metadata, documentID);
    storeInFulltextDatabase(documentID, fulltext);
    return documentID;
  }

  private long getDocumentIdForDocumentStore(File documentFile) throws IOException, SQLException {
    String name = FileUtils.getNameWithoutExtension(documentFile);

    // Example: 20120301205052_001.pdf
    if (name.length() == 22 && name.charAt(14) == '_') {
      name = removeNonDigitCharacters(name);
      if (name.length() == 17) {
        GregorianCalendar gc = new GregorianCalendar();
        gc.set(Calendar.YEAR, Integer.parseInt(name.substring(0, 4)));
        gc.set(Calendar.MONTH, Integer.parseInt(name.substring(4, 2)) - 1);
        gc.set(Calendar.DAY_OF_MONTH, Integer.parseInt(name.substring(6, 8)));
        gc.set(Calendar.HOUR_OF_DAY, Integer.parseInt(name.substring(8, 10)));
        gc.set(Calendar.MINUTE, Integer.parseInt(name.substring(10, 12)));
        long millis = gc.getTimeInMillis();
        while (getDocumentFileFromDocID(millis) != null) {
          millis++;
        }
        return millis;
      }
    }

    // System time in millis as file name?
    name = removeNonDigitCharacters(name);
    if (name.length() == 13) {
      long millis = Long.parseLong(name);
      if (millis >= 1300000000000L) {
        while (getDocumentFileFromDocID(millis) != null) {
          millis++;
        }
        return millis;
      }
    }

    return 0;
  }

  private String removeNonDigitCharacters(String name) {
    for (int i = name.length() - 1; i >= 0; i--) {
      if (name.charAt(i) < '0' || name.charAt(i) > '9') {
        name = name.substring(0, i) + name.substring(i + 1);
      }
    }
    return name;
  }

  private long storeInDocumentDatabase(File document, Properties metadata, long documentID) throws Exception {
    if (documentID == 0) {
      documentID = IdUtils.getUniqueID();
    }
    try {
      long pathID = getPathIDFromMillis(documentID);
      db.addDocument(documentID, pathID, document);
      setDocumentMetadata(documentID, metadata);
    } catch (Exception e) {
      db.deleteDocument(documentID);
      db.deleteDocumentProperties(documentID);
      throw e;
    }
    return documentID;
  }

  private void storeInFulltextDatabase(long documentID, String fulltext) throws CorruptIndexException, IOException {
    fulltextIndex.add(Long.toString(documentID), fulltext);
  }

  private void setDocumentMetadata(long documentID, Properties metadata) throws SQLException {
    for (Object oKey : metadata.keySet()) {
      String key = (String) oKey;
      String value = metadata.getProperty(key);
      db.setDocumentProperty(documentID, key, value);
    }
  }

  private long getPathIDFromMillis(long documentID) throws SQLException {
    String[] date = DateUtils.getDatePathFromMillis(documentID);
    long pathID = 0;
    for (String datePart : date) {
      boolean found = false;
      List<Pair<Long, String>> list = db.getChildPaths(pathID);
      for (Pair<Long, String> pair : list) {
        if (pair.obj2.equals(datePart)) {
          found = true;
          pathID = pair.obj1;
          break;
        }
      }
      if (!found) {
        pathID = db.addPath(datePart, pathID);
      }
    }
    return pathID;
  }

  /**
   * Creates a list of sub directory names from the given directory ID.
   * @param parentPathID ID of the parent directory.
   * @return List of IDs of the child directories.
   * @throws Exception
   */
  public List<Pair<Long, String>> getDirectories(long parentPathID) throws Exception {
    return db.getChildPaths(parentPathID);
  }

  /**
   * Returns the name of a path.
   * @param pathID Path ID.
   * @return Name of the corresponding path.
   * @throws Exception
   */
  public String getPathName(long pathID) throws Exception {
    return db.getPathName(pathID);
  }

  /**
   * Returns a list of documents from the given path.
   * @param pathID Sub directory path ID.
   * @return List of document IDs.
   * @throws Exception
   */
  public List<Long> getFilesFromSubDirectory(long pathID) throws Exception {
    return db.getDocumentsFromPath(pathID);
  }

  /**
   * Returns the name of this archive.
   * @return Name of this archive.
   * @throws Exception
   */
  public String getName() throws Exception {
    return getClass().getSimpleName() + "::" + rootDirectory.getCanonicalPath();
  }

  /**
   * Start a full text search over the archive and inform the listener every time a document is found.
   * @param searchtext Text to search.
   * @param listener Listener that wants to be informed.
   */
  public void documentSearch(final String searchtext, final DocumentFoundListener listener) {
    final DerbyArchive archive = this;
    Runnable runner = new Runnable() {
      @Override
      public void run() {
        DocumentSearch search = new DocumentSearch(searchtext, fulltextIndex, archive);
        try {
          search.searchDocuments(listener);
        } catch (Exception exception) {
          LogUtils.log(exception);
        }
      }
    };
    new Thread(runner, "Document search thread").start();
  }

  /**
   * Read document metadata from documentID.
   * @param documentID Document ID.
   * @return Metadata of this document as properties object.
   * @throws Exception
   */
  public Properties getMetadataFromDocID(long documentID) throws Exception {
    return db.getDocumentProperties(documentID);
  }

  /**
   * Creates a new or overwrites an existing metadata key/value pair for a document.
   * @param documentID DocID of the document.
   * @param key Key of the metadata.
   * @param value Value of the metadata.
   * @throws Exception
   */
  public void putMetadataViaDocID(long documentID, String key, String value) throws Exception {
    db.setDocumentProperty(documentID, key, value);
  }

  /**
   * Returns the file object of the document with the given ID.
   * @param documentID Document ID.
   * @return File of this document.
   * @throws IOException
   * @throws SQLException
   */
  public File getDocumentFileFromDocID(long documentID) throws IOException, SQLException {
    File file = new File(new File("temp"), documentID + ".pdf");
    if (!file.exists()) {
      new File("temp").mkdirs();
      byte[] ba = db.readDocument(documentID);
      if (ba != null) {
        FileUtils.writeBytesIntoFile(ba, file);
      }
      else {
        file = null;
      }
    }
    else {
      file.setLastModified(System.currentTimeMillis());
    }
    return file;
  }

  /**
   * Removes a file from the archive.
   * @param documentID Document ID.
   * @throws IOException
   * @throws SQLException
   */
  public void deleteDocumentById(long documentID) throws IOException, SQLException {
    db.deleteDocument(documentID);
  }

  /**
   * Shut down document archive.
   */
  public void shutdown() {
    db.close();
  }

  /**
   * @throws SQLException
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#getAllLabels()
   */
  @Override
  public List<Long> getAllLabels() throws SQLException {
    return db.getAllLabels();
  }

  /**
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#addNewLabel(java.lang.String)
   */
  @Override
  public long addNewLabel(String label) {
    try {
      return db.createLabel(label);
    } catch (Exception e) {
      LogUtils.log(e);
    }
    return 0;
  }

  /**
   * @throws SQLException
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#getLabelName(long)
   */
  @Override
  public String getLabelName(long labelID) throws SQLException {
    return db.getLabelName(labelID);
  }

  /**
   * @throws SQLException
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#getLabelsFromLabeledObject(java.lang.Object)
   */
  @Override
  public List<Long> getLabelsFromLabeledObject(Object labeledObject) throws SQLException {
    if (!(labeledObject instanceof Long)) {
      return new ArrayList<Long>();
    }
    return db.getLabelsFromDocument((Long) labeledObject);
  }

  /**
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#deleteLabel(long)
   */
  @Override
  public void deleteLabel(long labelID) {
    try {
      db.deleteLabel(labelID);
    } catch (SQLException exception) {
      LogUtils.log(exception);
    }
  }

  /**
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#renameLabel(long, java.lang.String)
   */
  @Override
  public void renameLabel(long labelID, String newLabelName) throws SQLException {
    db.changeLabelName(labelID, newLabelName);
  }
}
TOP

Related Classes of de.chris_soft.nanoarchive.DerbyArchive

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.