/**
* NanoDoA - File based document archive
*
* Copyright (C) 2011-2012 Christian Packenius, christian.packenius@googlemail.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.chris_soft.nanoarchive;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Properties;
import org.apache.lucene.index.CorruptIndexException;
import de.chris_soft.utilities.DateUtils;
import de.chris_soft.utilities.FileUtils;
import de.chris_soft.utilities.FulltextIndexAndSearchUtils;
import de.chris_soft.utilities.IdUtils;
import de.chris_soft.utilities.LogUtils;
import de.chris_soft.utilities.Pair;
import de.chris_soft.utilities.swing.labellist.LabelStore;
/**
 * Archive system that stores documents in a derby database and adds their text to a
 * full text index.
 * @author Christian Packenius.
 */
public class DerbyArchive implements LabelStore {
  /**
   * Smallest number that is accepted as "system time in millis" when a 13 digit number is found
   * in a file name (1300000000000 ms after the epoch is 13 March 2011, UTC).
   */
  private static final long MIN_PLAUSIBLE_MILLIS = 1300000000000L;

  /**
   * Directory (relative to the working directory) where documents read from the database are
   * cached as files.
   */
  private static final String TEMP_DIRECTORY_NAME = "temp";

  /**
   * Extension given to every cached document file.
   */
  private static final String TEMP_FILE_EXTENSION = ".pdf";

  /**
   * Root directory for fulltext database and document database.
   */
  public final File rootDirectory;

  /**
   * Full text index system.
   */
  final FulltextIndexAndSearchUtils fulltextIndex;

  /**
   * Location of the document database below the root directory.
   */
  private final File dbPath;

  /**
   * Database for the document archive.
   */
  public final DB db;

  /**
   * Constructor. Creates the root directory and the full text index directory if necessary and
   * opens the full text index and the document database below the root directory.
   * @param rootDirectory Root directory for archiving files.
   * @throws IOException
   * @throws SQLException
   * @throws ClassNotFoundException
   * @throws IllegalArgumentException If the root directory can't be created or is no directory.
   */
  public DerbyArchive(File rootDirectory) throws IOException, ClassNotFoundException, SQLException {
    File fulltextIndexDir = new File(rootDirectory, "fulltextIndex");
    checkRootDirectory(rootDirectory, fulltextIndexDir);
    this.rootDirectory = rootDirectory;
    fulltextIndex = new FulltextIndexAndSearchUtils(fulltextIndexDir);
    dbPath = new File(rootDirectory, "db");
    db = new DB(dbPath.getCanonicalPath());
  }

  /**
   * Creates both directories (if missing) and verifies that the root directory is usable.
   * @param rootDirectory Root directory of the archive.
   * @param fulltextIndexDir Directory of the full text index.
   * @throws IOException If the canonical path for an error message can't be determined.
   * @throws IllegalArgumentException If the root directory doesn't exist afterwards or is no directory.
   */
  private void checkRootDirectory(File rootDirectory, File fulltextIndexDir) throws IOException {
    // The results of mkdirs() are ignored on purpose: the checks below decide about success.
    rootDirectory.mkdirs();
    fulltextIndexDir.mkdirs();
    if (!rootDirectory.exists()) {
      throw new IllegalArgumentException("Can't create " + rootDirectory.getCanonicalPath() + " as directory!");
    }
    if (!rootDirectory.isDirectory()) {
      throw new IllegalArgumentException("No directory: " + rootDirectory.getCanonicalPath() + "!");
    }
  }

  /**
   * Stores a document in the archive. The document is written to the document database first and
   * added to the full text index afterwards; an exception from the full text index is passed to
   * the caller without removing the document from the database again.
   * @param documentFile The document to archive.
   * @param fulltext Full document text from OCR or something else source.
   * @param metadata Any meta data like tags, creation date and so on that belongs to the document.
   * May be <code>null</code> if there is no meta data.
   * @return Document ID in archive.
   * @throws Exception
   */
  public long store(File documentFile, String fulltext, Properties metadata) throws Exception {
    long documentID = getDocumentIdForDocumentStore(documentFile);
    documentID = storeInDocumentDatabase(documentFile, metadata, documentID);
    storeInFulltextDatabase(documentID, fulltext);
    return documentID;
  }

  /**
   * Tries to derive the document ID (a time in millis) from the name of the document file.
   * Two name patterns are recognized: a timestamp with a three digit suffix
   * (example: 20120301205052_001.pdf) and a 13 digit system time in millis.
   * @param documentFile The document to archive.
   * @return First unused document ID at or after the time found in the file name, or 0 if the
   * file name contains no usable time.
   * @throws IOException
   * @throws SQLException
   */
  private long getDocumentIdForDocumentStore(File documentFile) throws IOException, SQLException {
    String name = FileUtils.getNameWithoutExtension(documentFile);
    String digits = removeNonDigitCharacters(name);

    // Example: 20120301205052_001.pdf
    // NOTE(review): a length of 22 only matches this example if the name still contains its
    // extension - confirm what FileUtils.getNameWithoutExtension really returns.
    if (name.length() == 22 && name.charAt(14) == '_' && digits.length() == 17) {
      return getFirstFreeDocumentId(getMillisFromTimestampDigits(digits));
    }

    // System time in millis as file name?
    if (digits.length() == 13) {
      long millis = Long.parseLong(digits);
      if (millis >= MIN_PLAUSIBLE_MILLIS) {
        return getFirstFreeDocumentId(millis);
      }
    }
    return 0;
  }

  /**
   * Converts 17 digits of the form yyyyMMddHHmmssNNN into millis (default time zone).
   * Every field is taken from the digits, so the result doesn't depend on the current time.
   * The three digit suffix NNN is used as milliseconds.
   * @param digits Exactly 17 digit characters.
   * @return Time in millis.
   */
  private long getMillisFromTimestampDigits(String digits) {
    int year = Integer.parseInt(digits.substring(0, 4));
    // Calendar months are zero based.
    int month = Integer.parseInt(digits.substring(4, 6)) - 1;
    int day = Integer.parseInt(digits.substring(6, 8));
    int hour = Integer.parseInt(digits.substring(8, 10));
    int minute = Integer.parseInt(digits.substring(10, 12));
    int second = Integer.parseInt(digits.substring(12, 14));
    int suffix = Integer.parseInt(digits.substring(14, 17));
    GregorianCalendar gc = new GregorianCalendar(year, month, day, hour, minute, second);
    gc.set(Calendar.MILLISECOND, suffix);
    return gc.getTimeInMillis();
  }

  /**
   * Searches the first document ID, starting with the given one, for which no document can be read.
   * @param firstCandidate First ID to try.
   * @return First ID without a document.
   * @throws IOException
   * @throws SQLException
   */
  private long getFirstFreeDocumentId(long firstCandidate) throws IOException, SQLException {
    long candidate = firstCandidate;
    while (getDocumentFileFromDocID(candidate) != null) {
      candidate++;
    }
    return candidate;
  }

  /**
   * Removes every character that is not one of '0' to '9'.
   * @param name Any string.
   * @return The digits of the string in their original order.
   */
  private String removeNonDigitCharacters(String name) {
    StringBuilder digits = new StringBuilder(name.length());
    for (int i = 0; i < name.length(); i++) {
      char c = name.charAt(i);
      if (c >= '0' && c <= '9') {
        digits.append(c);
      }
    }
    return digits.toString();
  }

  /**
   * Writes the document and its meta data into the document database. If anything fails,
   * the partly stored document is removed again and the original exception is rethrown.
   * @param document The document to archive.
   * @param metadata Meta data of the document, may be <code>null</code>.
   * @param documentID Wanted document ID or 0 to create a new unique ID.
   * @return Document ID that was used.
   * @throws Exception
   */
  private long storeInDocumentDatabase(File document, Properties metadata, long documentID) throws Exception {
    if (documentID == 0) {
      documentID = IdUtils.getUniqueID();
    }
    try {
      long pathID = getPathIDFromMillis(documentID);
      db.addDocument(documentID, pathID, document);
      setDocumentMetadata(documentID, metadata);
    } catch (Exception e) {
      removeIncompleteDocument(documentID);
      throw e;
    }
    return documentID;
  }

  /**
   * Best effort clean up after a failed store. Problems are only logged, so that they neither
   * hide the exception that caused the clean up nor prevent the second clean up step.
   * @param documentID ID of the document that could not be stored completely.
   */
  private void removeIncompleteDocument(long documentID) {
    try {
      db.deleteDocument(documentID);
    } catch (Exception cleanupException) {
      LogUtils.log(cleanupException);
    }
    try {
      db.deleteDocumentProperties(documentID);
    } catch (Exception cleanupException) {
      LogUtils.log(cleanupException);
    }
  }

  /**
   * Adds the text of a document to the full text index, using the document ID as key.
   * @param documentID Document ID.
   * @param fulltext Text of the document.
   * @throws CorruptIndexException
   * @throws IOException
   */
  private void storeInFulltextDatabase(long documentID, String fulltext) throws CorruptIndexException, IOException {
    fulltextIndex.add(Long.toString(documentID), fulltext);
  }

  /**
   * Stores every string key/value pair of the meta data as document property.
   * @param documentID Document ID.
   * @param metadata Meta data of the document; <code>null</code> means "no meta data".
   * @throws SQLException
   */
  private void setDocumentMetadata(long documentID, Properties metadata) throws SQLException {
    if (metadata == null) {
      return;
    }
    // stringPropertyNames() only delivers keys whose key and value are strings (defaults included),
    // so no unchecked cast is needed.
    for (String key : metadata.stringPropertyNames()) {
      db.setDocumentProperty(documentID, key, metadata.getProperty(key));
    }
  }

  /**
   * Finds (and creates, where missing) the chain of paths that belongs to the date of the given
   * time, starting at the path with ID 0.
   * @param documentID Document ID, used as time in millis.
   * @return ID of the last path of the chain.
   * @throws SQLException
   */
  private long getPathIDFromMillis(long documentID) throws SQLException {
    String[] date = DateUtils.getDatePathFromMillis(documentID);
    long pathID = 0;
    for (String datePart : date) {
      boolean found = false;
      for (Pair<Long, String> child : db.getChildPaths(pathID)) {
        if (child.obj2.equals(datePart)) {
          found = true;
          pathID = child.obj1;
          break;
        }
      }
      if (!found) {
        pathID = db.addPath(datePart, pathID);
      }
    }
    return pathID;
  }

  /**
   * Creates a list of sub directories from the given directory ID.
   * @param parentPathID ID of the parent directory.
   * @return List of the child directories as pairs of path ID and path name.
   * @throws Exception
   */
  public List<Pair<Long, String>> getDirectories(long parentPathID) throws Exception {
    return db.getChildPaths(parentPathID);
  }

  /**
   * Returns the name of a path.
   * @param pathID Path ID.
   * @return Name of the corresponding path.
   * @throws Exception
   */
  public String getPathName(long pathID) throws Exception {
    return db.getPathName(pathID);
  }

  /**
   * Returns a list of documents from the given path.
   * @param pathID Sub directory path ID.
   * @return List of document IDs.
   * @throws Exception
   */
  public List<Long> getFilesFromSubDirectory(long pathID) throws Exception {
    return db.getDocumentsFromPath(pathID);
  }

  /**
   * Returns the name of this archive.
   * @return Simple class name and canonical path of the root directory, separated by "::".
   * @throws Exception
   */
  public String getName() throws Exception {
    return getClass().getSimpleName() + "::" + rootDirectory.getCanonicalPath();
  }

  /**
   * Start a full text search over the archive and inform the listener every time a document is found.
   * The search runs in a new thread; this method returns immediately. Exceptions of the search
   * are logged.
   * @param searchtext Text to search.
   * @param listener Listener that wants to be informed.
   */
  public void documentSearch(final String searchtext, final DocumentFoundListener listener) {
    Runnable runner = new Runnable() {
      @Override
      public void run() {
        DocumentSearch search = new DocumentSearch(searchtext, fulltextIndex, DerbyArchive.this);
        try {
          search.searchDocuments(listener);
        } catch (Exception exception) {
          LogUtils.log(exception);
        }
      }
    };
    new Thread(runner, "Document search thread").start();
  }

  /**
   * Read document metadata from documentID.
   * @param documentID Document ID.
   * @return Metadata of this document as properties object.
   * @throws Exception
   */
  public Properties getMetadataFromDocID(long documentID) throws Exception {
    return db.getDocumentProperties(documentID);
  }

  /**
   * Creates a new or overwrites an existing metadata key/value pair for a document.
   * @param documentID DocID of the document.
   * @param key Key of the metadata.
   * @param value Value of the metadata.
   * @throws Exception
   */
  public void putMetadataViaDocID(long documentID, String key, String value) throws Exception {
    db.setDocumentProperty(documentID, key, value);
  }

  /**
   * Returns the file object of the document with the given ID. The document is read from the
   * database and written into the temp directory, unless a file for this ID already exists there.
   * @param documentID Document ID.
   * @return File of this document or <code>null</code> if the database has no content for this ID.
   * @throws IOException
   * @throws SQLException
   */
  public File getDocumentFileFromDocID(long documentID) throws IOException, SQLException {
    File file = getTempFile(documentID);
    if (file.exists()) {
      // Mark the cached file as recently used.
      file.setLastModified(System.currentTimeMillis());
      return file;
    }
    new File(TEMP_DIRECTORY_NAME).mkdirs();
    byte[] content = db.readDocument(documentID);
    if (content == null) {
      return null;
    }
    FileUtils.writeBytesIntoFile(content, file);
    return file;
  }

  /**
   * Returns the location of the cached file of a document, whether it exists or not.
   * @param documentID Document ID.
   * @return File in the temp directory that is used for this document.
   */
  private File getTempFile(long documentID) {
    return new File(new File(TEMP_DIRECTORY_NAME), documentID + TEMP_FILE_EXTENSION);
  }

  /**
   * Removes a file from the archive. The cached copy in the temp directory is removed too,
   * because otherwise {@link #getDocumentFileFromDocID(long)} would still deliver the deleted
   * document.
   * @param documentID Document ID.
   * @throws IOException
   * @throws SQLException
   */
  public void deleteDocumentById(long documentID) throws IOException, SQLException {
    db.deleteDocument(documentID);
    // Best effort: a cached file that can't be deleted (missing or still in use) is no error.
    getTempFile(documentID).delete();
  }

  /**
   * Shut down document archive.
   */
  public void shutdown() {
    db.close();
  }

  /**
   * @throws SQLException
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#getAllLabels()
   */
  @Override
  public List<Long> getAllLabels() throws SQLException {
    return db.getAllLabels();
  }

  /**
   * Creates a new label. A failure is logged and reported by the return value 0.
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#addNewLabel(java.lang.String)
   */
  @Override
  public long addNewLabel(String label) {
    try {
      return db.createLabel(label);
    } catch (Exception e) {
      LogUtils.log(e);
    }
    return 0;
  }

  /**
   * @throws SQLException
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#getLabelName(long)
   */
  @Override
  public String getLabelName(long labelID) throws SQLException {
    return db.getLabelName(labelID);
  }

  /**
   * Returns the labels of a document. Labeled objects are document IDs (<code>Long</code>);
   * for any other object an empty list is returned.
   * @throws SQLException
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#getLabelsFromLabeledObject(java.lang.Object)
   */
  @Override
  public List<Long> getLabelsFromLabeledObject(Object labeledObject) throws SQLException {
    if (labeledObject instanceof Long) {
      return db.getLabelsFromDocument((Long) labeledObject);
    }
    return new ArrayList<Long>();
  }

  /**
   * Deletes a label. A database error is only logged.
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#deleteLabel(long)
   */
  @Override
  public void deleteLabel(long labelID) {
    try {
      db.deleteLabel(labelID);
    } catch (SQLException exception) {
      LogUtils.log(exception);
    }
  }

  /**
   * @see de.chris_soft.utilities.swing.labellist.LabelStore#renameLabel(long, java.lang.String)
   */
  @Override
  public void renameLabel(long labelID, String newLabelName) throws SQLException {
    db.changeLabelName(labelID, newLabelName);
  }
}