Package de.chris_soft.nanodoa

Source Code of de.chris_soft.nanodoa.DocumentInputManagement

/**
* NanoDoA - File based document archive
*
* Copyright (C) 2011-2012 Christian Packenius, christian.packenius@googlemail.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package de.chris_soft.nanodoa;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.Locale;
import java.util.Properties;
import java.util.Vector;

import javax.mail.MessagingException;

import com.itextpdf.text.Document;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
import com.itextpdf.text.pdf.codec.TiffImage;

import de.chris_soft.nanoarchive.Archive;
import de.chris_soft.nanoarchive.Metadata;
import de.chris_soft.nanodoa.web.DocumentRetrievalWebsite;
import de.chris_soft.nanodoa.web.DocumentServerKeys;
import de.chris_soft.nanodoa.web.WebsiteVariableConstants;
import de.chris_soft.utilities.AppProperties;
import de.chris_soft.utilities.FileUtils;
import de.chris_soft.utilities.HtmlFormatter;
import de.chris_soft.utilities.IdUtils;
import de.chris_soft.utilities.LogUtils;
import de.chris_soft.utilities.SendHtmlMailUtils;

/**
* Searches for documents in multiple directories and puts them into the
* archive.
* @author Christian Packenius.
*/
public class DocumentInputManagement implements Runnable, DocumentServerKeys {
  private final Archive archive;

  private final TrayIconAndMenu trayIAM;

  private final Vector<File> sourceDirectories = new Vector<File>();

  private final Vector<File> sourceFiles = new Vector<File>();

  private boolean stopObservation = false;

  private final Thread thread;

  /**
   * Constructor.
   * @param archive Archive for the documents.
   * @param trayIAM
   * @throws IOException
   */
  public DocumentInputManagement(Archive archive, TrayIconAndMenu trayIAM) throws IOException {
    this.archive = archive;
    this.trayIAM = trayIAM;
    thread = startObservation();
  }

  private Thread startObservation() {
    Thread thread = new Thread(this);
    thread.setDaemon(false);
    thread.start();
    return thread;
  }

  /**
   * Stop observation of destination directories.
   */
  public void stopObservation() {
    stopObservation = true;
  }

  /**
   * Changes the list of directories to observe for documents.
   * @param directories Directory list to observe.
   */
  public void setDestinationDirectories(Collection<File> directories) {
    sourceDirectories.clear();
    sourceDirectories.addAll(directories);
  }

  /**
   * @see java.lang.Runnable#run()
   */
  @Override
  public void run() {
    while (!stopObservation) {
      fillSourceFileList();
      trayIAM.setImage(sourceFiles.size());
      workFiles();
      if (!stopObservation) {
        pauseShortTime();
      }
    }
  }

  private void fillSourceFileList() {
    for (File directory : sourceDirectories) {
      fillFileList(directory);
    }
  }

  private void fillFileList(File directory) {
    if (directory != null) {
      if (directory.isFile()) {
        addFileAfterFilter(directory);
      }
      else if (directory.isDirectory()) {
        addDirectoryFiles(directory);
      }
    }
  }

  private void addFileAfterFilter(File file) {
    if (isArchivableFile(file)) {
      sourceFiles.add(file);
    }
  }

  private boolean isArchivableFile(File file) {
    String ext = FileUtils.getFileExtension(file);
    if (ext != null) {
      ext = ext.toLowerCase();
      if (ext.equals("jpg") || ext.equals("jpe") || ext.equals("jpeg")) {
        return true;
      }
      if (ext.equals("png") || ext.equals("gif")) {
        return true;
      }
      if (ext.equals("pdf")) {
        return true;
      }
      if (ext.equals("tif") || ext.equals("tiff")) {
        return true;
      }
    }
    return false;
  }

  private void addDirectoryFiles(File directory) {
    File[] files = directory.listFiles();
    if (files != null) {
      for (File file : files) {
        fillFileList(file);
      }
    }
  }

  private void workFiles() {
    if (!sourceFiles.isEmpty()) {
      waitFiveSeconds();
    }
    while (!sourceFiles.isEmpty() && !stopObservation) {
      trayIAM.setImage(sourceFiles.size());
      File file = sourceFiles.remove(0);
      if (FileUtils.isReadable(file) && file.length() > 0) {
        workFile(file);
      }
    }
  }

  private void waitFiveSeconds() {
    try {
      Thread.sleep(5000);
    }
    catch (InterruptedException e) {
      // Ignore.
    }
  }

  private void workFile(File documentFile) {
    System.out.println("Working: " + documentFile.getName());
    Properties metadata = new Properties();
    setUniqueDocumentID(metadata);
    getDocumentTextMetadata(documentFile, metadata);
    String pdfFile;
    if ((pdfFile = createArchivePdfFromImage(documentFile, metadata)) != null) {
      if (!sendDocumentViaMail(new File(pdfFile), metadata)) {
        try {
          archive.deleteDocumentById(metadata.getProperty(Metadata.DOCUMENT_ID));
        }
        catch (IOException exception) {
          // Sorry - now we have to archive it twice!
          deleteOriginalImage(documentFile);
        }
      }
      else {
        deleteOriginalImage(documentFile);
      }
    }
  }

  private void setUniqueDocumentID(Properties metadata) {
    String documentID = Long.toString(IdUtils.getUniqueID());
    metadata.setProperty(Metadata.CREATION_TIME, documentID);
    documentID = formatUniqueDocID(documentID);
    metadata.setProperty(Metadata.DOCUMENT_ID, documentID);
  }

  private String formatUniqueDocID(String name) {
    int k = name.length() - 4;
    while (k > 0) {
      name = name.substring(0, k) + "-" + name.substring(k);
      k -= 4;
    }
    return name;
  }

  private void getDocumentTextMetadata(File documentFile, Properties metadata) {
    metadata.setProperty(Metadata.FULLTEXT, FulltextRecognizer.getFulltext(documentFile, metadata));
  }

  private String createArchivePdfFromImage(File documentFile, Properties metadata) {
    File pdfFile = new File(IdUtils.getUniqueID() + ".pdf");
    String filePath = null;
    try {
      String ext = FileUtils.getFileExtension(documentFile).toLowerCase();
      String fulltext = metadata.getProperty(Metadata.FULLTEXT);
      if (ext.equals("pdf")) {
        filePath = archive.store(documentFile, fulltext, metadata);
      }
      else if (ext.equals("tif") || ext.equals("tiff")) {
        createPdfDocumentFileFromTIFF(documentFile, pdfFile);
        filePath = archive.store(pdfFile, fulltext, metadata);
      }
      else {
        createPdfDocumentFileFromSingleImageFile(documentFile, pdfFile);
        filePath = archive.store(pdfFile, fulltext, metadata);
      }
    }
    catch (Exception e) {
      return null;
    }
    FileUtils.deleteFile(pdfFile);
    return filePath;
  }

  private void createPdfDocumentFileFromTIFF(File tiffFile, File pdfFile) throws Exception {
    Document document = new Document(PageSize.A4, 0.0f, 0.0f, 0.0f, 0.0f);
    PdfWriter.getInstance(document, new FileOutputStream(pdfFile));
    document.open();
    RandomAccessFileOrArray ra = new RandomAccessFileOrArray(tiffFile.getCanonicalPath());
    int pageCount = TiffImage.getNumberOfPages(ra);
    for (int i = 0; i < pageCount; ++i) {
      Image img = TiffImage.getTiffImage(ra, i + 1);
      if (img != null) {
        if (i != 0) {
          document.newPage();
        }
        if (img.getWidth() > PageSize.A4.getWidth() || img.getHeight() > PageSize.A4.getHeight()) {
          img.scaleToFit(PageSize.A4.getWidth(), PageSize.A4.getHeight());
        }
        document.add(img);
      }
    }
    ra.close();
    document.close();
  }

  private void createPdfDocumentFileFromSingleImageFile(File imageFile, File pdfFile) throws Exception {
    Document document = new Document(PageSize.A4, 0.0f, 0.0f, 0.0f, 0.0f);
    PdfWriter.getInstance(document, new FileOutputStream(pdfFile));
    document.open();
    Image image = Image.getInstance(imageFile.getCanonicalPath());
    image.scaleToFit(PageSize.A4.getWidth(), PageSize.A4.getHeight());
    document.add(image);
    document.close();
  }

  private boolean sendDocumentViaMail(File pdfFile, Properties metadata) {
    if (!Boolean.parseBoolean(AppProperties.getProperty(MailKeys.PROP_KEY_SHALL_SEND_MAIL))) {
      return true;
    }
    SendHtmlMailUtils mail = new SendHtmlMailUtils();
    if (Boolean.parseBoolean(AppProperties.getProperty(MailKeys.PROP_KEY_ADD_DOCUMENT_AS_ATTACHMENT))) {
      mail.addAttachment(pdfFile);
    }
    mail.addTO(AppProperties.getProperty(MailKeys.PROP_KEY_RECEIVER_MAIL_ADDRESS));
    mail.setFrom(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_MAIL_ADDRESS));
    mail.setLogin(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_MAIL_ADDRESS));
    mail.setPassword(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_MAIL_PW));
    mail.setPop3Host(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_POP3_SERVER));
    mail.setSmtp(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_SMTP_SERVER),
        Integer.parseInt(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_SMTP_PORT)));
    String docID = metadata.getProperty(Metadata.DOCUMENT_ID);
    mail.setSubject("::from::OCR:: " + docID);
    String link = "";
    if (Boolean.parseBoolean(AppProperties.getProperty(PROP_KEY_DOCUMENT_SERVER_SHALL_USE))) {
      String serverName = AppProperties.getProperty(PROP_KEY_DOCUMENT_SERVER_NAME);
      String port = AppProperties.getProperty(PROP_KEY_DOCUMENT_SERVER_PORT);
      // String serverMethod = DocumentServer.DOCUMENT_RETRIEVAL_METHOD;
      String serverMethod = new DocumentRetrievalWebsite().getWebsiteName();
      String docParm = WebsiteVariableConstants.varnameDocumentID + "=" + docID;
      String href = "http://" + serverName + ":" + port + "/" + serverMethod + "?" + docParm + "";
      link = "<p><a href=\"" + href + "\">Link zu " + docID + "</a></p>";
    }
    if (Boolean.parseBoolean(AppProperties.getProperty(MailKeys.PROP_KEY_SET_FULLTEXT_INTO_MAIL_BODY))) {
      String fulltext = metadata.getProperty(Metadata.FULLTEXT);
      String htmltext = HtmlFormatter.getHtmlString(fulltext, false, false);
      mail.setText(link + "<p>" + htmltext + "</p>");
    }
    try {
      mail.sendMail();
      return true;
    }
    catch (MessagingException e) {
      LogUtils.log(e);
      return false;
    }
  }

  private void deleteOriginalImage(File documentFile) {
    FileUtils.deleteFile(documentFile);
  }

  private void pauseShortTime() {
    try {
      Thread.sleep(500);
    }
    catch (InterruptedException e) {
      // Ignore.
    }
  }

  /**
   * Check if the document input management has been stopped.
   * @return <i>true</i> if the thread is not running any more.
   */
  public boolean isStopped() {
    return !thread.isAlive();
  }
}
TOP

Related Classes of de.chris_soft.nanodoa.DocumentInputManagement

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.