Package org.exoplatform.services.document.tika

Source Code of org.exoplatform.services.document.tika.TestPropertiesExtractionOnTika

/*
* Copyright (C) 2003-2010 eXo Platform SAS.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see<http://www.gnu.org/licenses/>.
*/
package org.exoplatform.services.document.tika;

import org.exoplatform.services.document.DCMetaData;
import org.exoplatform.services.document.DocumentReader;
import org.exoplatform.services.document.DocumentReaderService;
import org.exoplatform.services.document.test.BaseStandaloneTest;
import org.exoplatform.services.document.test.TestPropertiesExtracting;

import java.io.InputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

/**
* Created by The eXo Platform SAS.
*
* <br/>Date:
*
* @author <a href="karpenko.sergiy@gmail.com">Karpenko Sergiy</a>
* @version $Id: TestPropertiesExtractionOnTika.java 111 2008-11-11 11:11:11Z serg $
*/
public class TestPropertiesExtractionOnTika extends BaseStandaloneTest
{
   DocumentReaderService service;

   @Override
   public void setUp() throws Exception
   {
      super.setUp();
      service = (DocumentReaderService)getComponentInstanceOfType(DocumentReaderService.class);

   }

   // TODO fix this test
   //   public void testPDFDocumentReaderServiceXMPMetadata() throws Exception
   //   {
   //      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/MyTest.pdf");
   //      try
   //      {
   //         DocumentReader rdr = service.getDocumentReader("application/pdf");
   //         Properties testprops = rdr.getProperties(is);
   //         Properties etalon = new Properties();
   //         etalon.put(DCMetaData.TITLE, "Test de convertion de fichier tif");
   //         etalon.put(DCMetaData.CREATOR, "Christian Klaus");
   //         etalon.put(DCMetaData.SUBJECT, "20080901 TEST Christian Etat OK");
   //         Calendar c = ISO8601.parseEx("2008-09-01T08:01:10+00:00");
   //         etalon.put(DCMetaData.DATE, c);
   //         evalProps(etalon, testprops);
   //      }
   //      finally
   //      {
   //         is.close();
   //      }
   //   }

   public void testPDFDocumentReaderServiceXMPMetadataTikasFile() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/tikaTestPDF.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Document title");
         etalon.put(DCMetaData.CREATOR, "Document author");
         evalProps(etalon, testprops);
      }
      finally
      {
         is.close();
      }
   }

   public void testWordDocumentReaderService() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.doc");
      try
      {
         Properties props = service.getDocumentReader("application/msword").getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "test-Title");
         etalon.put(DCMetaData.DATE, (new Date(1283247060000L)).toString());
         etalon.put(DCMetaData.SUBJECT, "test-Subject");
         etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");

         evalProps(etalon, props);
      }
      finally
      {
         is.close();
      }
   }

   public void testPPTDocumentReaderService() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.ppt");
      try
      {
         Properties props = service.getDocumentReader("application/powerpoint").getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "test-Title");
         etalon.put(DCMetaData.DATE, (new Date(1283247255041L)).toString());
         etalon.put(DCMetaData.SUBJECT, "test-Subject");
         etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");
         evalProps(etalon, props);
      }
      finally
      {
         is.close();
      }
   }

   public void testExcelDocumentReaderService() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.xls");
      try
      {
         Properties props = service.getDocumentReader("application/excel").getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "test-Title");
         etalon.put(DCMetaData.DATE, (new Date(1283247293000L)).toString());
         etalon.put(DCMetaData.SUBJECT, "test-Subject");
         etalon.put(DCMetaData.CREATOR, "KHANH NGUYEN GIA");
         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");

         evalProps(etalon, props);
      }
      finally
      {
         is.close();
      }
   }

   public void testXWordDocumentReaderService() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.docx");
      try
      {
         Properties props =
            service.getDocumentReader("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
               .getProperties(is);
         Properties etalon = new Properties();
         Calendar date = Calendar.getInstance();
         date.setTimeInMillis(0);
         date.set(2010, 7, 31, 7, 53, 0);

         etalon.put(DCMetaData.TITLE, "test-Title");
         // TODO dates
         //etalon.put(DCMetaData.DATE, date.getTime().toString());
         etalon.put(DCMetaData.SUBJECT, "Subject");
         etalon.put(DCMetaData.CREATOR, "nikolaz");
         //etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");

         evalProps(etalon, props);
      }
      finally
      {
         is.close();
      }
   }

   public void testXPPTDocumentReaderService() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.pptx");
      try
      {
         Properties props =
            service.getDocumentReader("application/vnd.openxmlformats-officedocument.presentationml.presentation")
               .getProperties(is);
         Properties etalon = new Properties();
         Calendar date = Calendar.getInstance();
         date.setTimeInMillis(0);
         date.set(2010, 7, 31, 7, 59, 37);

         etalon.put(DCMetaData.TITLE, "test-Title");
         //TODO dates
         //etalon.put(DCMetaData.DATE, date.getTime().toString());
         etalon.put(DCMetaData.SUBJECT, "test-Subject");
         etalon.put(DCMetaData.CREATOR, "Max Yakimenko");
         etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");

         evalProps(etalon, props);
      }
      finally
      {
         is.close();
      }
   }

   public void testXExcelDocumentReaderService() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.xlsx");
      try
      {
         Properties props =
            service.getDocumentReader("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
               .getProperties(is);
         Properties etalon = new Properties();
         Calendar date = Calendar.getInstance();
         date.setTimeInMillis(0);
         date.set(2010, 7, 31, 8, 7, 25);

         etalon.put(DCMetaData.TITLE, "test-Title");
         // TODO dates
         //etalon.put(DCMetaData.DATE, date.getTime().toString());
         etalon.put(DCMetaData.SUBJECT, "test-Subject");
         etalon.put(DCMetaData.CREATOR, "KHANH NGUYEN GIA");
         //etalon.put(DCMetaData.CONTRIBUTOR, "Max Yakimenko");
         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");

         evalProps(etalon, props);
      }
      finally
      {
         is.close();
      }
   }

   public void testOODocumentReaderService() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/test.odt");
      try
      {
         Properties props = service.getDocumentReader("application/vnd.oasis.opendocument.text").getProperties(is);
         printProps(props);
         Properties etalon = new Properties();
         Calendar date = Calendar.getInstance();
         date.setTimeInMillis(0);
         date.set(2010, 8, 3, 14, 37, 59);

         etalon.put(DCMetaData.TITLE, "test-Title");
         etalon.put(DCMetaData.LANGUAGE, "ru-RU");
         etalon.put(DCMetaData.DATE, date.getTime().toString());
         etalon.put(DCMetaData.SUBJECT, "test-Subject");
         etalon.put(DCMetaData.CREATOR, "Sergiy Karpenko");
         etalon.put(DCMetaData.DESCRIPTION, "test-Comments");

         evalProps(etalon, props);
      }
      finally
      {
         is.close();
      }
   }

   private void printProps(Properties props)
   {
      Iterator it = props.entrySet().iterator();
      props.toString();
      while (it.hasNext())
      {
         Map.Entry entry = (Map.Entry)it.next();
      }
   }

   private void evalProps(Properties etalon, Properties testedProps)
   {
      Iterator it = etalon.entrySet().iterator();
      while (it.hasNext())
      {
         Map.Entry prop = (Map.Entry)it.next();
         Object tval = testedProps.get(prop.getKey());
         assertNotNull(prop.getKey() + " property not founded. ", tval);
         assertEquals(prop.getKey() + " property value is incorrect", prop.getValue(), tval);
      }
   }

}
TOP

Related Classes of org.exoplatform.services.document.tika.TestPropertiesExtractionOnTika

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.