Package org.apache.tika.detect

Examples of org.apache.tika.detect.Detector


    /**
     * We don't currently support the .xlsb file format
     *  (an OOXML container with binary blobs), but we
     *  shouldn't break on these files either (TIKA-826)
     */
    public void testExcelXLSB() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL.xlsb");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here


    }
   
    protected MediaType getMediaType(BufferedInputStream inputStream, String fileName) throws IOException {
        final TikaInputStream tikaInputStreamStream = TikaInputStream.get(new CloseShieldInputStream(inputStream));
        try {
            final Detector detector = new DefaultDetector();
            final Metadata metadata = new Metadata();
            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
           
            final MediaType type = detector.detect(tikaInputStreamStream, metadata);
            logger.debug("Determined '{}' for '{}'", type, fileName);
            return type;
        }
        catch (IOException e) {
            logger.warn("Failed to determine media type for '" + fileName + "' assuming XML", e);
View Full Code Here

               Object instance = detectorClass.newInstance();
               if (!(instance instanceof Detector)) {
                   throw new TikaException(
                           "Configured class is not a Tika Detector: " + name);
               }
               Detector detector = (Detector) instance;
               detectors.add(detector);
           } catch (ClassNotFoundException e) {
               throw new TikaException(
                       "Configured detector class not found: " + name, e);
           } catch (IllegalAccessException e) {
View Full Code Here

    }

    if (mediaType !=null) {
      metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE, mediaType.toString());

      final Detector detector = parser.getDetector();

      parser.setDetector(new Detector() {
        public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException {
          String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);

          if (ct!=null) {
            return MediaType.parse(ct);
          } else {
            return detector.detect(inputStream, metadata);
          }
        }
      });
    }
  }
View Full Code Here

       ForkParser parser = new ForkParser(
             ForkParserIntegrationTest.class.getClassLoader(),
             tika.getParser());
      
       ParseContext context = new ParseContext();
       context.set(Detector.class, new Detector() {
          // Ignores both the stream and the metadata and always reports
          // MediaType.OCTET_STREAM.
          public MediaType detect(InputStream input, Metadata metadata) {
             return MediaType.OCTET_STREAM;
          }
       });
View Full Code Here

public class TNEFParserTest extends AbstractPOIContainerExtractionTest {
   private static final String file = "testWINMAIL.dat";
  
   public void testBasics() throws Exception {
      TikaInputStream stream = getTestFile(file);
      Detector detector = new DefaultDetector();
      try {
         assertEquals(
                 MediaType.application("vnd.ms-tnef"),
                 detector.detect(stream, new Metadata()));
     } finally {
         stream.close();
     }
   }
View Full Code Here

    /**
     * We don't currently support the .xlsb file format
     *  (an OOXML container with binary blobs), but we
     *  shouldn't break on these files either (TIKA-826)
     */
    public void testExcelXLSB() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL.xlsb");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here

    /**
     * We don't currently support the old Excel 95 .xls file format,
     *  but we shouldn't break on these files either (TIKA-976) 
     */
    public void testExcel95() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL_95.xls");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here

       ForkParser parser = new ForkParser(
             ForkParserIntegrationTest.class.getClassLoader(),
             tika.getParser());
      
       ParseContext context = new ParseContext();
       context.set(Detector.class, new Detector() {
          // Ignores both the stream and the metadata and always reports
          // MediaType.OCTET_STREAM.
          public MediaType detect(InputStream input, Metadata metadata) {
             return MediaType.OCTET_STREAM;
          }
       });
View Full Code Here

public class TNEFParserTest extends AbstractPOIContainerExtractionTest {
   private static final String file = "testWINMAIL.dat";
  
   public void testBasics() throws Exception {
      TikaInputStream stream = getTestFile(file);
      Detector detector = new DefaultDetector();
      try {
         assertEquals(
                 MediaType.application("vnd.ms-tnef"),
                 detector.detect(stream, new Metadata()));
     } finally {
         stream.close();
     }
   }
View Full Code Here

TOP

Related Classes of org.apache.tika.detect.Detector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.