Package org.apache.tika.detect

Examples of org.apache.tika.detect.DefaultDetector


        String data = "<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
        InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
        Writer writer = new StringWriter();
        ContentHandler contentHandler = new BodyContentHandler(writer);
        Metadata metadata = new Metadata();
        Detector contentTypeDetector = new DefaultDetector(classLoader);
        MediaType type = contentTypeDetector.detect(stream, metadata);
        assertEquals(type.toString(), "text/html");
        metadata.add(Metadata.CONTENT_TYPE, type.toString());
        ParseContext parseCtx = new ParseContext();
        parser.parse(stream, contentHandler, metadata, parseCtx);
        writer.flush();
View Full Code Here


        assertNotNull(rawDetectors);
        assertTrue("Should have several Detector names, found " + rawDetectors.size(),
                rawDetectors.size() > 3);

        // Get the classes found within OSGi
        DefaultDetector detector = new DefaultDetector();
        Set<String> osgiDetectors = new HashSet<String>();
        for (Detector d : detector.getDetectors()) {
            osgiDetectors.add(d.getClass().getName());
        }

        // Check that OSGi didn't miss any
        for (String detectorName : rawDetectors) {
View Full Code Here

     (an OOXML container with binary blobs), but we
     *  shouldn't break on these files either (TIKA-826)
     */
    @Test
    public void testExcelXLSB() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL.xlsb");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here

     * We don't currently support the old Excel 95 .xls file format,
     *  but we shouldn't break on these files either (TIKA-976)
     */
    @Test
    public void testExcel95() throws Exception {
       Detector detector = new DefaultDetector();
       AutoDetectParser parser = new AutoDetectParser();
      
       InputStream input = ExcelParserTest.class.getResourceAsStream(
             "/test-documents/testEXCEL_95.xls");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel", type.toString());
       } finally {
          input.close();
       }
      
View Full Code Here

    private ServiceRegistration forkParserService;

    public void start(BundleContext context) throws Exception {
        detectorService = context.registerService(
                Detector.class.getName(),
                new DefaultDetector(Activator.class.getClassLoader()),
                new Properties());
        Parser parser = new DefaultParser(Activator.class.getClassLoader());
        parserService = context.registerService(
                Parser.class.getName(),
                parser,
View Full Code Here

   private static final String file = "testWINMAIL.dat";
  
   @Test
   public void testBasics() throws Exception {
      TikaInputStream stream = getTestFile(file);
      Detector detector = new DefaultDetector();
      try {
         assertEquals(
                 MediaType.application("vnd.ms-tnef"),
                 detector.detect(stream, new Metadata()));
     } finally {
         stream.close();
     }
   }
View Full Code Here

TOP

Related Classes of org.apache.tika.detect.DefaultDetector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.