Package org.apache.tika

Examples of org.apache.tika.Tika


    }

    // TIKA-1011
    @Test
    public void testUserDefinedCharset() throws Exception {
        String content = new Tika().parseToString(
                HtmlParserTest.class.getResourceAsStream("/test-documents/testUserDefinedCharset.mhtml"), new Metadata());
        assertNotNull(content);
    }
View Full Code Here


    }
   
    //TIKA-1001
    @Test
    public void testNoisyMetaCharsetHeaders() throws Exception {
       Tika tika = new Tika();
       String hit = "\u0623\u0639\u0631\u0628";

       for (int i = 1; i <=4; i++){
          String fileName = "/test-documents/testHTMLNoisyMetaEncoding_"+i+".html";
          String content = tika.parseToString(
                HtmlParserTest.class.getResourceAsStream(fileName));
          assertTrue("testing: " +fileName, content.contains(hit));
       }
    }
View Full Code Here

    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    logger.info("Starting Tikaserver "+properties.getProperty("tikaserver.version"));
    logger.info("Starting Tika Server " + new Tika().toString());

    try {
      Options options = getOptions();

      CommandLineParser cliParser = new GnuParser();
View Full Code Here

     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-346">TIKA-346</a>
     */
    @Test
    public void testUnsupportedZipCompressionMethod() throws Exception {
        String content = new Tika().parseToString(
                ZipParserTest.class.getResourceAsStream(
                        "/test-documents/moby.zip"));
        assertTrue(content.contains("README"));
    }
View Full Code Here

public class OOXMLContainerExtractionTest extends AbstractPOIContainerExtractionTest {
    private ContainerExtractor extractor;
   
    @Before
    public void setUp() {
        Tika tika = new Tika();
        extractor = new ParserContainerExtractor(
                tika.getParser(), tika.getDetector());
    }
View Full Code Here

    @Before
    public void setUp() throws Exception{
        TikaConfig config = TikaConfig.getDefaultConfig();
        repo = config.getMimeRepository();
        tika = new Tika(config);
        u = new URL("http://mydomain.com/x.pdf?x=y");
    }
View Full Code Here

  /* our log stream */
  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());

  public MimeUtil(Configuration conf) {
    tika = new Tika();
    ObjectCache objectCache = ObjectCache.get(conf);
    MimeTypes mimeTypez = (MimeTypes) objectCache.getObject(MimeTypes.class
        .getName());
    if (mimeTypez == null) {
      try {
View Full Code Here

        URL aURL = new URL(newurl);
        String base = aURL.toString();
        int i = fname.lastIndexOf('.');
        if (i != -1) {
          // Trying to resolve the Mime-Type
          Tika tika = new Tika();
          String contentType = tika.detect(fname);
          try {
            Metadata metadata = new Metadata();
            metadata.set(Response.CONTENT_LENGTH, Long.toString(entry.getSize()));
            metadata.set(Response.CONTENT_TYPE, contentType);
            Content content = new Content(newurl, base, b, contentType, metadata, this.conf);
View Full Code Here

    this.base = url.toString();
    this.file = file;
    this.conf = conf;
   
    MIME = new MimeUtil(conf);
    tika = new Tika();

    if (!"file".equals(url.getProtocol()))
      throw new FileException("Not a file url:" + url);

    if (File.LOG.isTraceEnabled()) {
View Full Code Here

        if (types == null) {
            types = config.getMimeRepository();
        }

        if(tika == null) {
            tika = new Tika(config);
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.Tika

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.