Package org.apache.tika.parser.pdf

Examples of org.apache.tika.parser.pdf.PDFParser


                parsers.put(MediaType.application("vnd.sun.xml.calc"), parser);
                parsers.put(MediaType.application("vnd.sun.xml.draw"), parser);
                parsers.put(MediaType.application("vnd.sun.xml.impress"), parser);
                parsers.put(MediaType.application("vnd.sun.xml.writer"), parser);
            } else if (name.equals("org.apache.jackrabbit.extractor.PdfTextExtractor")) {
                parsers.put(MediaType.application("pdf"), new PDFParser());
            } else if (name.equals("org.apache.jackrabbit.extractor.PlainTextExtractor")) {
                parsers.put(MediaType.TEXT_PLAIN, new TXTParser());
            } else if (name.equals("org.apache.jackrabbit.extractor.PngTextExtractor")) {
                Parser parser = new ImageParser();
                parsers.put(MediaType.image("png"), parser);
View Full Code Here


                parsers.put("application/vnd.sun.xml.draw", parser);
                parsers.put("application/vnd.sun.xml.impress", parser);
                parsers.put("application/vnd.sun.xml.writer", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PdfTextExtractor")) {
                parsers.put("application/pdf", new PDFParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PlainTextExtractor")) {
                parsers.put("text/plain", new TXTParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PngTextExtractor")) {
View Full Code Here

                parsers.put("application/vnd.sun.xml.draw", parser);
                parsers.put("application/vnd.sun.xml.impress", parser);
                parsers.put("application/vnd.sun.xml.writer", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PdfTextExtractor")) {
                parsers.put("application/pdf", new PDFParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PlainTextExtractor")) {
                parsers.put("text/plain", new TXTParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PngTextExtractor")) {
View Full Code Here

                parsers.put("application/vnd.sun.xml.draw", parser);
                parsers.put("application/vnd.sun.xml.impress", parser);
                parsers.put("application/vnd.sun.xml.writer", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PdfTextExtractor")) {
                parsers.put("application/pdf", new PDFParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PlainTextExtractor")) {
                parsers.put("text/plain", new TXTParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PngTextExtractor")) {
View Full Code Here

    InputStream input;
    try {     
      input = new FileInputStream(new File(f.fileName()));
      ContentHandler textHandler = new BodyContentHandler(-1);
      Metadata metadata = new Metadata();
      PDFParser parser = new PDFParser()
      ParseContext context = new ParseContext();
      parser.parse(input, textHandler, metadata, context);
      String[] result = textHandler.toString().split(regex);
      for (int i=0; i<result.length && keepRunning; i++) {
        if (interrupt) {
          processInterrupt();
        }
View Full Code Here

        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        writer = new IndexWriter(directory, config);   
        writer.commit();
       
        parser = new FiqlParser<SearchBean>(SearchBean.class);
        extractor = new TikaLuceneContentExtractor(new PDFParser());
    }
View Full Code Here

    private SearchConditionParser< SearchBean > parser;
   
    @Before
    public void setUp() throws Exception {
        parser = new FiqlParser<SearchBean>(SearchBean.class);
        extractor = new TikaContentExtractor(new PDFParser());
    }
View Full Code Here

            extractor.extract(getClass().getResourceAsStream("/files/testTXT.txt")));       
    }

    @Test
    public void testExtractionFromRtfFileUsingPdfParserWithoutMediaTypeValidationFails() {
        final TikaContentExtractor another = new TikaContentExtractor(new PDFParser(), false);
        assertNull("Document should be null, it is not a PDF",
            another.extract(getClass().getResourceAsStream("/files/testRTF.rtf")));       
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.parser.pdf.PDFParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.