Package org.apache.tika.parser

Examples of org.apache.tika.parser.ParseContext


            }
        }
    }

    public TikaConfig() throws MimeTypeException, IOException {
        ParseContext context = new ParseContext();
        Iterator<Parser> iterator =
            ServiceRegistry.lookupProviders(Parser.class);
        while (iterator.hasNext()) {
            Parser parser = iterator.next();
            for (MediaType type : parser.getSupportedTypes(context)) {
View Full Code Here


          throw new IllegalArgumentException("Parameter must be instance of byte[]");
        }

        ContentHandler textHandler = new BodyContentHandler(fileLengthLimit);
        Metadata metadata = new Metadata();
        ParseContext context = new ParseContext();

        try {
          metadata.set(Metadata.CONTENT_TYPE, tika.detect(inputStream));

          parser.parse(inputStream, textHandler, metadata, context);
View Full Code Here

    private String profileName = null;

    private boolean prettyPrint;
   
    public TikaCLI() throws Exception {
        context = new ParseContext();
        detector = new DefaultDetector();
        parser = new AutoDetectParser(detector);
        context.set(Parser.class, parser);
        context.set(PasswordProvider.class, new PasswordProvider() {
            public String getPassword(Metadata metadata) {
View Full Code Here

        // Package extraction
        ContentHandler handler = new BodyContentHandler();

        Parser parser = tika.getParser();
        ParseContext context = new ParseContext();
        context.set(Parser.class, parser);

        InputStream stream =
                new FileInputStream("src/test/resources/test-documents.zip");
        try {
            parser.parse(stream, handler, new Metadata(), context);
View Full Code Here

        StringBuilder wBuf = new StringBuilder();
        InputStream stream = null;
        Metadata metadata = new Metadata();
        HtmlParser htmlParser = new HtmlParser();
        BodyContentHandler handler = new BodyContentHandler(-1);// -1
        ParseContext parser = new ParseContext();
        try {
            stream = new ByteArrayInputStream(byteObject);
            htmlParser.parse(stream, handler, metadata, parser);
            wBuf.append(handler.toString()
                    + System.getProperty("line.separator"));
View Full Code Here

    protected XMLResult getXML(String filePath) throws Exception {
        return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), new Metadata());
    }

    protected XMLResult getXML(InputStream input, Parser parser, Metadata metadata) throws Exception {
      ParseContext context = new ParseContext();
      context.set(Parser.class, parser);

      try {
          ContentHandler handler = new ToXMLContentHandler();
          parser.parse(input, handler, metadata, context);
          return new XMLResult(handler.toString(), metadata);
View Full Code Here

        }
        return handler.toString();
    }

    public String getText(InputStream is, Parser parser, Metadata metadata) throws Exception{
        return getText(is, parser, new ParseContext(), metadata);
    }
View Full Code Here

    public String getText(InputStream is, Parser parser, ParseContext context) throws Exception{
        return getText(is, parser, context, new Metadata());
    }

    public String getText(InputStream is, Parser parser) throws Exception{
        return getText(is, parser, new ParseContext(), new Metadata());
    }
View Full Code Here

        Parser parser = tika.getParser();
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(
                    stream, new DefaultHandler(), metadata, new ParseContext());
        } finally {
            stream.close();
        }
        assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
    }
View Full Code Here

        Parser parser = tika.getParser();
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(
                    stream, new DefaultHandler(), metadata, new ParseContext());
        } finally {
            stream.close();
        }
        assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.parser.ParseContext

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.