Package org.apache.tika.sax

Examples of org.apache.tika.sax.WriteOutContentHandler


       
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        tika.getParser().parse(
                     new FileInputStream(file),
                     new WriteOutContentHandler(writer),
                     metadata,
                     new ParseContext());
        String content = writer.toString();

        assertEquals("application/rtf", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here


      
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        tika.getParser().parse(
                     new FileInputStream(file),
                     new WriteOutContentHandler(writer),
                     metadata,
                     new ParseContext());
        String content = writer.toString();
        return new Result(content, metadata);
    }
View Full Code Here

               try
               {
                  Metadata metadata = new Metadata();
                  metadata.set(Metadata.CONTENT_TYPE, mimeType);

                  ContentHandler handler = new WriteOutContentHandler(MAX_READED_SIZE);
                  ParseContext context = new ParseContext();
                  context.set(Parser.class, parser);
                  try
                  {
                     parser.parse(is, handler, metadata, context);
View Full Code Here

               try
               {
                  Metadata metadata = new Metadata();
                  metadata.set(Metadata.CONTENT_TYPE, mimeType);

                  ContentHandler handler = new WriteOutContentHandler(MAX_READED_SIZE);
                  ParseContext context = new ParseContext();
                  context.set(Parser.class, parser);
                  try
                  {
                     parser.parse(is, handler, metadata, context);
View Full Code Here

       
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        tika.getParser().parse(
                     new FileInputStream(file),
                     new WriteOutContentHandler(writer),
                     metadata,
                     new ParseContext());
        String content = writer.toString();

        assertEquals("application/rtf", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

      
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        tika.getParser().parse(
                     new FileInputStream(file),
                     new WriteOutContentHandler(writer),
                     metadata,
                     new ParseContext());
        String content = writer.toString();
        return new Result(content, metadata);
    }
View Full Code Here

        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        parser.parse(
                new ByteArrayInputStream(text.getBytes("UTF-8")),
                new WriteOutContentHandler(writer),
                metadata,
                new ParseContext());
        String content = writer.toString();

        assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

    public void testCP866() throws Exception {
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        parser.parse(
                TXTParserTest.class.getResourceAsStream("/test-documents/russian.cp866.txt"),
                new WriteOutContentHandler(writer),
                metadata,
                new ParseContext());

        assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("IBM866", metadata.get(Metadata.CONTENT_ENCODING));
View Full Code Here

    public void testEBCDIC_CP500() throws Exception {
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        parser.parse(
                TXTParserTest.class.getResourceAsStream("/test-documents/english.cp500.txt"),
                new WriteOutContentHandler(writer),
                metadata,
                new ParseContext());

        assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("IBM500", metadata.get(Metadata.CONTENT_ENCODING));
       
        // Additional check that it isn't too eager on short blocks of text
        metadata = new Metadata();
        writer = new StringWriter();
        parser.parse(
                new ByteArrayInputStream("<html><body>hello world</body></html>".getBytes("UTF-8")),
                new WriteOutContentHandler(writer),
                metadata,
                new ParseContext());

        assertNotSame("IBM500", metadata.get(Metadata.CONTENT_ENCODING));
    }
View Full Code Here

     * @throws IOException if the document can not be read
     * @throws TikaException if the document can not be parsed
     */
    public String parseToString(InputStream stream, Metadata metadata)
            throws IOException, TikaException {
        WriteOutContentHandler handler =
            new WriteOutContentHandler(maxStringLength);
        try {
            ParseContext context = new ParseContext();
            context.set(Parser.class, parser);
            parser.parse(
                    stream, new BodyContentHandler(handler), metadata, context);
        } catch (SAXException e) {
            if (!handler.isWriteLimitReached(e)) {
                // This should never happen with BodyContentHandler...
                throw new TikaException("Unexpected SAX processing failure", e);
            }
        } finally {
            stream.close();
        }
        return handler.toString();
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.WriteOutContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.