Package org.xml.sax

Examples of org.xml.sax.ContentHandler


        assertTrue(handler.toString().contains(text));
    }

    public void testEmptyText() throws Exception {
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        parser.parse(
                new ByteArrayInputStream(new byte[0]), handler, metadata, new ParseContext());
        assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("\n", handler.toString());
    }
View Full Code Here


        assertEquals("ISO-8859-1", metadata.get(Metadata.CONTENT_ENCODING));
    }

    private void assertExtractText(String msg, String expected, byte[] input)
            throws Exception {
        ContentHandler handler = new BodyContentHandler() {
            public void ignorableWhitespace(char[] ch, int off, int len) {
                // Ignore the whitespace added by XHTMLContentHandler
            }
        };
        Metadata metadata = new Metadata();
        parser.parse(new ByteArrayInputStream(input), handler, metadata, new ParseContext());
        assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals(msg, expected, handler.toString());
    }
View Full Code Here

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        // TODO: should auto-detect without the resource name
        metadata.set(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
        ContentHandler handler = new BodyContentHandler();

        try {
            parser.parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    metadata.get(Metadata.CONTENT_TYPE));
            assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
            String content = handler.toString();
            assertTrue(content.contains("Sample Excel Worksheet"));
            assertTrue(content.contains("Numbers and their Squares"));
            assertTrue(content.contains("9"));
            assertFalse(content.contains("9.0"));
            assertTrue(content.contains("196"));
View Full Code Here

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        // TODO: should auto-detect without the resource name
        metadata.set(Metadata.RESOURCE_NAME_KEY, "testEXCEL-formats.xlsx");
        ContentHandler handler = new BodyContentHandler();

        try {
            parser.parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    metadata.get(Metadata.CONTENT_TYPE));

            String content = handler.toString();

            // Number #,##0.00
            assertTrue(content.contains("1,599.99"));
            assertTrue(content.contains("-1,599.99"));
View Full Code Here

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        // TODO: should auto-detect without the resource name
        metadata.set(Metadata.RESOURCE_NAME_KEY, "testPPT.pptx");
        ContentHandler handler = new BodyContentHandler();

        try {
            parser.parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                    metadata.get(Metadata.CONTENT_TYPE));
            assertEquals("Sample Powerpoint Slide", metadata.get(Metadata.TITLE));
            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
            String content = handler.toString();
            assertTrue(content.contains("Sample Powerpoint Slide"));
            assertTrue(content.contains("Powerpoint X for Mac"));
        } finally {
            input.close();
        }
View Full Code Here

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        // TODO: should auto-detect without the resource name
        metadata.set(Metadata.RESOURCE_NAME_KEY, "testWORD.docx");
        ContentHandler handler = new BodyContentHandler();

        try {
            parser.parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    metadata.get(Metadata.CONTENT_TYPE));
            assertEquals("Sample Word Document", metadata.get(Metadata.TITLE));
            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
            assertTrue(handler.toString().contains("Sample Word Document"));
        } finally {
            input.close();
        }
    }
View Full Code Here

    public void testParseAscii() throws Exception {
        String path = "/test-documents/testHTML.html";
        final StringWriter href = new StringWriter();
        final StringWriter name = new StringWriter();
        ContentHandler body = new BodyContentHandler();
        Metadata metadata = new Metadata();
        InputStream stream = HtmlParserTest.class.getResourceAsStream(path);
        try {
            ContentHandler link = new DefaultHandler() {
                @Override
                public void startElement(
                        String u, String l, String n, Attributes a)
                        throws SAXException {
                    if ("a".equals(l)) {
View Full Code Here

        assertTrue(content.contains("extract content"));
        assertTrue(content.contains("an XHTML document"));
    }

    public void testParseEmpty() throws Exception {
        ContentHandler handler = new BodyContentHandler();
        new HtmlParser().parse(
                new ByteArrayInputStream(new byte[0]),
                handler,  new Metadata(), new ParseContext());
        assertEquals("", handler.toString());
    }
View Full Code Here

                    throw new SAXException(e);
                }
            }
            DOMSource source = new DOMSource(node);

            ContentHandler handler;
            if (node.getNodeType() == Node.DOCUMENT_NODE) {
                // Pass all SAX events
            handler = contentHandler;
            } else {
                // Strip start/endDocument
View Full Code Here

        try {
            Metadata metadata = new Metadata();
            metadata.set(Metadata.RESOURCE_NAME_KEY, tp.resourceStatedName);
            metadata.set(Metadata.CONTENT_TYPE, tp.statedType);
            StringWriter writer = new StringWriter();
            ContentHandler handler = new WriteOutContentHandler(writer);
            new AutoDetectParser().parse(input, handler, metadata);

            assertEquals("Bad content type: " + tp,
                    tp.realType, metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

TOP

Related Classes of org.xml.sax.ContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.