Package org.apache.tika.metadata

Examples of org.apache.tika.metadata.Metadata


     * @see <a href="https://issues.apache.org/jira/browse/TIKA-483">TIKA-483</a>
     */
    @Test
    public void testEmptyDocument() throws IOException {
        assertEquals(MediaType.OCTET_STREAM, mimeTypes.detect(
                new ByteArrayInputStream(new byte[0]), new Metadata()));

        Metadata namehint = new Metadata();
        namehint.set(Metadata.RESOURCE_NAME_KEY, "test.txt");
        assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
                new ByteArrayInputStream(new byte[0]), namehint));

        Metadata typehint = new Metadata();
        typehint.set(Metadata.CONTENT_TYPE, "text/plain");
        assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
                new ByteArrayInputStream(new byte[0]), typehint));

    }
View Full Code Here


     */
    @Test
    public void testNotXML() throws IOException {
        assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
                new ByteArrayInputStream("<!-- test -->".getBytes("UTF-8")),
                new Metadata()));
    }
View Full Code Here

          assertEquals("A \"Hello World\" file", hwf.getDescription());
          assertEquals(".hello.world", hwf.getExtension());
         
          // Check that we can correctly detect with the file one:
          // By name
          Metadata m = new Metadata();
          m.add(Metadata.RESOURCE_NAME_KEY, "test.hello.world");
          assertEquals(hwf.toString(), this.mimeTypes.detect(null, m).toString());
         
          // By contents
          m = new Metadata();
          ByteArrayInputStream s = new ByteArrayInputStream(
                "Hello, World!".getBytes("ASCII"));
          assertEquals(hwf.toString(), this.mimeTypes.detect(s, m).toString());
       } catch (Exception e) {
          fail(e.getMessage());
View Full Code Here

                    try {
                        try {
                            InputStream rawInput = socket.getInputStream();
                            OutputStream output = socket.getOutputStream();
                            InputStream input = TikaInputStream.get(rawInput);
                            type.process(input, output, new Metadata());
                            output.flush();
                        } finally {
                            socket.close();
                        }
                    } catch (Exception e) {
View Full Code Here

     * Extracts data from byte[]
     */
    private String extract(byte[] byteObject) throws TikaException {// throws IOException
        StringBuilder wBuf = new StringBuilder();
        InputStream stream = null;
        Metadata metadata = new Metadata();
        HtmlParser htmlParser = new HtmlParser();
        BodyContentHandler handler = new BodyContentHandler(-1);// -1
        ParseContext parser = new ParseContext();
        try {
            stream = new ByteArrayInputStream(byteObject);
View Full Code Here

            this.metadata = metadata;
        }
    }

    protected XMLResult getXML(String filePath) throws Exception {
        return getXML(getResourceAsStream("/test-documents/" + filePath), new AutoDetectParser(), new Metadata());
    }
View Full Code Here

    public String getText(InputStream is, Parser parser, Metadata metadata) throws Exception{
        return getText(is, parser, new ParseContext(), metadata);
    }

    public String getText(InputStream is, Parser parser, ParseContext context) throws Exception{
        return getText(is, parser, context, new Metadata());
    }
View Full Code Here

    public String getText(InputStream is, Parser parser, ParseContext context) throws Exception{
        return getText(is, parser, context, new Metadata());
    }

    public String getText(InputStream is, Parser parser) throws Exception{
        return getText(is, parser, new ParseContext(), new Metadata());
    }
View Full Code Here

    @Test
    public void testWORDxtraction() throws Exception {
        File file = getResourceAsFile("/test-documents/testWORD.doc");
        Parser parser = tika.getParser();
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(
                    stream, new DefaultHandler(), metadata, new ParseContext());
        } finally {
            stream.close();
        }
        assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
    }
View Full Code Here

        File file = getResourceAsFile("/test-documents/testEXCEL.xls");
        String s1 = tika.parseToString(file);
        assertTrue("Text does not contain '" + expected + "'", s1
                .contains(expected));
        Parser parser = tika.getParser();
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(
                    stream, new DefaultHandler(), metadata, new ParseContext());
        } finally {
            stream.close();
        }
        assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.metadata.Metadata

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.