Package org.apache.tika.metadata

Examples of org.apache.tika.metadata.Metadata


    public void testExcelParserFormatting() throws Exception {
        InputStream input = ExcelParserTest.class.getResourceAsStream(
                "/test-documents/testEXCEL-formats.xls");
        try {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new OfficeParser().parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.ms-excel",
                    metadata.get(Metadata.CONTENT_TYPE));

            String content = handler.toString();

            // Number #,##0.00
            assertTrue(content.contains("1,599.99"));
View Full Code Here


    public void testProtectedExcel() throws Exception {
        InputStream input = OOXMLParserTest.class
                .getResourceAsStream("/test-documents/protected.xlsx");

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();

        try {
            parser.parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    metadata.get(Metadata.CONTENT_TYPE));

            assertEquals("true", metadata.get(TikaMetadataKeys.PROTECTED));
        } finally {
            input.close();
        }
    }
View Full Code Here

        } else {
          throw new IllegalArgumentException("Parameter must be instance of byte[]");
        }

        ContentHandler textHandler = new BodyContentHandler(fileLengthLimit);
        Metadata metadata = new Metadata();
        ParseContext context = new ParseContext();

        try {
          metadata.set(Metadata.CONTENT_TYPE, tika.detect(inputStream));

          parser.parse(inputStream, textHandler, metadata, context);

          bean.set(headingField, metadata.get(TikaCoreProperties.TITLE));

          if (bean.get(createTimestampField) == null) {
            bean.set(createTimestampField, metadata.get(TikaCoreProperties.CREATED));
          }
          if (bean.get(editTimestampField) == null) {
            bean.set(editTimestampField, metadata.get(TikaCoreProperties.MODIFIED));
          }
          if (bean.get(keywordsField) == null) {
            bean.set(keywordsField, metadata.get(TikaCoreProperties.KEYWORDS));
          }
          if (bean.get(publishTimestampField) == null) {
            bean.set(publishTimestampField, metadata.get(TikaCoreProperties.PRINT_DATE));
          }
          if (bean.get(mimetypeField) == null) {
            //HttpHeaders.CONTENT_TYPE
            bean.set(mimetypeField, metadata.get(Metadata.CONTENT_TYPE));
          }

          String content = prepareContent(bean, textHandler);
          bean.set(this.targetAttributeField, content);
View Full Code Here

                new TikaServer(Integer.parseInt(arg)).start();
            } else if (arg.equals("-")) {
                InputStream stream =
                    TikaInputStream.get(new CloseShieldInputStream(System.in));
                try {
                    type.process(stream, System.out, new Metadata());
                } finally {
                    stream.close();
                }
            } else {
                URL url;
                File file = new File(arg);
                if (file.isFile()) {
                    url = file.toURI().toURL();
                } else {
                    url = new URL(arg);
                }
                Metadata metadata = new Metadata();
                InputStream input = TikaInputStream.get(url, metadata);
                try {
                    type.process(input, System.out, metadata);
                } finally {
                    input.close();
View Full Code Here

        context.set(Parser.class, parser);

        InputStream stream =
                new FileInputStream("src/test/resources/test-documents.zip");
        try {
            parser.parse(stream, handler, new Metadata(), context);
        } finally {
            stream.close();
        }

        String content = handler.toString();
View Full Code Here

        // Finally, assume plain text if no control bytes are found
        try {
            TextDetector detector = new TextDetector(getMinLength());
            ByteArrayInputStream stream = new ByteArrayInputStream(data);
            return forName(detector.detect(stream, new Metadata()).toString());
        } catch (Exception e) {
            return rootMimeType;
        }
    }
View Full Code Here

    @Test
    public void testOutputStream() throws Exception {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();

        XHTMLContentHandler xhtml = new XHTMLContentHandler(
                new BodyContentHandler(buffer), new Metadata());
        xhtml.startDocument();
        xhtml.element("p", "Test text");
        xhtml.endDocument();

        assertEquals("Test text\n", buffer.toString());
View Full Code Here

    @Test
    public void testByteOrderMark() throws Exception {
        assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
                new ByteArrayInputStream("\ufefftest".getBytes("UTF-16LE")),
                new Metadata()));
        assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
                new ByteArrayInputStream("\ufefftest".getBytes("UTF-16BE")),
                new Metadata()));
        assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
                new ByteArrayInputStream("\ufefftest".getBytes("UTF-8")),
                new Metadata()));
    }
View Full Code Here

        }
        for (Map.Entry<String,COSObjectable> ent : embeddedFileNames.entrySet()) {
            PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
            PDEmbeddedFile file = spec.getEmbeddedFile();

            Metadata metadata = new Metadata();
            // TODO: other metadata?
            metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey());
            metadata.set(Metadata.CONTENT_TYPE, file.getSubtype());
            metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize()));

            if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                TikaInputStream stream = TikaInputStream.get(file.createInputStream());
                try {
                    embeddedExtractor.parseEmbedded(
View Full Code Here

        assertNotNull("Test stream: ["+urlOrFileName+"] is null!", in);
        if (!in.markSupported()) {
            in = new java.io.BufferedInputStream(in);
        }
        try {
            Metadata metadata = new Metadata();
            String mime = this.mimeTypes.detect(in, metadata).toString();
            assertEquals(urlOrFileName + " is not properly detected: detected.", expected, mime);

            //Add resource name and test again
            metadata.set(Metadata.RESOURCE_NAME_KEY, urlOrFileName);
            mime = this.mimeTypes.detect(in, metadata).toString();
            assertEquals(urlOrFileName + " is not properly detected after adding resource name.", expected, mime);
        } finally {
            in.close();
        }       
View Full Code Here

TOP

Related Classes of org.apache.tika.metadata.Metadata

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.