Package org.apache.tika.metadata

Examples of org.apache.tika.metadata.Metadata.names()


        //make sure old metadata doesn't linger between objects
        assertFalse(Arrays.asList(meta_jpg.getValues("dc:subject")).contains("serbor"));
        assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
        assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
       
        assertEquals(40, meta_jpg.names().length);
        assertEquals(105, meta_jpg.names().length);
    }
   
    //TIKA-1010 test linked embedded doc
    @Test
View Full Code Here


        assertFalse(Arrays.asList(meta_jpg.getValues("dc:subject")).contains("serbor"));
        assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
        assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
       
        assertEquals(40, meta_jpg.names().length);
        assertEquals(105, meta_jpg.names().length);
    }
   
    //TIKA-1010 test linked embedded doc
    @Test
    public void testEmbeddedLinkedDocument() throws Exception {
View Full Code Here

        iWorkParser.parse(input, handler, metadata, parseContext);

        // Make sure enough keys came through
        // (Exact numbers will vary based on composites)
        assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 6);
        List<String> metadataKeys = Arrays.asList(metadata.names());
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.SLIDE_COUNT.getName()));
//        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.SLIDE_COUNT.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
View Full Code Here

        iWorkParser.parse(input, handler, metadata, parseContext);

        // Make sure enough keys came through
        // (Exact numbers will vary based on composites)
        assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 50);
        List<String> metadataKeys = Arrays.asList(metadata.names());
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.PAGE_COUNT.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.LAST_MODIFIED.getName()));
View Full Code Here

        iWorkParser.parse(input, handler, metadata, parseContext);

        // Make sure enough keys came through
        // (Exact numbers will vary based on composites)
        assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 8);
        List<String> metadataKeys = Arrays.asList(metadata.names());
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.PAGE_COUNT.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.COMMENTS.getName()));
        assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.TITLE));
View Full Code Here

        metadata.add("json_escapes", "the: \"quick\" brown, fox");
       
        StringWriter writer = new StringWriter();
        JsonMetadata.toJson(metadata, writer);
        Metadata deserialized = JsonMetadata.fromJson(new StringReader(writer.toString()));
        assertEquals(7, deserialized.names().length);
        assertEquals(metadata, deserialized);

        //test that this really is 6 Chinese characters
        assertEquals(6, deserialized.get("alma_mater").length());
View Full Code Here

            document.add(getContentField(documentMetadata, content.getContent()));
        }
       
        if (extractMetadata) {
            Metadata metadata = content.getMetadata();
            for (final String property: metadata.names()) {
                document.add(getField(documentMetadata, property, metadata.get(property)));
            }
        }
       
        if (!StringUtils.isBlank(documentMetadata.getSource())) {
View Full Code Here

                UriRef xhtmlBlobUri = new UriRef("urn:tika:xhtml:"+random);
                ci.addPart(xhtmlBlobUri,  xhtmlSink.getBlob());
            }
            //add the extracted metadata
            if(log.isInfoEnabled()){
                for(String name : metadata.names()){
                    log.info("{}: {}",name,Arrays.toString(metadata.getValues(name)));
                }
            }
            ci.getLock().writeLock().lock();
            try {
View Full Code Here

                totalRead = 0;

                //append meta data if last chunk
                if (eof) {
                    //sort meta data keys
                    List<String> sortedKeyList = Arrays.asList(meta.names());
                    Collections.sort(sortedKeyList);
                    sb.append("\n\n------------------------------METADATA------------------------------\n\n"); //NON-NLS
                    for (String key : sortedKeyList) {
                        String value = meta.get(key);
                        sb.append(key).append(": ").append(value).append("\n");
View Full Code Here

            + content.getUrl());
      }
    }

    // populate Nutch metadata with Tika metadata
    String[] TikaMDNames = tikamd.names();
    for (String tikaMDName : TikaMDNames) {
      if (tikaMDName.equalsIgnoreCase(Metadata.TITLE))
        continue;
      // TODO what if multivalued?
      nutchMetadata.add(tikaMDName, tikamd.get(tikaMDName));
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.