Package org.apache.tika.mime

Examples of org.apache.tika.mime.MimeTypes


        return Normalizer.normalize(text, Normalizer.Form.NFC);
    }

    private String extractTextWithTika(byte[] textBytes, Metadata metadata) throws TikaException, SAXException, IOException {
        AutoDetectParser parser = new AutoDetectParser(new MimeTypes());
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        OutputStreamWriter writer = new OutputStreamWriter(baos, "UTF-8");
        ContentHandler handler = new BodyContentHandler(writer);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, new LumifyParserConfig());
View Full Code Here


     * @return MIME type of the document
     * @throws IOException if the document stream could not be read
     */
    private MimeType getMimeType(InputStream stream, Metadata metadata)
            throws IOException {
        MimeTypes types = config.getMimeRepository();

        // Get type based on magic prefix
        stream.mark(types.getMinLength());
        try {
            byte[] prefix = getPrefix(stream, types.getMinLength());
            MimeType type = types.getMimeType(prefix);
            if (type != null) {
                return type;
            }
        } finally {
            stream.reset();
        }

        // Get type based on resourceName hint (if available)
        String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
        if (resourceName != null) {
            MimeType type = types.getMimeType(resourceName);
            if (type != null) {
                return type;
            }
        }

        // Get type based on metadata hint (if available)
        String typename = metadata.get(Metadata.CONTENT_TYPE);
        if (typename != null) {
            try {
                return types.forName(typename);
            } catch (MimeTypeException e) {
                // Malformed type name, ignore
            }
        }

        // Finally, use the default type if no matches found
        try {
            return types.forName(MimeTypes.DEFAULT);
        } catch (MimeTypeException e) {
            // Should never happen
            return null;
        }
    }
View Full Code Here

  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());

  public MimeUtil(Configuration conf) {
    tika = new Tika();
    ObjectCache objectCache = ObjectCache.get(conf);
    MimeTypes mimeTypez = (MimeTypes) objectCache.getObject(MimeTypes.class
        .getName());
    if (mimeTypez == null) {
      try {
          String customMimeTypeFile = conf.get("mime.types.file");
          if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
View Full Code Here

             d.detect(tis, new Metadata())
       );
    }
   
    public void testTruncatedFiles() throws Exception {
        MimeTypes mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
        ContainerAwareDetector detector = new ContainerAwareDetector(mimeTypes);
       
        // First up a truncated OOXML (zip) file
        InputStream input = getTestDoc("testEXCEL.xlsx");
        byte [] buffer = new byte[300];
View Full Code Here

    String productId = "123";
    int refIndex = 0;

    // Create a new ReferenceResource using a Reference instance.
    Reference reference = new Reference("original", "dataStore", 1000,
      new MimeTypes().forName("text/plain"));

    ReferenceResource resource = new ReferenceResource(productId, refIndex,
      reference, new File("/tmp"));

View Full Code Here

    metadataEntries.put("CAS.Test", "test value");
    Metadata metadata = new Metadata();
    metadata.addMetadata(metadataEntries);

    Reference reference = new Reference("original", "dataStore", 1000,
      new MimeTypes().forName("text/plain"));
    List<Reference> references = new ArrayList<Reference>();
    references.add(reference);

    ProductType productType = new ProductType("1", "GenericFile", "test type",
      "repository", "versioner");
View Full Code Here

    ProductType productType = new ProductType("1", "GenericFile", "test type",
      "repository", "versioner");

    // Create a ProductResource using Reference, Metadata and Product instances.
    Reference reference1 = new Reference("original1", "dataStore1", 500,
      new MimeTypes().forName("text/plain"));
    List<Reference> references1 = new ArrayList<Reference>();
    references1.add(reference1);

    Hashtable metadataEntries1 = new Hashtable<String, Object>();
    metadataEntries1.put("product1_meta", "test1");
    Metadata metadata1 = new Metadata();
    metadata1.addMetadata(metadataEntries1);

    Product product1 = new Product();
    product1.setProductId("123");
    product1.setProductName("test.txt");
    product1.setProductType(productType);

    ProductResource productResource1 = new ProductResource(product1, metadata1,
      references1, new File("/tmp"));


    // Create another ProductResource using Reference, Metadata and Product
    // instances.
    Reference reference2 = new Reference("original2", "dataStore2", 1000,
      new MimeTypes().forName("application/pdf"));
    List<Reference> references2 = new ArrayList<Reference>();
    references2.add(reference2);

    Hashtable metadataEntries2 = new Hashtable<String, Object>();
    metadataEntries2.put("product2_meta", "test2");
View Full Code Here

    metadataEntries1.put("CAS.ProductReceivedTime", "2013-09-12T16:25:50.662Z");
    Metadata metadata1 = new Metadata();
    metadata1.addMetadata(metadataEntries1);

    Reference reference1 = new Reference("original1", "dataStore1", 1000,
      new MimeTypes().forName("text/plain"));

    ProductType productType1 = new ProductType("1", "TestType", "test type 1",
      "repository1", "versioner1");

    Product product1 = new Product();
    product1.setProductId("123");
    product1.setProductName("test product");
    product1.setProductStructure("flat");
    product1.setProductType(productType1);

    FileTransferStatus status1 = new FileTransferStatus(reference1, 1000, 100,
      product1);


    // Create another FileTransferStatus instance using Metadata, Reference,
    // ProductType and Product instances.
    Hashtable metadataEntries2 = new Hashtable<String, Object>();
    metadataEntries2.put("CAS.ProductReceivedTime", "2011-04-11T11:59:59.662Z");
    Metadata metadata2 = new Metadata();
    metadata2.addMetadata(metadataEntries2);

    Reference reference2 = new Reference("original2", "dataStore2", 500,
      new MimeTypes().forName("application/pdf"));

    ProductType productType2 = new ProductType("2", "TestType2", "test type 2",
        "repository2", "versioner2");

    Product product2 = new Product();
View Full Code Here

    metadataEntries.put("CAS.ProductReceivedTime", "2013-09-12T16:25:50.662Z");
    Metadata metadata = new Metadata();
    metadata.addMetadata(metadataEntries);

    Reference reference = new Reference("original", "dataStore", 1000,
      new MimeTypes().forName("text/plain"));

    ProductType productType = new ProductType("1", "GenericFile", "test type",
      "repository", "versioner");

    Product product = new Product();
View Full Code Here

    */
   public static void main(String[] args) throws DataTransferException,
         IOException, URISyntaxException {
      String usage = "LocalFileTransfer --productName <name> --productRepo <repo> [--dir <dirRef>] [--files <origRef 1>...<origRef N>]\n";

      MimeTypes mimeTypeRepo;
      try {
         mimeTypeRepo = MimeTypesFactory
               .create(System
                     .getProperty("org.apache.oodt.cas.filemgr.mime.type.repository"));
      } catch (MimeTypeException e) {
         e.printStackTrace();
         throw new IOException(e.getMessage());
      }

      String productName = null;
      String productRepo = null;
      String transferType = null;
      Reference dirReference = null;

      List<Reference> fileReferences = null;

      for (int i = 0; i < args.length; i++) {
         if (args[i].equals("--dir")) {
            transferType = "dir";
            dirReference = new Reference();
            dirReference.setOrigReference(new File(new URI(args[++i])).toURI()
                  .toString());
            LOG.log(Level.FINER,
                  "LocalFileTransfer.main: Generated orig reference: "
                        + dirReference.getOrigReference());
         } else if (args[i].equals("--files")) {
            transferType = "files";
            fileReferences = new Vector<Reference>();
            for (int j = i + 1; j < args.length; j++) {
               LOG.log(Level.FINER, "LocalFileTransfer.main: Adding file ref: "
                     + args[j]);
               fileReferences.add(new Reference(args[j], null,
                     new File(args[j]).length(), mimeTypeRepo
                           .getMimeType(args[j])));
            }
         } else if (args[i].equals("--productName")) {
            productName = args[++i];
         } else if (args[i].equals("--productRepo")) {
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MimeTypes

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.