Package org.apache.tika

Examples of org.apache.tika.Tika


  /* our log stream */
  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());

  public MimeUtil(Configuration conf) {
    tika = new Tika();
    ObjectCache objectCache = ObjectCache.get(conf);
    MimeTypes mimeTypez = (MimeTypes) objectCache.getObject(MimeTypes.class
        .getName());
    if (mimeTypez == null) {
      try {
View Full Code Here


        || (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
      // If no mime-type header, or cannot find a corresponding registered
      // mime-type, then guess a mime-type from the url pattern
      try {
        TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
        Tika tika = new Tika(tikaConfig);
        retType = tika.detect(url) != null ? tika.detect(url) : null;
      } catch (Exception e) {
        String message = "Problem loading default Tika configuration";
        LOG.error(message, e);
        throw new RuntimeException(e);
      }
    } else {
        retType = type.getName();
    }

    // If magic is enabled, use MIME magic to guess the type from the content.
    // If the type returned by the magic guess differs from the one set so far,
    // and it is not the default MIME type, then go with the type returned by
    // the magic.
    if (this.mimeMagic) {
      magicType = tika.detect(data);

      // Deprecated in Tika 1.0 See https://issues.apache.org/jira/browse/NUTCH-1230
      //MimeType magicType = this.mimeTypes.getMimeType(data);
      if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM)
          && !magicType.equals(MimeTypes.PLAIN_TEXT)
View Full Code Here

    this.base = url.toString();
    this.file = file;
    this.conf = conf;
   
    MIME = new MimeUtil(conf);
    tika = new Tika();

    if (!"file".equals(url.getProtocol()))
      throw new FileException("Not a file url:" + url);

    if (File.LOG.isTraceEnabled()) {
View Full Code Here

              s.close();
              partialDoc.setFullText(text);                                     
            }
            else { // not HTML, send to tika instead
              if (null == _tika) {
                _tika = new Tika();
              }
              Metadata metadata = new Metadata();
              text = _tika.parseToString(urlStream, metadata);
              partialDoc.setFullText(text);         
              TextExtractorTika.addMetadata(partialDoc, metadata);
View Full Code Here

        }//TESTED
       
      }//TESTED
    }//(end if has options)
   
    _tika = new Tika(TikaConfig.getDefaultConfig().getDetector(), autoDetectParser);
  }//TESTED (apart from unused number option configuration)
View Full Code Here

        }//TESTED
       
      }//TESTED
    }//(end if has options)
   
    _tika = new Tika(TikaConfig.getDefaultConfig().getDetector(), autoDetectParser);
   
  }//TESTED (apart from unused number option configuration)
View Full Code Here

     * @param bytes for validation
     * @throws ImageFormatException invalid format image processing error
     */
    public void validateImageFormat(byte[] bytes) throws ImageFormatException {
        Validate.notNull(bytes, "Incoming byte array cannot be null");
        Tika tika = new Tika();
        InputStream input = new ByteArrayInputStream(bytes);
        try {
            String type = tika.detect(input);
            if (!VALID_IMAGE_TYPES.contains(type)) {
                LOGGER.debug("Wrong file extension. May be only {}", VALID_IMAGE_EXTENSIONS);
                throw new ImageFormatException(VALID_IMAGE_EXTENSIONS);
            }
        } catch (IOException e) {
View Full Code Here

     * @throws ImageProcessException image conversion problem.
     */
    public BufferedImage convertByteArrayToImage(byte[] bytes) throws ImageProcessException {
        BufferedImage result;
        BufferedInputStream bis = new BufferedInputStream(new ByteArrayInputStream(bytes));
        Tika tika = new Tika();
        try {
            String type = tika.detect(bis);
            if (type.contains(ImageService.ICO_TYPE)) {
                result = ICODecoder.read(bis).get(0);
            } else {
                result = ImageIO.read(bis);
            }
View Full Code Here

    this.base = url.toString();
    this.file = file;
    this.conf = conf;
   
    MIME = new MimeUtil(conf);
    tika = new Tika();

    if (!"file".equals(url.getProtocol()))
      throw new FileException("Not a file url:" + url);

    if (File.LOG.isTraceEnabled()) {
View Full Code Here

  /* our log stream */
  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());

  public MimeUtil(Configuration conf) {
    tika = new Tika();
    ObjectCache objectCache = ObjectCache.get(conf);
    MimeTypes mimeTypez = (MimeTypes) objectCache.getObject(MimeTypes.class
        .getName());
    if (mimeTypez == null) {
      try {
View Full Code Here

TOP

Related Classes of org.apache.tika.Tika

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.