Package com.ikanow.infinit.e.harvest.extraction.text.legacy

Examples of com.ikanow.infinit.e.harvest.extraction.text.legacy.TextExtractorTika


          try {
            if ((null != source.useTextExtractor()) && source.useTextExtractor().equalsIgnoreCase("tika")) {
              // Special case: if tika enabled then do that first
              if (null == tikaExtractor) {
                tikaExtractor = new TextExtractorTika();
                tikaExtractor.extractText(d);
              }
            }
            else {
              this.getRawTextFromUrlIfNeeded(d, source.getRssConfig());
View Full Code Here


        }
       
        if ((null != source.useTextExtractor()) && source.useTextExtractor().equalsIgnoreCase("tika")) {
          // Special case: if tika enabled then do that first
          if (null == tikaExtractor) {
            tikaExtractor = new TextExtractorTika();
            tikaExtractor.extractText(doc);
          }
        }
        else {
          getRawTextFromUrlIfNeeded(doc, source.getRssConfig());
View Full Code Here

    }
    if (tmpUrl.endsWith(".pdf") || tmpUrl.endsWith(".doc") || tmpUrl.endsWith(".docx") || tmpUrl.endsWith(".xls") || tmpUrl.endsWith(".xlsx"))
    {
      //(eventually should detect error from AApi and send to tika on certain error types)
      if (null == _tikaExtractor) {
        _tikaExtractor = new TextExtractorTika();
      }
      _tikaExtractor.extractText(partialDoc);
      return;
    }
    //TESTED
View Full Code Here

    }
    catch (Exception e) {
      logger.warn("Can't use Boilerpipe as text extractor: " + e.getMessage());     
    }
    try {
      text_extractor_mappings.put("tika", new TextExtractorTika());
    }
    catch (Exception e) {
      logger.warn("Can't use Tika as text extractor: " + e.getMessage());     
    }
View Full Code Here

TOP

Related Classes of com.ikanow.infinit.e.harvest.extraction.text.legacy.TextExtractorTika

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.