Package railo.runtime.search.lucene2.docs

Source Code of railo.runtime.search.lucene2.docs.WordDocument

package railo.runtime.search.lucene2.docs;

import java.io.IOException;
import java.io.InputStream;

import org.apache.lucene.document.Document;
import org.textmining.text.extraction.WordExtractor;

import railo.commons.io.IOUtil;
import railo.commons.io.res.Resource;
import railo.commons.lang.StringUtil;
import railo.runtime.op.Caster;

/** A utility for making Lucene Documents from a File. */

public final class WordDocument {
   
    private static final int SUMMERY_SIZE=20;
    //private static final char FILE_SEPARATOR = System.getProperty("file.separator").charAt(0);
   
  /** Makes a document for a File.
    <p>
    The document has three fields:
    <ul>
    <li><code>path</code>--containing the pathname of the file, as a stored,
    tokenized field;
    <li><code>modified</code>--containing the last modified date of the file as
    a keyword field as encoded by <a
    href="lucene.document.DateField.html">DateField</a>; and
    <li><code>contents</code>--containing the full contents of the file, as a
    Reader field;
* @param res
* @return matching document
* @throws IOException
    */
    public static Document getDocument(Resource res) throws IOException {
  
      // make a new, empty document
      Document doc = new Document();     
      InputStream is =null;
      try{
        is=IOUtil.toBufferedInputStream(res.getInputStream());
        addContent(null,doc,is);
      }
      finally{
        IOUtil.closeEL(is);
      }
        return doc;
    }
   
    public static Document getDocument(StringBuffer content, InputStream is) throws IOException {
     Document doc = new Document();
      addContent(content,doc,is);
      return doc;
    }
 
 

    private static void addContent(StringBuffer content, Document doc, InputStream is) throws IOException {
      FieldUtil.setMimeType(doc, "application/msword");
      WordExtractor extractor = new WordExtractor();
        String contents;
    try {
      contents = extractor.extractText(is);
      if(content!=null)content.append(contents);
    } catch (Exception e) {
      if(e instanceof IOException) throw (IOException)e;
      throw new IOException(e.getMessage());
    }
        doc.add(FieldUtil.Text("size", Caster.toString(contents.length())));
        FieldUtil.setRaw(doc,contents);
        FieldUtil.setContent(doc, contents);
        //doc.add(FieldUtil.Text("contents", contents.toLowerCase()));
        FieldUtil.setSummary(doc, StringUtil.max(contents,SUMMERY_SIZE),false);
        //doc.add(FieldUtil.UnIndexed("summary",));
  }



private WordDocument() {}
}
   
TOP

Related Classes of railo.runtime.search.lucene2.docs.WordDocument

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.