Package org.sf.mustru.docs

Source Code of org.sf.mustru.docs.ArticleDocDb

package org.sf.mustru.docs;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.sf.mustru.crawl.ClassifyDoc;

import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.bind.tuple.TupleInput;
import com.sleepycat.bind.tuple.TupleOutput;

/**
* Article documents that resemble a short newspaper or technical article
*/
public class ArticleDoc extends IndexableDoc
{
  private String textType = "";   
 
  /**
   * A class for article documents
   * String (Optional) The name of the file containing the text
   */
  public ArticleDoc() { super(); setBdbBinding( new ArticleDocDb() ); }
  public ArticleDoc(String ifile) { super(ifile); setBdbBinding( new ArticleDocDb() ); }
 
  /**
   * load text specific information, type of text - article, book, etc.
   * category of text
   */
  public void loadSpecific (ClassifyDoc cdoc)
   { 
    setTextType(cdoc.classifyTextContents(this));
    setFileType("article");

    //*-- identify the title and authors
   }

  //*-- create the Lucene Index
  public void loadIndex(IndexWriter ramIW, boolean storeTermVector) throws IOException
  {
   Document doc = new Document();
   doc.add( new Field("key", getFileName(), Field.Store.YES, Field.Index.NO) );
   doc.add(new Field("contents", getContents().toString(), Field.Store.NO, Field.Index.TOKENIZED) );
   doc.add( new Field("type", getFileType(), Field.Store.YES, Field.Index.NO) );
   doc.add( new Field("category", getTextType(), Field.Store.YES, Field.Index.NO) );
   ramIW.addDocument(doc);
  }
 
  public TupleBinding getBdbBinding()
   { return bdbBinding; }

  public void setBdbBinding(TupleBinding bdbBinding)
   { this.bdbBinding = bdbBinding; }
   
  public String toString()
   {
    StringBuffer sb = new StringBuffer();
    sb.append(super.toString());

    //*-- add article specific data
    sb.append(" Text type: "); sb.append(getTextType() );

   return sb.toString();
   }

  public String getTextType()
  { return textType; }

  public void setTextType(String textType)
   { this.textType = textType; }
 
}

/**
* Berkeley DB binding for ArticleDoc
*
*/
final class ArticleDocDb extends TupleBinding
{
static IndexableDocBinding idb = new IndexableDocBinding();
IndexableDoc idoc;

public Object entryToObject(TupleInput ti)
{
  idoc = (IndexableDoc) idb.entryToObject(ti);
  ArticleDoc o = new ArticleDoc();
  o.loadGeneric(idoc); idoc = null;

  //*-- write any text specific information to o
  o.setTextType(ti.readString());

  return o;
}

public void objectToEntry(Object o, TupleOutput to)
{
  idb.objectToEntry(o, to);

  //*-- write text specific to the tuple output
  ArticleDoc tdoc = (ArticleDoc) o;
  to.writeString(tdoc.getTextType());

}

}
TOP

Related Classes of org.sf.mustru.docs.ArticleDocDb

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.
();a=s.createElement(o), m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); ga('create', 'UA-20639858-1', 'auto'); ga('send', 'pageview');