Package org.sf.mustru.docs

Source Code of org.sf.mustru.docs.IndexableDoc

package org.sf.mustru.docs;

import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.IndexWriter;
import org.sf.mustru.crawl.ClassifyDoc;
import org.sf.mustru.utils.Constants;
import org.sf.mustru.utils.StringTools;
import com.sleepycat.bind.tuple.TupleBinding;

/**
* The class containing fields (attributes) of the indexable document.
*/
public  class IndexableDoc implements StoreAbleInterface
{
/**
  *  Type of file (image, article, audio, etc.)
  */
private String fileType = "";   
/**
  *  Title of document
  */
private String title = "";       
/**
  *  Author of document
  */
private String author = "";      
/**
  *  Last modified date
  */
private long mdate = 0;
/**
  *  Last indexed date
  */
private long idate = 0;
/**
  *  Abstract of document contents
  */
private String summary = "";     
/**
  *  Context file where current document was found
  */
private String contextFile = "";
/**
  *  Full canonical path name of file in local filesystem
  */
private String fileName = "";   
/**
  *  SHA signature of file contents
  */
private String fileSignature = "";
/**
  *  Alternate location of file (e.g. URL)
  */
private String fileLocation = ""
/**
  * Size of the file (bytes)
  */
private long fileLength = 0;
/**
  * Rank of the file
  */
private long fileRank = 0;
/**
  *  Metadata extracted from document
  */
private String metadata = "";
/**
  * Language
  */
private String language = "";
/**
  *  Plain text contents of document
  */
private StringBuffer contents;      
/**
  * Binding to create object and database entries in the Berkeley DB, set by
  * specific instances of indexable document
  */
public TupleBinding bdbBinding;

/**
  * Return an instance of an indexable document
  */
public IndexableDoc()
{ setContents( new StringBuffer("") );
setLanguage("English");
setBdbBinding(new IndexableDocBinding() );
}

public IndexableDoc(String ifile)
{
  setFileName(ifile);                       //*-- set the name of the file
  File file = new File(ifile);
  setMdate(file.lastModified() )//*-- set the last modified date
  setFileLength( file.length() );
  setContents(new StringBuffer(""));
  setLanguage("English");
  setBdbBinding(new IndexableDocBinding() );
}

//*-- copy generic file information to a specific instance of an indexable document
public void loadGeneric (IndexableDoc aDoc)
{
  setAuthor(aDoc.getAuthor());
  setContents(aDoc.getContents() );
  setContextFile(aDoc.getContextFile());
  setFileLocation(aDoc.getFileLocation());
  setFileName(aDoc.getFileName());
  setFileSignature(aDoc.getFileSignature());
  setFileType(aDoc.getFileType());
  setFileLength(aDoc.getFileLength());
  setFileRank(aDoc.getFileRank());
  setIdate(aDoc.getIdate());
  setMdate(aDoc.getMdate());
  setMetadata(aDoc.getMetadata());
  setSummary(aDoc.getSummary());
  setTitle(aDoc.getTitle());
  //*-- tuple binding reference set during doc creation
}

//*-- empty method overriden by specific document types
public void loadSpecific(ClassifyDoc cdoc) {  }

//*-- empty method overriden by specific document types
public void loadIndex(IndexWriter iw, boolean storeTermVector) throws IOException { }

public String getAuthor()
{ return author; }

public void setAuthor(String author)
{ this.author = author; }

public StringBuffer getContents()
{ return contents; }

public void setContents(StringBuffer contents)
{ this.contents = contents; }

public String getContextFile()
{ return contextFile; }

public void setContextFile(String contextFile)
{ this.contextFile = contextFile; }

public String getFileLocation()
{ return fileLocation; }

public void setFileLocation(String fileLocation)
{ this.fileLocation = fileLocation; }

public String getFileName()
{ return fileName; }

public void setFileName(String fileName)
{ this.fileName = fileName; }

public String getFileSignature()
{ return fileSignature; }

public void setFileSignature(String fileSignature)
{ this.fileSignature = fileSignature; }

public long getFileLength()
{ return fileLength; }

public void setFileLength(long fileLength)
{ this.fileLength = fileLength; }

public long getFileRank()
{ return fileRank; }

public void setFileRank(long fileRank)
{ this.fileRank = fileRank; }

public String getFileType()
{ return fileType; }

public void setFileType(String fileType)
{ this.fileType = fileType; }

public long getIdate()
{ return idate; }

public void setIdate(long idate)
{ this.idate = idate; }

public long getMdate()
{ return mdate; }

public void setMdate(long mdate)
{ this.mdate = mdate; }

public String getMetadata()
{ return metadata; }

public void setMetadata(String metadata)
{ this.metadata = metadata; }

public String getLanguage()
{ return language; }

public void setLanguage(String language)
{ this.language = language; }

public String getSummary()
{ return summary; }

public void setSummary(String summary)
{ this.summary = summary; }

public String getTitle()
{ return title; }

public void setTitle(String title)
{ this.title = title; }

public TupleBinding getBdbBinding()
{ return bdbBinding; }

public void setBdbBinding( TupleBinding bdbBinding)
{ this.bdbBinding = bdbBinding; }

public String  toString()
{
  StringBuffer sb = new StringBuffer();
  sb.append(" File Type: "); sb.append(getFileType()); sb.append(Constants.NEWLINE );
  sb.append(" Title: "); sb.append(getTitle()); sb.append(Constants.NEWLINE);
  sb.append(" Author: "); sb.append(getAuthor()); sb.append(Constants.NEWLINE);
  sb.append(" Mdate: "); sb.append(getMdate()); sb.append(Constants.NEWLINE);
  sb.append(" Idate: "); sb.append(getIdate()); sb.append(Constants.NEWLINE);
  sb.append(" Summary "); sb.append(getSummary()); sb.append(Constants.NEWLINE);
  sb.append(" Context: "); sb.append(getContextFile()); sb.append(Constants.NEWLINE);
  sb.append(" Filename: "); sb.append(getFileName()); sb.append(Constants.NEWLINE);
  sb.append(" Signature: "); sb.append(getFileSignature()); sb.append(Constants.NEWLINE);
  sb.append(" File Location: "); sb.append(getFileLocation()); sb.append(Constants.NEWLINE);
  sb.append(" File Length: "); sb.append(getFileLength()); sb.append(Constants.NEWLINE);
  sb.append(" File Rank: "); sb.append(getFileRank()); sb.append(Constants.NEWLINE);
  sb.append(" Metadata: "); sb.append(getMetadata()); sb.append(Constants.NEWLINE);
  sb.append(" Language: "); sb.append(getLanguage()); sb.append(Constants.NEWLINE);
  sb.append(" Contents: "); sb.append(StringTools.fillin( contents.toString(), 70, '.') );
  return sb.toString();
}

}
TOP

Related Classes of org.sf.mustru.docs.IndexableDoc

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.