package org.sf.mustru.docs;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.IndexWriter;
import org.sf.mustru.crawl.ClassifyDoc;
import org.sf.mustru.utils.Constants;
import org.sf.mustru.utils.StringTools;
import com.sleepycat.bind.tuple.TupleBinding;
/**
* The class containing fields (attributes) of the indexable document.
*/
public class IndexableDoc implements StoreAbleInterface
{
    /** Type of file (image, article, audio, etc.) */
    private String fileType = "";

    /** Title of document */
    private String title = "";

    /** Author of document */
    private String author = "";

    /**
     * Last modified date. The file-name constructor fills this from
     * {@link File#lastModified()}, i.e. milliseconds since the epoch.
     */
    private long mdate = 0;

    /** Last indexed date (NOTE(review): presumably the same unit as mdate -- confirm against callers) */
    private long idate = 0;

    /** Abstract of document contents */
    private String summary = "";

    /** Context file where current document was found */
    private String contextFile = "";

    /** Full canonical path name of file in local filesystem */
    private String fileName = "";

    /** SHA signature of file contents */
    private String fileSignature = "";

    /** Alternate location of file (e.g. URL) */
    private String fileLocation = "";

    /** Size of the file (bytes) */
    private long fileLength = 0;

    /** Rank of the file */
    private long fileRank = 0;

    /** Metadata extracted from document */
    private String metadata = "";

    /** Language of the document; both constructors default it to "English" */
    private String language = "";

    /** Plain text contents of document; both constructors start it as an empty buffer */
    private StringBuffer contents;

    /**
     * Binding to create object and database entries in the Berkeley DB, set by
     * specific instances of indexable document. The field stays public for
     * backward compatibility; new code should go through
     * {@link #getBdbBinding()} and {@link #setBdbBinding(TupleBinding)}.
     */
    public TupleBinding bdbBinding;

    /**
     * Create an indexable document with empty contents, the language set to
     * "English" and an {@link IndexableDocBinding} as the Berkeley DB binding.
     */
    public IndexableDoc()
    {
        setContents(new StringBuffer(""));
        setLanguage("English");
        setBdbBinding(new IndexableDocBinding());
    }

    /**
     * Create an indexable document for a file. In addition to the defaults of
     * the no-argument constructor, the file name is recorded and the last
     * modified date and length are read from the filesystem; both are 0 when
     * the file does not exist, as reported by {@link File}.
     *
     * @param ifile name of the file in the local filesystem
     * @throws NullPointerException if ifile is null
     */
    public IndexableDoc(String ifile)
    {
        //*-- shared defaults: empty contents, language and tuple binding
        this();

        setFileName(ifile);                  //*-- set the name of the file
        File file = new File(ifile);
        setMdate(file.lastModified());       //*-- set the last modified date
        setFileLength(file.length());
    }

    /**
     * Copy generic file information to a specific instance of an indexable
     * document. The contents buffer is shared with the source document, not
     * cloned. The tuple binding is deliberately left untouched because it is
     * set when the specific document is created.
     *
     * @param aDoc document whose generic attributes are copied into this one
     */
    public void loadGeneric(IndexableDoc aDoc)
    {
        setAuthor(aDoc.getAuthor());
        setContents(aDoc.getContents());
        setContextFile(aDoc.getContextFile());
        setFileLocation(aDoc.getFileLocation());
        setFileName(aDoc.getFileName());
        setFileSignature(aDoc.getFileSignature());
        setFileType(aDoc.getFileType());
        setFileLength(aDoc.getFileLength());
        setFileRank(aDoc.getFileRank());
        setIdate(aDoc.getIdate());
        //*-- language is a generic attribute too; it was previously dropped by the copy
        setLanguage(aDoc.getLanguage());
        setMdate(aDoc.getMdate());
        setMetadata(aDoc.getMetadata());
        setSummary(aDoc.getSummary());
        setTitle(aDoc.getTitle());
        //*-- tuple binding reference set during doc creation
    }

    /**
     * Empty method overridden by specific document types.
     *
     * @param cdoc ignored by this base implementation
     */
    public void loadSpecific(ClassifyDoc cdoc) { }

    /**
     * Empty method overridden by specific document types.
     *
     * @param iw ignored by this base implementation
     * @param storeTermVector ignored by this base implementation
     * @throws IOException never thrown here; declared so that overriding methods may throw it
     */
    public void loadIndex(IndexWriter iw, boolean storeTermVector) throws IOException { }

    /** @return author of the document */
    public String getAuthor()
    { return author; }

    /** @param author author of the document */
    public void setAuthor(String author)
    { this.author = author; }

    /** @return the buffer holding the plain text contents (the internal reference, not a copy) */
    public StringBuffer getContents()
    { return contents; }

    /** @param contents buffer holding the plain text contents; stored by reference */
    public void setContents(StringBuffer contents)
    { this.contents = contents; }

    /** @return context file where the document was found */
    public String getContextFile()
    { return contextFile; }

    /** @param contextFile context file where the document was found */
    public void setContextFile(String contextFile)
    { this.contextFile = contextFile; }

    /** @return alternate location of the file (e.g. URL) */
    public String getFileLocation()
    { return fileLocation; }

    /** @param fileLocation alternate location of the file (e.g. URL) */
    public void setFileLocation(String fileLocation)
    { this.fileLocation = fileLocation; }

    /** @return path name of the file in the local filesystem */
    public String getFileName()
    { return fileName; }

    /** @param fileName path name of the file in the local filesystem */
    public void setFileName(String fileName)
    { this.fileName = fileName; }

    /** @return signature of the file contents */
    public String getFileSignature()
    { return fileSignature; }

    /** @param fileSignature signature of the file contents */
    public void setFileSignature(String fileSignature)
    { this.fileSignature = fileSignature; }

    /** @return size of the file in bytes */
    public long getFileLength()
    { return fileLength; }

    /** @param fileLength size of the file in bytes */
    public void setFileLength(long fileLength)
    { this.fileLength = fileLength; }

    /** @return rank of the file */
    public long getFileRank()
    { return fileRank; }

    /** @param fileRank rank of the file */
    public void setFileRank(long fileRank)
    { this.fileRank = fileRank; }

    /** @return type of the file (image, article, audio, etc.) */
    public String getFileType()
    { return fileType; }

    /** @param fileType type of the file (image, article, audio, etc.) */
    public void setFileType(String fileType)
    { this.fileType = fileType; }

    /** @return last indexed date */
    public long getIdate()
    { return idate; }

    /** @param idate last indexed date */
    public void setIdate(long idate)
    { this.idate = idate; }

    /** @return last modified date */
    public long getMdate()
    { return mdate; }

    /** @param mdate last modified date */
    public void setMdate(long mdate)
    { this.mdate = mdate; }

    /** @return metadata extracted from the document */
    public String getMetadata()
    { return metadata; }

    /** @param metadata metadata extracted from the document */
    public void setMetadata(String metadata)
    { this.metadata = metadata; }

    /** @return language of the document */
    public String getLanguage()
    { return language; }

    /** @param language language of the document */
    public void setLanguage(String language)
    { this.language = language; }

    /** @return abstract of the document contents */
    public String getSummary()
    { return summary; }

    /** @param summary abstract of the document contents */
    public void setSummary(String summary)
    { this.summary = summary; }

    /** @return title of the document */
    public String getTitle()
    { return title; }

    /** @param title title of the document */
    public void setTitle(String title)
    { this.title = title; }

    /** @return binding used to create object and database entries in the Berkeley DB */
    public TupleBinding getBdbBinding()
    { return bdbBinding; }

    /** @param bdbBinding binding used to create object and database entries in the Berkeley DB */
    public void setBdbBinding(TupleBinding bdbBinding)
    { this.bdbBinding = bdbBinding; }

    /**
     * Build a multi-line dump of the document: one "label value" line per
     * attribute, followed by the contents passed through
     * {@code StringTools.fillin(text, 70, '.')}. A null contents buffer is
     * rendered as empty text instead of raising a NullPointerException.
     *
     * @return printable description of this document
     */
    public String toString()
    {
        StringBuffer sb = new StringBuffer();
        appendField(sb, " File Type: ", getFileType());
        appendField(sb, " Title: ", getTitle());
        appendField(sb, " Author: ", getAuthor());
        appendField(sb, " Mdate: ", String.valueOf(getMdate()));
        appendField(sb, " Idate: ", String.valueOf(getIdate()));
        appendField(sb, " Summary ", getSummary());
        appendField(sb, " Context: ", getContextFile());
        appendField(sb, " Filename: ", getFileName());
        appendField(sb, " Signature: ", getFileSignature());
        appendField(sb, " File Location: ", getFileLocation());
        appendField(sb, " File Length: ", String.valueOf(getFileLength()));
        appendField(sb, " File Rank: ", String.valueOf(getFileRank()));
        appendField(sb, " Metadata: ", getMetadata());
        appendField(sb, " Language: ", getLanguage());

        //*-- setContents(null) is allowed, so guard the dereference
        String text = (contents == null) ? "" : contents.toString();
        sb.append(" Contents: ");
        sb.append(StringTools.fillin(text, 70, '.'));
        return sb.toString();
    }

    //*-- append one "label value" line, terminated by Constants.NEWLINE, to the buffer
    private static void appendField(StringBuffer sb, String label, String value)
    {
        sb.append(label);
        sb.append(value);
        sb.append(Constants.NEWLINE);
    }
}