// Log the destination path that the index will be written to.
LOG.info("To index into " + perm);
// delete old, if any
// NOTE(review): the 'true' flag presumably requests a recursive delete —
// confirm against the FileSystem API in use.
fs.delete(perm, true);
// Start from a fresh IndexConfiguration and, when the job carries an
// "hbase.index.conf" entry, feed that entry's text to addFromXML.
// indexConf is final because the anonymous RecordWriter below reads it.
final IndexConfiguration indexConf = new IndexConfiguration();
String content = job.get("hbase.index.conf");
if (content != null) {
indexConf.addFromXML(content);
}
// Reflectively instantiate the analyzer class named by the index
// configuration. The class must be an Analyzer subclass exposing a public
// constructor that takes a single Lucene Version argument.
String analyzerName = indexConf.getAnalyzerName();
Analyzer analyzer;
try {
  Class<? extends Analyzer> analyzerClass = Class.forName(analyzerName)
      .asSubclass(Analyzer.class);
  Constructor<? extends Analyzer> analyzerCtor = analyzerClass
      .getConstructor(Version.class);
  analyzer = analyzerCtor.newInstance(Version.LUCENE_30);
} catch (Exception e) {
  // Every failure (unknown class, wrong type, missing constructor,
  // constructor error, ...) surfaces as an IOException. Chain the original
  // exception so the real reason is not lost from the stack trace.
  throw new IOException("Error in creating an analyzer object "
      + analyzerName, e);
}
// build locally first: the writer opens a directory at the local path
// returned by fs.startLocalOutput(perm, temp) rather than at perm itself.
// NOTE(review): the 'true' argument is presumably Lucene's "create" flag
// (start a new index) — confirm against the IndexWriter constructor docs.
final IndexWriter writer = new IndexWriter(FSDirectory.open(new File(fs
.startLocalOutput(perm, temp).toString())), analyzer, true,
IndexWriter.MaxFieldLength.LIMITED);
// no delete, so no need for maxBufferedDeleteTerms
// Apply the buffering / merge tuning values carried by indexConf.
writer.setMaxBufferedDocs(indexConf.getMaxBufferedDocs());
// NOTE(review): the constructor was given MaxFieldLength.LIMITED, and the
// configured value is set again here; presumably this later call wins —
// confirm.
writer.setMaxFieldLength(indexConf.getMaxFieldLength());
writer.setMaxMergeDocs(indexConf.getMaxMergeDocs());
writer.setMergeFactor(indexConf.getMergeFactor());
// Optionally install a custom Similarity on the writer. When the index
// configuration names no similarity class, the writer is left untouched.
String similarityName = indexConf.getSimilarityName();
if (similarityName != null) {
  try {
    Class<? extends Similarity> similarityClass = Class.forName(
        similarityName).asSubclass(Similarity.class);
    // NOTE(review): only a public constructor taking a Version is looked
    // up; a Similarity class exposing just a no-arg constructor will fail
    // here with NoSuchMethodException — confirm this is intended.
    Constructor<? extends Similarity> ctor = similarityClass
        .getConstructor(Version.class);
    Similarity similarity = ctor.newInstance(Version.LUCENE_30);
    writer.setSimilarity(similarity);
  } catch (Exception e) {
    // Report any failure as an IOException, chaining the original
    // exception so the underlying reason stays visible to the caller.
    throw new IOException("Error in creating a similarity object "
        + similarityName, e);
  }
}
// Compound-file usage is taken from the index configuration.
writer.setUseCompoundFile(indexConf.isUseCompoundFile());
return new RecordWriter<ImmutableBytesWritable, LuceneDocumentWrapper>() {
AtomicBoolean closed = new AtomicBoolean(false);
private long docCount = 0;
/**
 * Adds the Lucene document carried by {@code value} to the index, bumps the
 * running document count and signals progress.
 *
 * @param key   row key of the record; not consulted by this method
 * @param value wrapper holding the document to index
 * @throws IOException if the index writer fails to add the document
 */
public void write(ImmutableBytesWritable key, LuceneDocumentWrapper value)
    throws IOException {
  // unwrap the document and hand it straight to the index writer
  writer.addDocument(value.get());
  docCount += 1;
  // report progress after each document has been added
  progress.progress();
}
public void close(final Reporter reporter) throws IOException {
// spawn a thread to give progress heartbeats
Thread prog = new Thread() {
  /**
   * Heartbeat loop: sets the reporter status to "closing" and then sleeps
   * for one second, repeating until the {@code closed} flag becomes true.
   */
  @Override
  public void run() {
    for (;;) {
      if (closed.get()) {
        // the closed flag is set, nothing more to report
        return;
      }
      try {
        reporter.setStatus("closing");
        Thread.sleep(1000);
      } catch (InterruptedException interrupted) {
        // woken up early: fall through and re-check the closed flag
      } catch (Throwable failure) {
        // any other failure ends the heartbeat thread quietly
        return;
      }
    }
  }
};
try {
prog.start();
// optimize index
if (indexConf.doOptimize()) {
if (LOG.isInfoEnabled()) {
LOG.info("Optimizing index.");
}
writer.optimize();
}