package com.google.code.ftspc.lector.parsers.POI;
import com.google.code.ftspc.lector.indexers.AddDataToIndex;
import com.google.code.ftspc.lector.ini_and_vars.Vars;
import com.google.code.ftspc.lector.parsers.Parser;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Parser thread for Microsoft Word {@code .doc} files.
 *
 * <p>The thread extracts the paragraph text of one document with Apache POI's
 * {@link WordExtractor}, joins it into a single string and passes it to
 * {@link AddDataToIndex#doAddData}. If POI reports the file as an old Word
 * format ({@link OldWordFileFormatException}), the document is re-read with
 * {@link Word6Extractor} instead.
 *
 * <p>{@code Vars.current_run_indexes} is decremented once per processed file,
 * on success as well as on failure.
 *
 * @author Arthur Khusnutdinov
 */
public class DocParser extends Thread implements Parser {

    private String pathToFile;
    private String fileName;

    /**
     * Parses the file set by {@link #start_th(String, String)} and adds its
     * text to the index. Failures are logged and never propagated.
     */
    @Override
    public void run() {
        InputStream input = null;
        boolean oldWordFormat = false;
        try {
            input = new FileInputStream(pathToFile);
            WordExtractor extractor = new WordExtractor(input);
            String fileContent = joinParagraphs(extractor.getParagraphText());
            AddDataToIndex indexer = new AddDataToIndex(null);
            indexer.doAddData(fileContent, pathToFile, fileName);
            // NOTE(review): "--" on a field is not atomic; if several parser
            // threads share this counter it can lose updates - confirm in Vars.
            Vars.current_run_indexes--;
        } catch (OldWordFileFormatException ex) {
            // Fall back only after the stream below has been closed, so the
            // file is not held open twice while it is parsed again.
            oldWordFormat = true;
        } catch (Exception ex) {
            Vars.current_run_indexes--;
            // Two-argument form, as in parseWord6, so the stack trace is logged.
            Vars.logger.fatal("Error: ", ex);
        } finally {
            closeQuietly(input);
        }
        if (oldWordFormat) {
            parseWord6(pathToFile);
        }
    }

    /**
     * Fallback for documents rejected with {@link OldWordFileFormatException}:
     * reads the file through {@link HWPFOldDocument} and {@link Word6Extractor}
     * and adds its text to the index. Failures are logged and never propagated.
     *
     * <p>NOTE(review): the original author flagged this method with
     * "Check this!!!" and marked it deprecated; it is still the only fallback
     * used by {@link #run()}.
     *
     * @param pathToFile path of the document to parse
     * @deprecated flagged by the original author as needing review
     */
    @Deprecated
    private void parseWord6(String pathToFile) {
        FileInputStream input = null;
        try {
            File docFile = new File(pathToFile);
            input = new FileInputStream(docFile.getAbsolutePath());
            POIFSFileSystem fileSystem = new POIFSFileSystem(input);
            HWPFOldDocument document = new HWPFOldDocument(fileSystem);
            Word6Extractor extractor = new Word6Extractor(document);
            String fileContent = joinParagraphs(extractor.getParagraphText());
            AddDataToIndex indexer = new AddDataToIndex(null);
            indexer.doAddData(fileContent, pathToFile, fileName);
            Vars.current_run_indexes--;
        } catch (Exception ex) {
            Vars.current_run_indexes--;
            Vars.logger.fatal("Error: ", ex);
        } finally {
            closeQuietly(input);
        }
    }

    /**
     * Stores the file to parse and starts this thread.
     *
     * @param pathToFile path of the document to parse
     * @param fileName   name of the document, passed on to the indexer
     */
    @Override
    public void start_th(String pathToFile, String fileName) {
        this.pathToFile = pathToFile;
        this.fileName = fileName;
        this.start();
    }

    /**
     * Concatenates the paragraphs into one string, each preceded by a single
     * space (so a non-empty result starts with a space).
     *
     * @param paragraphs paragraph texts returned by the extractor
     * @return the joined text, or an empty string for an empty array
     */
    private static String joinParagraphs(String[] paragraphs) {
        StringBuilder content = new StringBuilder();
        for (String paragraph : paragraphs) {
            content.append(' ').append(paragraph);
        }
        return content.toString();
    }

    /**
     * Closes the stream if it was opened, logging any {@link IOException}.
     *
     * @param stream stream to close; {@code null} when opening the file failed
     */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ex) {
            Vars.logger.fatal("Error", ex);
        }
    }
}