Package org.apache.poi.hdf.extractor

Examples of org.apache.poi.hdf.extractor.WordDocument


public void getDocument(String ifile, IndexableDoc doc)
{
  String bodyText = null;
  try
  { logger.info("Using POI classes to extract text from Word document: " + ifile);
  WordDocument wd = new WordDocument( new FileInputStream( new File(ifile) ) );
  StringWriter docTextWriter = new StringWriter();
  wd.writeAllText(new PrintWriter(docTextWriter));   
  docTextWriter.close();

  //*-- if no text was extracted, try antiword
  bodyText = docTextWriter.toString(); bodyText = StringTools.filterChars(bodyText);
  if (bodyText.length() == 0) bodyText = tryAntiword(ifile);
View Full Code Here


* @author Thierry Templier
*/
public class PoiWordDocumentHandler extends AbstractTypeFileDocumentHandler {

  protected String extractText(InputStream inputStream) throws IOException {
    WordDocument wordDocument = new WordDocument(inputStream);
    StringWriter textWriter = new StringWriter();
    wordDocument.writeAllText(new PrintWriter(textWriter));
    textWriter.close();
    return textWriter.toString();
  }
View Full Code Here

  public void buildDocument(InputStream is, Document doc) throws DocumentHandlerException {
    String bodyText = null;

    try {
      WordDocument wordDoc = new WordDocument(is);
      StringWriter sw = new StringWriter();
      wordDoc.writeAllText(sw);
      sw.close();
      bodyText = sw.toString();
    } catch (Exception e) {
      throw new DocumentHandlerException(
          "Cannot extract text from a Word document", e);
View Full Code Here

    throws IOException {
        FileInputStream finp = new FileInputStream(doc);
        FileOutputStream fout = new FileOutputStream(output);

        try {
            WordDocument wd = new WordDocument(finp);       
            Writer writer = new OutputStreamWriter(fout, "UTF-16BE");
            wd.writeAllText(writer);
        } finally {
            close(finp);
            close(fout);
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.poi.hdf.extractor.WordDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.