Package com.etown.lucene

Source Code of com.etown.lucene.PreProcessor

package com.etown.lucene;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Date;

import net.sf.json.JSONObject;

import org.apache.log4j.Logger;

import com.etown.model.Article;
/**
* 文件预处理类
* @author AFI
*/
public class PreProcessor {

  private static Logger log = Logger.getLogger(PreProcessor.class);
 
  private PreProcessor() {}
 
  public static void Process(Article article) throws Exception {
    //将全角标点转成半角标点
    article.setTitle(characterProcessor(article.getTitle()));
    article.setContent(characterProcessor(article.getContent()));
    //创建txt文件
    createTxtFile(article);
  }
  /**
   * 预处理字符(将全角标点转换为半角标点)
   * @param handlerStr 需要处理的字符串
   * @return 处理好的字符串
   */
  private static String characterProcessor(String handlerStr) {
    String resultStr = "";
    String subStr = null;
    byte[] unicodeByte = null;

    try {
      for(int i=0; i<handlerStr.length(); i++) {
        subStr = handlerStr.substring(i, i+1);
        unicodeByte = subStr.getBytes("unicode");
       
        if(unicodeByte[3] == -1) {
          unicodeByte[2] = (byte)(unicodeByte[2] + 32);
          unicodeByte[3] = 0;
          resultStr += new String(unicodeByte,"unicode");
        } else {
          resultStr += subStr;
        }
      }
    } catch (Exception e) {
      log.info("全角标点转半角标点时出错");
      e.printStackTrace();
    }
    return resultStr;
  }
  /**
   * 在相应的目录下生成txt文件
   * @param article
   */
  private static void createTxtFile(Article article) {
    //String folderPath = System.getProperty("web.home") + System.getProperty("luceneLocalFolder") + "/" + String.valueOf(article.getUserId());
    String folderPath = "F:/LuceneTextFolder/" + String.valueOf(article.getUserId());
    String filePath =  folderPath + "/" +article.getTitle() + ".txt";
    log.info("txt文件创建路径" + filePath);
    FileWriter fw = null;
    try {
      File folderFile = new File(folderPath);
      if(!folderFile.exists()) {
        folderFile.mkdir();
      }
      File luceneLocalFile = new File(filePath);
      if(!luceneLocalFile.exists()) {
        luceneLocalFile.createNewFile();
      }
      fw = new FileWriter(filePath);
      JSONObject articleJsonObj = JSONObject.fromObject(article);
      String outPutStr = articleJsonObj.toString();
        char buffer[] = outPutStr.toCharArray();
        fw.write(buffer, 0, buffer.length);
        fw.flush();
    } catch (Exception e) {
      log.info("txt文件写出错误");
      e.printStackTrace();
    } finally{
      if(fw != null) {
        try {
          fw.close();
        } catch (IOException e) {
          log.info("文件写出时关闭FileWriter时错误");
          e.printStackTrace();
        }
      }
    }
  }
 
  public static void main(String[] args) {
    Article article = new Article();
    article.setArticleId(1);
    article.setTitle("测试");
    article.setContent("The FileReader class creates a Reader that you can use to read the contents of a file。 Its two most commonly used constructors are shown here:");
    article.setPubDate(new Date());
    article.setModifyDate(new Date());
    article.setUserId(1231);
    try {
      PreProcessor.Process(article);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}
TOP

Related Classes of com.etown.lucene.PreProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.