Package org.vosao.search.impl

Source Code of org.vosao.search.impl.SearchIndexImpl

/**
* Vosao CMS. Simple CMS for Google App Engine.
*
* Copyright (C) 2009-2010 Vosao development team.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*
* email: vosao.dev@gmail.com
*/

package org.vosao.search.impl;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.vosao.business.Business;
import org.vosao.common.VosaoContext;
import org.vosao.dao.Dao;
import org.vosao.entity.ContentEntity;
import org.vosao.entity.FileEntity;
import org.vosao.entity.PageEntity;
import org.vosao.search.Hit;
import org.vosao.search.SearchIndex;
import org.vosao.search.SearchResultFilter;
import org.vosao.utils.StrUtil;

public class SearchIndexImpl implements SearchIndex {

  private static final Log logger = LogFactory.getLog(
      SearchIndexImpl.class);

  private static final String INDEX_MOD_DATE = "IndexModDate";

  private String language;
  private Map<String, Set<Long>> index;
  private Date indexModDate;

  public SearchIndexImpl(String aLanguage) {
    language = aLanguage;
    loadIndex();
  }
 
  @Override
  public void updateIndex(Long pageId) {
    PageEntity page = getDao().getPageDao().getById(pageId);
    if (page == null) {
      return;
    }
    refreshIndex();
    List<PageEntity> versions = getDao().getPageDao().selectByUrl(
        page.getFriendlyURL());
    for (PageEntity version : versions) {
      removeFromIndex(version.getId());
    }
    page = getDao().getPageDao().getByUrl(page.getFriendlyURL());
    if (page == null) {
      return;
    }
    if (!page.isSearchable()) {
      return;
    }
    String content = getDao().getPageDao().getContent(page.getId(),
        getLanguage());
    if (content == null) {
      return;
    }
    String data = StrUtil.extractSearchTextFromHTML(content.toLowerCase());
    String[] words = StrUtil.splitByWord(data);
    //logger.info(Arrays.asList(words));
    for (String word : words) {
      if (word.length() < 3) {
        continue;
      }
      if (!getIndex().containsKey(word)) {
        getIndex().put(word, new HashSet<Long>());
      }
      if (!getIndex().get(word).contains(page.getId())) {
        getIndex().get(word).add(page.getId());
      }
    }
  }
 
  @Override
  public void removeFromIndex(Long pageId) {
    for (Set<Long> pages : getIndex().values()) {
      pages.remove(pageId);
    }
  }
 
  @Override
  public void saveIndex() {
    try {
      byte[] indexContent = StrUtil.zipStringToBytes(indexToString());
      FileEntity file = getBusiness().getFileBusiness()
          .saveFile(getIndexFilename(), indexContent);
      indexModDate = file.getLastModifiedTime();
      getBusiness().getSystemService().getCache().getMemcache().put(
          getIndexKey(), indexModDate);
    }
    catch (Exception e) {
      e.printStackTrace();
    }
  }

  private String indexToString() {
    StringBuffer buf = new StringBuffer();
    int i = 0;
    for (String word : getIndex().keySet()) {
      if (getIndex().get(word).isEmpty()) {
        continue;
      }
      buf.append(i++ == 0 ? "" : ":").append(word).append("=");
      int j = 0;
      for (Long id : getIndex().get(word)) {
        buf.append(j++ == 0 ? "" : ",").append(id);
      }
    }
    return buf.toString();
  }

  @Override
  public List<Hit> search(SearchResultFilter filter, String query,
      int textSize) {
    try {
   
    refreshIndex();
    List<Hit> result = new ArrayList<Hit>();
    List<Long> pages = new ArrayList<Long>(getPageIds(query));
    for (Long pageId : pages) {
      PageEntity page = getDao().getPageDao().getById(pageId);
      if (page != null) {
        if (filter != null && !filter.check(page)) {
          continue;
        }
        ContentEntity content = getBusiness().getPageBusiness()
            .getPageContent(page, language);
        if (content != null) {
          String text = StrUtil.extractSearchTextFromHTML(
              content.getContent());
          if (text.length() > textSize) {
            text = text.substring(0, textSize);
          }
          result.add(new Hit(page, text, language));
        }
      }
      else {
        logger.error("Page not found " + pageId + ". Rebuild index.");
      }
    } 
    return result;
   
    }
    catch (Exception e) {
      e.printStackTrace();
      return Collections.EMPTY_LIST;
    }
  }

  private Set<Long> getPageIds(String query) {
    String[] words = StrUtil.splitByWord(query);
    if (words.length == 0) {
      return Collections.EMPTY_SET;
    }
    Set<Long> keys = getPageKeys(words[0]);
    int i = 0;
    for (String word : words) {
      if (i++ > 0) {
        keys = keysLogicalAnd(keys, getPageKeys(word));
      }
    }
    //logger.info("found keys " + keys.toString());
    return keys;   
 
   
  private Set<Long> keysLogicalAnd(Set<Long> l1, Set<Long> l2) {
    Set<Long> result = new HashSet<Long>();
    for (Long i : l1) {
      if (l2.contains(i) && !result.contains(i)) {
        result.add(i);
      }
    }
    return result;
 
 
  private Set<Long> getPageKeys(String word) {
    if (getIndex().containsKey(word)) {
      return getIndex().get(word);
    }
    return Collections.EMPTY_SET;
  }

  private String getIndexKey() {
    return INDEX_MOD_DATE + getLanguage();
  }
 
  private void refreshIndex() {
    Date date = (Date) getBusiness().getSystemService().getCache()
        .getMemcache().get(getIndexKey());
    if (index == null || date == null || !date.equals(indexModDate)) {
      loadIndex();
    }
  }

  private String getIndexFilename() {
    return "/tmp/index_" + getLanguage() + ".bin";
  }
 
  private void loadIndex() {
    try {
      index = new HashMap<String, Set<Long>>();
      indexModDate = null;
      FileEntity file = getBusiness().getFileBusiness()
          .findFile(getIndexFilename());
      if (file == null) {
        logger.error("Search index not found. " + getIndexFilename());
        return;
      }
      byte[] data = getDao().getFileDao().getFileContent(file);
      if (data != null) {
        String strIndex = StrUtil.unzipStringFromBytes(data);
        indexFromString(strIndex);
        indexModDate = file.getLastModifiedTime();
        Date dt = (Date)getBusiness().getSystemService().getCache()
            .getMemcache().get(getIndexKey());
        if (dt == null || dt.before(indexModDate)) {
          getBusiness().getSystemService().getCache().getMemcache()
              .put(getIndexKey(), indexModDate);
        }
      }
      else {
        logger.error("Search index is empty. " + getIndexFilename());
      }
    }
    catch (Exception e) {
      e.printStackTrace();
    }
  }

  private void indexFromString(String data) {
    if (StringUtils.isEmpty(data)) {
      return;
    }
    for (String wordBuf : data.split("\\:")) {
      //logger.info(wordBuf);
      String[] wordStruc = wordBuf.split("\\=");
      if (wordStruc.length != 2 ) {
        logger.error("Problem with index " + wordBuf);
        continue;
      }
      index.put(wordStruc[0], new HashSet<Long>());
      for (String key : wordStruc[1].split(",")) {
        index.get(wordStruc[0]).add(Long.valueOf(key));
      }
    }
  }

  private Business getBusiness() {
    return VosaoContext.getInstance().getBusiness();
  }
 
  private Dao getDao() {
    return getBusiness().getDao();
  }
 
  @Override
  public String getLanguage() {
    return language;
  }

  private Map<String, Set<Long>> getIndex() {
    if (index == null) {
      index = new HashMap<String, Set<Long>>();
    }
    return index;
  }

  @Override
  public void clear() {
    getIndex().clear();
  }
 
}
TOP

Related Classes of org.vosao.search.impl.SearchIndexImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.