Package com.dotmarketing.util

Source Code of com.dotmarketing.util.GoogleMiniUtils

package com.dotmarketing.util;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import com.dotcms.repackage.javax.xml.parsers.DocumentBuilder;
import com.dotcms.repackage.javax.xml.parsers.DocumentBuilderFactory;

import com.dotcms.repackage.org.w3c.dom.Document;
import com.dotcms.repackage.org.w3c.dom.Element;
import com.dotcms.repackage.org.w3c.dom.NodeList;

import com.dotmarketing.beans.GoogleMiniSearch;
import com.dotmarketing.beans.GoogleMiniSearchResult;
/**
* Utility class (Infrastructure Layer) used to make searches using GoogleMini engine
* @author Edgar De Sousa
* @version 1.0
*
*/
public class GoogleMiniUtils {
   
 
  /**
   * Searches under google mini and return the results in a special object
   *
   * @param client A valid setup client in google mini
   * @param collection The collection to search on
   * @param subcollection Subcollection to search on
   * @param query Search query as entered by the user
   * @param metaquery Meta tags query, filter to the search by meta tags names and values I.E. key:value.key:value.key:value|key:value. Use . as AND. User | as OR
   * @param start Start index use -1 if want all results
   * @param num Number of results to show use -1 to show all
   * @param autoFilter Let google mini auto filter results, google mini automatic filtering does Duplicate Snippet Filter and Duplicate Directory Filter
   * @return
   * @throws Exception
   */
  public static GoogleMiniSearch searchGoogleMini(String client, String collection, String subcollection, String query,
      String metaquery, int start, int num, boolean autoFilter)
      throws Exception {

    if (UtilMethods.isSet(client)) {
      StringBuffer searchURL = new StringBuffer(512);
      searchURL.ensureCapacity(128);
      String url = Config.getStringProperty("GOOGLE_MINI_SEARCH_URL");
      if (!url.endsWith("/"))
        url += "/";
      searchURL.append(url + "search?output=xml_no_dtd");
      searchURL.append("&numgm=3");
      if(!autoFilter)
        searchURL.append("&filter=0");
      searchURL.append("&client="+UtilMethods.encodeURL(client));
      searchURL.append("&getfields=*");
      if (-1 < start) {
        searchURL.append("&start=");
        searchURL.append(start);
        if (-1 < num) {
          searchURL.append("&num=");
          searchURL.append(num);
        }
      }
      if (UtilMethods.isSet(query)) {
        searchURL.append("&q=" + UtilMethods.encodeURL(query));
      }
      if (UtilMethods.isSet(metaquery)) {
        searchURL.append("&partialfields=" + metaquery);
      }
     
      if (UtilMethods.isSet(subcollection)) {
        searchURL.append("&restrict=");
        searchURL.append(UtilMethods.encodeURL(subcollection));
      }
     
      if (UtilMethods.isSet(collection)) {
        searchURL.append("&site=");
        searchURL.append(UtilMethods.encodeURL(collection));
      }

      return parseGoogleMiniResults(searchURL.toString());
    }
    return null;
  }
 
  /**
   * Searches under google custom and return the results in a special object
   *
   * @param client A valid setup client in google mini (Null can be used if you want to use the default value 'GOOGLE_CUSTOM_SEARCH_CSEID' configured in 'dotmarketing-config.properties')
   * @param query Search query as entered by the user
   * @param start Start index use -1 if want all results
   * @param num Number of results to show use -1 to show all (Maximum value is 20. If you request more than 20 results, only 20 results will be returned)
   * @param autoFilter Let google mini auto filter results, google mini automatic filtering does Duplicate Snippet Filter and Duplicate Directory Filter
   * @return
   * @throws Exception
   */
  public static GoogleMiniSearch searchGoogleCustom(String client, String query, int start, int num, boolean autoFilter)
      throws Exception {

    if (UtilMethods.isSet(client)) {
      StringBuffer searchURL = new StringBuffer(512);
      searchURL.ensureCapacity(128);
      String url = Config.getStringProperty("GOOGLE_CUSTOM_SEARCH_URL");
      String cx = Config.getStringProperty("GOOGLE_CUSTOM_SEARCH_CSEID");
      if (!url.endsWith("/"))
        url += "/";
      searchURL.append(url + "cse?output=xml_no_dtd");
      if(UtilMethods.isSet(cx))
        searchURL.append("&cx=" + UtilMethods.encodeURL(cx));
      if(!autoFilter)
        searchURL.append("&filter=0");
      searchURL.append("&client=" + UtilMethods.encodeURL(client));
      if (-1 < start) {
        searchURL.append("&start=");
        searchURL.append(start);
        if (-1 < num) {
          searchURL.append("&num=");
          searchURL.append(num);
        }
      }
      if (UtilMethods.isSet(query)) {
        searchURL.append("&q=" + UtilMethods.encodeURL(query));
      }
     
      return parseGoogleMiniResults(searchURL.toString());
    }
    return null;
  }
 
  private static GoogleMiniSearch parseGoogleMiniResults(String searchURL) throws Exception {
   
    GoogleMiniSearch result = null;
    InputStream stream = null;

    try {
      DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory
          .newInstance();
      docBuilderFactory.setValidating(false);
      DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
      stream = new URL(searchURL.toString()).openStream();
      Document doc = docBuilder.parse(stream);
      if ((doc != null) && (doc.getChildNodes() != null)) {
        result = new GoogleMiniSearch();
        Element nodeGSP = (Element) doc.getChildNodes().item(0);
        NodeList GSPChildNodes = nodeGSP.getElementsByTagName("TM");
        if ((GSPChildNodes != null) && (0 < GSPChildNodes.getLength())) {
          Element nodeTM = (Element) GSPChildNodes.item(0);
          result.setSearchTime(nodeTM.getChildNodes().item(0)
              .getNodeValue());
        }
        GSPChildNodes = nodeGSP.getElementsByTagName("Q");
        if ((GSPChildNodes != null) && (0 < GSPChildNodes.getLength())) {
          Element nodeQ = (Element) GSPChildNodes.item(0);
          if (nodeQ.getChildNodes().item(0) != null)
            result.setQuery(nodeQ.getChildNodes().item(0)
                .getNodeValue());
        }
        GSPChildNodes = nodeGSP.getElementsByTagName("PARAM");
        if ((GSPChildNodes != null) && (0 < GSPChildNodes.getLength())) {
          HashMap<String, HashMap<String, String>> params = new HashMap<String, HashMap<String, String>>();
          HashMap<String, String> paramValues;
          for (int i=0; i < GSPChildNodes.getLength(); ++i) {
            paramValues = new HashMap<String, String>();
            Element nodeRES = (Element) GSPChildNodes.item(i);
            paramValues.put("value", nodeRES.getAttribute("value"));
            paramValues.put("original_value", nodeRES.getAttribute("original_value"));
            params.put(nodeRES.getAttribute("name"), paramValues);
          }
          result.setParams(params);
        }
        GSPChildNodes = nodeGSP.getElementsByTagName("Context");
        if ((GSPChildNodes != null) && (0 < GSPChildNodes.getLength())) {
          NodeList ContextChildNodes = nodeGSP.getElementsByTagName("title");
          if ((ContextChildNodes != null) && (0 < ContextChildNodes.getLength())) {
            Element nodeRES = (Element) ContextChildNodes.item(0);
            result.setContextTitle(nodeRES.getChildNodes().item(0)
                .getNodeValue());
          }
        }
        GSPChildNodes = nodeGSP.getElementsByTagName("RES");
        if ((GSPChildNodes != null) && (0 < GSPChildNodes.getLength())) {
          Element nodeRES = (Element) GSPChildNodes.item(0);
          result.setFromIndex(Integer.parseInt(nodeRES
              .getAttribute("SN")));
          result.setToIndex(Integer.parseInt(nodeRES
              .getAttribute("EN")));
        }
        GSPChildNodes = nodeGSP.getElementsByTagName("M");
        if ((GSPChildNodes != null) && (0 < GSPChildNodes.getLength())) {
          Element nodeM = (Element) GSPChildNodes.item(0);
          result.setEstimatedTotal(Integer.parseInt(nodeM
              .getChildNodes().item(0).getNodeValue()));
        }
        GSPChildNodes = nodeGSP.getElementsByTagName("NB");
        if ((GSPChildNodes != null) && (0 < GSPChildNodes.getLength())) {
          Element nodeNB = (Element) GSPChildNodes.item(0);
          NodeList NBChildNodes = nodeNB.getElementsByTagName("PU");
          if ((NBChildNodes != null)
              && (0 < NBChildNodes.getLength())) {
            Element nodePU = (Element) NBChildNodes.item(0);
            result.setPreviousResultPageRelativeURL(nodePU
                .getChildNodes().item(0).getNodeValue());
          }
          NBChildNodes = nodeNB.getElementsByTagName("NU");
          if ((NBChildNodes != null)
              && (0 < NBChildNodes.getLength())) {
            Element nodeNU = (Element) NBChildNodes.item(0);
            result.setNextResultPageRelativeURL(nodeNU
                .getChildNodes().item(0).getNodeValue());
          }
        }
        /*** KeyMatches ****/
        GSPChildNodes = nodeGSP.getElementsByTagName("GM");
        if (GSPChildNodes != null) {
          ArrayList<GoogleMiniSearchResult> keyMatchResults = new ArrayList<GoogleMiniSearchResult>();
          GoogleMiniSearchResult googleSearchResult;
          Element nodeRKeyMatch;
          Element nodeGD;
          Element nodeGL;
          NodeList RChildNodes;
          for (int i = 0; i < GSPChildNodes.getLength(); ++i) {
            googleSearchResult = new GoogleMiniSearchResult();
            nodeRKeyMatch = (Element) GSPChildNodes.item(i);
            RChildNodes = nodeRKeyMatch.getElementsByTagName("GD");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeGD = (Element) RChildNodes.item(0);
              googleSearchResult.setTitle(nodeGD
                  .getChildNodes().item(0).getNodeValue());
            }
            RChildNodes = nodeRKeyMatch.getElementsByTagName("GL");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeGL = (Element) RChildNodes.item(0);
              googleSearchResult.setResultURL(nodeGL
                  .getChildNodes().item(0).getNodeValue());
            }
            googleSearchResult.setSnippet("keyMatch");
            keyMatchResults.add(googleSearchResult);
          }
          result.setKeyMatchResults(keyMatchResults);
        }
        /**********/
        GSPChildNodes = nodeGSP.getElementsByTagName("R");
        if (GSPChildNodes != null) {
          GoogleMiniSearchResult googleSearchResult;
          HashMap<String, String> additionalSearchDetails;
          ArrayList<GoogleMiniSearchResult> results = new ArrayList<GoogleMiniSearchResult>(
              100);
          results.ensureCapacity(100);
          Element nodeR;
          NodeList RChildNodes;
          Element nodeU;
          Element nodeUE;
          Element nodeT;
          Element nodeRK;
          Element nodeFS;
          Element nodeMT;
          String fieldName;
          String fieldValue;
          List<String> fieldValues;
          HashMap<String, List<String>> fields;
          HashMap<String, String[]> metaTagsFields;
          Element nodeS;
          Element nodeHAS;
          NodeList HASChildNodes;
          Element nodeL;
          Element nodeHN;
          for (int i = 0; i < GSPChildNodes.getLength(); ++i) {
            googleSearchResult = new GoogleMiniSearchResult();
            nodeR = (Element) GSPChildNodes.item(i);
            googleSearchResult.setResultIndex(Integer
                .parseInt(nodeR.getAttribute("N")));
            if (UtilMethods.isSet(nodeR.getAttribute("L")))
              googleSearchResult.setIdentationLevel(Integer
                  .parseInt(nodeR.getAttribute("L")));
            googleSearchResult.setMimeType(nodeR
                .getAttribute("MIME"));
            RChildNodes = nodeR.getElementsByTagName("U");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeU = (Element) RChildNodes.item(0);
              googleSearchResult.setResultURL(nodeU
                  .getChildNodes().item(0).getNodeValue());
            }
            RChildNodes = nodeR.getElementsByTagName("UE");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeUE = (Element) RChildNodes.item(0);
              googleSearchResult.setResultURLEnconded(nodeUE
                  .getChildNodes().item(0).getNodeValue());
            }
            RChildNodes = nodeR.getElementsByTagName("T");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeT = (Element) RChildNodes.item(0);
              googleSearchResult.setTitle(nodeT.getChildNodes()
                  .item(0).getNodeValue());
            }
            RChildNodes = nodeR.getElementsByTagName("RK");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeRK = (Element) RChildNodes.item(0);
              googleSearchResult
                  .setGeneralRatingRelevance(Integer
                      .parseInt(nodeRK.getChildNodes()
                          .item(0).getNodeValue()));
            }
            RChildNodes = nodeR.getElementsByTagName("FS");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              additionalSearchDetails = new HashMap<String, String>(
                  10);
              for (int j = 0; j < RChildNodes.getLength(); ++j) {
                nodeFS = (Element) RChildNodes.item(j);
                additionalSearchDetails.put(nodeFS
                    .getAttribute("NAME"), nodeFS
                    .getAttribute("VALUE"));
              }
              googleSearchResult
                  .setAdditionalSearchDetails(additionalSearchDetails);
            }
            RChildNodes = nodeR.getElementsByTagName("MT");

            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              fields = new HashMap<String, List<String>>(10);
              for (int j = 0; j < RChildNodes.getLength(); ++j) {
                nodeMT = (Element) RChildNodes.item(j);
                fieldName = nodeMT.getAttribute("N");
                fieldValue = nodeMT.getAttribute("V");
                // Verify if the field is a date
                if (fieldValue.startsWith("D:")) {
                  fieldValue = fieldValue.substring(2);
                  Date dateValue = UtilMethods
                      .googleDateToDate(fieldValue);
                  fieldValue = UtilMethods
                      .dateToGoogleDate(dateValue);
                }
                fieldValues = fields.get(fieldName);
                if (fieldValues == null) {
                  fieldValues = new ArrayList<String>(10);
                  fields.put(fieldName, fieldValues);
                }
                fieldValues.add(fieldValue);
              }
              metaTagsFields = new HashMap<String, String[]>(
                  fields.size());

              Iterator<String> names = fields.keySet().iterator();
              for (; names.hasNext();) {
                fieldName = names.next();
                metaTagsFields.put(fieldName, fields.get(
                    fieldName).toArray(new String[0]));
              }
              googleSearchResult
                  .setMetaTagsFields(metaTagsFields);
            }
            RChildNodes = nodeR.getElementsByTagName("S");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeS = (Element) RChildNodes.item(0);
              if(nodeS.getChildNodes().getLength() > 0)
                googleSearchResult.setSnippet(nodeS.getChildNodes()
                  .item(0).getNodeValue());
            }
            RChildNodes = nodeR.getElementsByTagName("LANG");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeS = (Element) RChildNodes.item(0);
              if(nodeS.getChildNodes().getLength() > 0)
                googleSearchResult.setLanguage(nodeS.getChildNodes()
                  .item(0).getNodeValue());
            }
            RChildNodes = nodeR.getElementsByTagName("Label");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeS = (Element) RChildNodes.item(0);
              if(nodeS.getChildNodes().getLength() > 0)
                googleSearchResult.setLabel(nodeS.getChildNodes()
                  .item(0).getNodeValue());
            }
            RChildNodes = nodeR.getElementsByTagName("HAS");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeHAS = (Element) RChildNodes.item(0);
              HASChildNodes = nodeHAS.getElementsByTagName("L");
              if ((HASChildNodes != null)
                  && (0 < HASChildNodes.getLength())) {
                nodeL = (Element) HASChildNodes.item(0);
                googleSearchResult.setSpecialQueryTerm(nodeL
                    .getAttribute("TAG"));
              }
              HASChildNodes = nodeHAS.getElementsByTagName("C");
              if ((HASChildNodes != null)
                  && (0 < HASChildNodes.getLength())) {
                nodeL = (Element) HASChildNodes.item(0);
                googleSearchResult.setDocumentCacheSize(nodeL
                    .getAttribute("SZ"));
                googleSearchResult.setDocumentCacheId(nodeL
                    .getAttribute("CID"));
              }
            }
            RChildNodes = nodeR.getElementsByTagName("HN");
            if ((RChildNodes != null)
                && (0 < RChildNodes.getLength())) {
              nodeHN = (Element) RChildNodes.item(0);
              googleSearchResult.setMoreResultsDirectory(nodeHN
                  .getAttribute("U"));
            }
            results.add(googleSearchResult);
          }
          result.setSearchResults(results);
        }
      }
    } catch (Exception e) {
      if (stream != null)
        stream.close();
      throw e;
    }
    return result;
  }
}
TOP

Related Classes of com.dotmarketing.util.GoogleMiniUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.