Package com.cloudant.indexers

Source Code of com.cloudant.indexers.MyCustomSearch

package com.cloudant.indexers;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.cloudant.couchdbjavaserver.*;
import com.cloudant.index.CouchIndexUtils;
import com.cloudant.index.IndexType;
import com.cloudant.index.IndexUtilities;
import com.cloudant.index.SingleDocumentIndex;
import com.stinkyteddy.utils.StDateUtils;

import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.util.*;

public class MyCustomSearch implements SearchView {
 

  private Map<String,FieldOption> fieldsToStore = null;
  private Map<Pattern, String> regexps = null;
  private Map<String, FieldOption> matches = null;
  private HashSet<String> misses = null;
  private Analyzer analyzer = null;
  private List<SimpleDateFormat> dateFormats = null;
   
  public Analyzer getAnalyzer() {
    if (analyzer == null) {
      return new StandardAnalyzer(Version.LUCENE_30);
    } else {
      return analyzer;
    }
  }
 
  public void setAnalyzer(Analyzer in) {
    analyzer = in;
  }
 
  public void Log(String message) {

  }

  public JSONArray MapDoc(JSONObject doc) {
    JSONArray out = new JSONArray();
    try {
      Analyzer analyzer = getAnalyzer();
      SingleDocumentIndex index = new SingleDocumentIndex();
      // user specified field set

     
      Map<String, List<Object>> mapped = CouchIndexUtils.MapJSONObject2Object(doc, null);
      if (mapped == null) {
//        Log("can't map json doc");
        return out.put(new JSONArray());
      }
      for (String field : mapped.keySet()) {
        try {
          FieldOption fo = findField(field);
          if (fo == null) continue;
          List<Object> l = mapped.get(field);
          Iterator<Object> iter = l.iterator();
          while (iter.hasNext()) {
            Object o = iter.next();
            if (o == null) continue;
            final IndexType type = fo.getType();
            final float boost = (float)fo.getBoost();
            String luceneName = fo.getFieldName();
            if (luceneName == null) luceneName = field;
            switch (type) {
            case STRING: index.addField(luceneName, IndexUtilities.ObjectToString(o), analyzer, boost);break;
            case KEYWORD: index.addField(luceneName, IndexUtilities.ObjectToString(o), boost);break;
            case BOOLEAN: index.addField(luceneName, o, boost);break;
            // JSON doesn't distinguish between int and long, always returns long
            case INT: index.addField(luceneName, (Long)o, boost);break;
            case LONG: index.addField(luceneName, (Long)o, boost);break;
            case FLOAT: index.addField(luceneName, (Float)o, boost);break;
            case DOUBLE: index.addField(luceneName, (Double)o, boost);break;
            case DATE: {
              Date d = null;
              d = parseDate((String)o);
              if (d != null) {
                index.addField(luceneName, d.getTime(), boost);break;
              } else {
                index.addField(luceneName, IndexUtilities.ObjectToString(o), boost);break;               
              }
            }
            case JSONOBJECT: index.addField(luceneName, o, boost);break;
            case NULL: index.addField(luceneName, (Object)JSONObject.NULL, boost);break;
            }
          }
        } catch (Exception e) {
          /* probably a casting exception */
        }
      }
      out = index.jsonMap();
    } catch (Exception e) {
      out.put(new JSONArray());
      if (e != null ) {
        Log("Map Exception: " + e.toString());
      }
    }
    return out;
  }

  public FieldOption findField(String field) {
    if (fieldsToStore == null || field == null) return null;
    if (misses.contains(field)) return null;
    if (fieldsToStore.containsKey(field)) return fieldsToStore.get(field);
    if (matches.containsKey(field)) return matches.get(field);
    if (regexps == null) return null;
    for (Pattern p: regexps.keySet()) {
      Matcher m = p.matcher(field);
      if (m != null && m.matches()) {
        final FieldOption fo = fieldsToStore.get(regexps.get(p));
        matches.put(field, fo);
        return fo;
      }
    }
    misses.add(field);
    return null;
  }

  /**
   * This configures the list of fields to index.
   *
   * @param A
   *           
   *            json object of the form {"fields":[{"name":"field_name","lucenename":"name_in_index","boost":1.0,"type":"index_type",regexp:true/false,"dateformat":"java_simple_date_fromat"}, ...],"analyzer":"name_of_analyzer"} where
   *            "name" specifies the json field to be indexed (can be regular expression, see below).
   *            "lucenename" is the name of the field in the Cloudant "Lucene" index
   *            the boosts are the weights applied to each field. Boosts are
   *            optional (default is 1.0)
   *            for a list of index types, see com.javaviews.IndexType.java. "string","date","long","float","object" etc..
   *            string types are run through the specified analyzer, where they are tokenized accordingly.
   *            object types are index as json objects with no modification. This is used for numbers.
   *            date types are converted to unix time when indexed and stored as a number
   *            "regexp" tells whether to parese the "name" as a regular expression.
   */
  public void Configure(String config) {
    matches = new HashMap<String, FieldOption>();
    misses = new HashSet<String>();
    if (config == null || config.length() == 0) {
      fieldsToStore = new HashMap<String, FieldOption>();
      fieldsToStore.put(".*",new FieldOption(null,IndexType.STRING,1.0,true));
      return;
    }
    try {
       JSONObject configString = new JSONObject(config);
      fieldsToStore = new HashMap<String, FieldOption>();
      JSONArray fields = configString.optJSONArray("fields");
      if (fields == null) {
        // in this case, we want to map everything as a string using the json names
        fieldsToStore.put(".*",new FieldOption(null,IndexType.STRING,1.0,true));
      } else {
        for (int i = 0; i < fields.length(); i++) {
          JSONObject thisField = fields.getJSONObject(i);
          String name = thisField.getString("name");
          String luceneName = thisField.optString("lucenename",name);
          if (luceneName == null) name = null;
          double boost = thisField.optDouble("boost", 1.0);
          boolean regexp = thisField.optBoolean("regexp", false);
          String type = thisField.optString("type","string");
          IndexType t = IndexType.getIndexTypeFromString(type);
          if (t == IndexType.DATE) {
            try {
              String f = thisField.getString("dateformat");
              if (f != null) {
                SimpleDateFormat d = new SimpleDateFormat(f);
                dateFormats.add(d);
              }
            } catch (IllegalArgumentException ie) {
              /* invalid date format */
            } catch (JSONException je) {
              /* no specified */
            }
          }
          fieldsToStore.put(name, new FieldOption(luceneName, t,boost,regexp));
          if (regexp == true) {
            if (regexps == null) regexps = new HashMap<Pattern, String>();
            regexps.put(Pattern.compile(name), name);
          }
        }
      }
    } catch (JSONException je) {
      Log("CustomSearch.Configure error parsing configuration string: " + config);
    }
    return;   
  }

 
  public class FieldOption {
    private IndexType type;
    private double boost;
    private String fieldName;
    private boolean isRegexp;
    public FieldOption(String fieldName, IndexType type, float boost) {
      this.setType(type);
      this.setBoost(boost);
      this.setFieldName(fieldName);
      this.setRegexp(false);
    }
    public FieldOption(String fieldName, IndexType type, double boost, boolean regexp) {
      this.setType(type);
      this.setBoost(boost);
      this.setFieldName(fieldName);
      this.setRegexp(regexp);
    }
    public void setType(IndexType type) {
      this.type = type;
    }
    public IndexType getType() {
      return type;
    }
    public void setBoost(double boost) {
      this.boost = boost;
    }
    public double getBoost() {
      return boost;
    }
    public void setFieldName(String fieldName) {
      this.fieldName = fieldName;
    }
    public String getFieldName() {
      return fieldName;
    }
    public void setRegexp(boolean isRegexp) {
      this.isRegexp = isRegexp;
    }
    public boolean isRegexp() {
      return isRegexp;
    }
   
  }

  @Override
  public JSONArray ReReduce(JSONArray reduceResults) {
    // rereduce not implemented
    return new JSONArray();
  }

  @Override
  public JSONArray Reduce(JSONArray mapResults) {
    // reduce not implemented
    return new JSONArray();
  }
 
  public Date parseDate(String input) {
    Date d = null;
    if (dateFormats != null) {
      for (SimpleDateFormat f : dateFormats) {
        try {
          d = f.parse(input);
        } catch (ParseException e) {
          /* wrong format */
        }
      }
    }
    // now try some standard formats
    if (d == null) d = StDateUtils.parseDateString(input);
    return d;
  }
}
TOP

Related Classes of com.cloudant.indexers.MyCustomSearch

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.