Package avrobase.solr

Source Code of avrobase.solr.SolrIndex

package avrobase.solr;

import avrobase.AvroBaseException;
import avrobase.Index;
import avrobase.ReversableFunction;
import avrobase.Row;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.specific.SpecificRecord;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;

/**
* An AvroBase that provides Solr indexing
*
* @author sam, http://github.com/beatty
*/
public class SolrIndex<T extends SpecificRecord, K> implements Index<T, K, SQ> {
  private static final String SCHEMA_LOCATION = "/admin/file/?file=schema.xml";
  private final SolrServer solrServer;
  private final String uniqueKey;
  private final List<String> fields;
  private final ReversableFunction<K, String> keyTx;

  private volatile long lastCommit = System.currentTimeMillis();
  private volatile long lastOptimize = System.currentTimeMillis();
  private static Timer commitTimer = new Timer();
  private static Logger logger = LoggerFactory.getLogger("SolrIndex");

  public SolrIndex(String solrURL, ReversableFunction<K, String> keyTx) {
    this.keyTx = keyTx;
    Preconditions.checkNotNull(solrURL);

    try {
      solrServer = new CommonsHttpSolrServer(solrURL);
      URL url = new URL(solrURL + SCHEMA_LOCATION);
      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      DocumentBuilder db = dbf.newDocumentBuilder();
      Document document = db.parse(url.openStream());

      // Need to get the unique key and all the fields
      NodeList uniqueKeys = document.getElementsByTagName("uniqueKey");
      if (uniqueKeys == null || uniqueKeys.getLength() != 1) {
        throw new AvroBaseException("Invalid schema configuration, must have 1 unique key");
      }
      uniqueKey = uniqueKeys.item(0).getTextContent();

      // Now get all the fields we are going to index and query
      NodeList fieldList = document.getElementsByTagName("field");
      fields = new ArrayList<String>(fieldList.getLength());
      for (int i = 0; i < fieldList.getLength(); i++) {
        Node field = fieldList.item(i);
        String name = field.getAttributes().getNamedItem("name").getTextContent();
        fields.add(name);
      }
    } catch (MalformedURLException e) {
      throw new AvroBaseException("Invalid Solr URL: " + solrURL, e);
    } catch (ParserConfigurationException e) {
      throw new AvroBaseException(e);
    } catch (SAXException e) {
      throw new AvroBaseException("Failed to parse schema", e);
    } catch (IOException e) {
      throw new AvroBaseException("Failed to read schema", e);
    }
  }


  /**
   * Remove an id from the index.
   *
   * @param row
   * @throws AvroBaseException
   */
  public void unindex(K row) throws AvroBaseException {
    if (solrServer == null) {
      return;
    }

    // TODO: do we really want to throw an exception on index failure??
    try {
      solrServer.deleteById(keyTx.apply(row));
      solrServer.commit();
    } catch (SolrServerException e) {
      throw new AvroBaseException(e);
    } catch (IOException e) {
      throw new AvroBaseException(e);
    }
  }

  @Override
  public Iterable<K> search(SQ sqh) {
    // TODO: If we haven't indexed since the last update, index now. Once RTS is available we can fix this.
    long current = System.currentTimeMillis();
    if (current - lastCommit < 100) {
      UpdateRequest req = new UpdateRequest();
      req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
      try {
        solrServer.request(req);
        lastCommit = current;
      } catch (Exception e) {
        // Ignore failed commit and continue
      }
    }

    SolrQuery solrQuery = sqh.generateQuery(uniqueKey);
    try {
      QueryResponse queryResponse = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
      SolrDocumentList list = queryResponse.getResults();
      sqh.setTotal(list.getNumFound());
      final Iterator<SolrDocument> solrDocumentIterator = list.iterator();
      return new Iterable<K>() {

        @Override
        public Iterator<K> iterator() {
          return new Iterator<K>() {

            @Override
            public boolean hasNext() {
              return solrDocumentIterator.hasNext();
            }

            @Override
            public K next() {
              SolrDocument solrDocument = solrDocumentIterator.next();
              Map<String, Object> map = solrDocument.getFieldValueMap();
              Object o = map.get(uniqueKey);
              if (o == null) {
                throw new AvroBaseException("Unique key not present in document");
              }

              return keyTx.unapply(o.toString());
            }

            @Override
            public void remove() {
              throw new Error("Not implemented");
            }
          };
        }
      };
    } catch (SolrServerException e) {
      throw new AvroBaseException("Query failure: " + sqh.query, e);
    }
  }

  @Override
  public K lookup(SQ query) {
    final Iterable<K> results = search(query);

    try {
      return Iterables.getOnlyElement(results, null);
    } catch (IllegalArgumentException e) {
      throw new AvroBaseException("Too many results");
    }
  }

  /**
   * Index a row and value.
   *
   * @param row
   * @return
   */
  public void index(Row<T,K> row) throws AvroBaseException {
    T value = row.value;
    Schema schema = value.getSchema();
    SolrInputDocument document = new SolrInputDocument();
    for (String field : fields) {
      addField(value, schema, document, field, field);
    }
    document.addField(uniqueKey, keyTx.apply(row.row));
    try {
      UpdateRequest req = new UpdateRequest();
      long current = System.currentTimeMillis();
      if (current - lastCommit > 100) {
        lastCommit = current;
        req.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, false);
      } else {
        final long oldLastCommit = lastCommit;
        // Here we are basically checking to see in the future whether
        // a commit has been done since the last time we committed.
        commitTimer.schedule(new TimerTask() {
          @Override
          public void run() {
            if (oldLastCommit == lastCommit) {
              lastCommit = System.currentTimeMillis();
              UpdateRequest req = new UpdateRequest();
              req.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, false);
              try {
                solrServer.request(req);
              } catch (Exception e) {
                logger.error("Failed to commit", e);
              }
            }
          }
        }, current + 100);
      }
      if (current - lastOptimize > 3600000) {
        lastOptimize = current;
        req.setAction(AbstractUpdateRequest.ACTION.OPTIMIZE, false, false);
      }
      req.add(document);
      solrServer.request(req);
    } catch (SolrServerException e) {
      throw new AvroBaseException(e);
    } catch (IOException e) {
      throw new AvroBaseException(e);
    }
  }

  private void addField(SpecificRecord value, Schema schema, SolrInputDocument document, String field, String solrfield) {
    int dotindex;
    Schema.Field f;
    while ((dotindex = field.indexOf("_")) != -1) {
      f = schema.getField(field.substring(0, dotindex));
      if (f != null) {
        field = field.substring(dotindex + 1);
        Object o = value.get(f.pos());
        if (o instanceof GenericArray) {
          GenericArray ga = (GenericArray) o;
          for (Object e : ga) {
            if (e instanceof SpecificRecord) {
              SpecificRecord sr = (SpecificRecord) e;
              addField(sr, sr.getSchema(), document, field, solrfield);
            } else {
              throw new AvroBaseException("Invalid field name" + solrfield);
            }
          }
          return;
        }
      } else {
        throw new AvroBaseException("Invalid field name" + solrfield);
      }
    }
    f = schema.getField(field);
    if (f != null) {
      Object o = value.get(f.pos());
      if (o instanceof GenericArray) {
        GenericArray ga = (GenericArray) o;
        for (Object e : ga) {
          if (e instanceof SpecificRecord) {
            SpecificRecord sr = (SpecificRecord) e;
            Schema.Field idField = sr.getSchema().getField("id");
            if (idField != null) {
              document.addField(solrfield, sr.get(idField.pos()));
            }
          } else {
            document.addField(solrfield, e);
          }
        }
      } else {
        document.addField(solrfield, o);
      }
    }
  }
}
TOP

Related Classes of avrobase.solr.SolrIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.