// Package co.diji.rest
//
// Source code of co.diji.rest.SolrUpdateHandlerRestAction

package co.diji.rest;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.apache.commons.codec.binary.Hex;
import org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.WriteConsistencyLevel;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.replication.ReplicationType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestChannel;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.XContentThrowableRestResponse;

import co.diji.solr.SolrResponseWriter;

public class SolrUpdateHandlerRestAction extends BaseRestHandler {

  // content types
  private final String contentTypeFormEncoded = "application/x-www-form-urlencoded";

  // fields in the Solr input document to scan for a document id
  private final String[] idFields = { "id", "docid", "documentid", "contentid", "uuid", "url" };

  // the xml input factory
  private final XMLInputFactory inputFactory = XMLInputFactory.newInstance();

  // the response writer
  private final SolrResponseWriter solrResponseWriter = new SolrResponseWriter();

  /**
   * Rest actions that mock Solr update handlers
   *
   * @param settings ES settings
   * @param client ES client
   * @param restController ES rest controller
   */
  @Inject
  public SolrUpdateHandlerRestAction(Settings settings, Client client, RestController restController) {
    super(settings, client);

    // register update handlers
    // specifying and index and type is optional
    restController.registerHandler(RestRequest.Method.POST, "/_solr/update", this);
    restController.registerHandler(RestRequest.Method.POST, "/_solr/update/{handler}", this);
    restController.registerHandler(RestRequest.Method.POST, "/{index}/_solr/update", this);
    restController.registerHandler(RestRequest.Method.POST, "/{index}/_solr/update/{handler}", this);
    restController.registerHandler(RestRequest.Method.POST, "/{index}/{type}/_solr/update", this);
    restController.registerHandler(RestRequest.Method.POST, "/{index}/{type}/_solr/update/{handler}", this);
  }

  /*
   * (non-Javadoc)
   *
   * @see
   * org.elasticsearch.rest.RestHandler#handleRequest(org.elasticsearch.rest.RestRequest, org.elasticsearch.rest.RestChannel)
   */
  public void handleRequest(final RestRequest request, final RestChannel channel) {
    // Solr will send commits/optimize as encoded form parameters
    // detect this and just send the response without processing
    // we don't need to do commits with ES
    // TODO: support optimize
    if (request.header("Content-Type").contains(contentTypeFormEncoded)) {
      // find the output writer specified
      // it will be inside the content since we have form encoded
      // parameters
      String qstr = request.contentAsString();
      Map<String, String> params = request.params();
      if (params.containsKey("wt")) {
                // output writer already found
            } else if (qstr.contains("wt=javabin")) {
        params.put("wt", "javabin");
      } else if (qstr.contains("wt=xml")) {
        params.put("wt", "xml");
      } else {
        // we have an output writer we don't support yet
        // put junk into wt so sendResponse detects unknown wt
        logger.warn("Unknown wt for commit/optimize");
        params.put("wt", "invalid");
      }

      // send response to Solr
      sendResponse(request, channel);
      return;
    }

    // get the type of Solr update handler we want to mock, default to xml
    final String handler = request.hasParam("handler") ? request.param("handler").toLowerCase() : "xml";

    // Requests are typically sent to Solr in batches of documents
    // We can copy that by submitting batch requests to Solr
    BulkRequest bulkRequest = Requests.bulkRequest();

    // parse and handle the content
    if (handler.equals("xml")) {
      // XML Content
      try {
        // create parser for the content
        XMLStreamReader parser = inputFactory.createXMLStreamReader(new StringReader(request.contentAsString()));

        // parse the xml
        // we only care about doc and delete tags for now
        boolean stop = false;
        while (!stop) {
          // get the xml "event"
          int event = parser.next();
          switch (event) {
          case XMLStreamConstants.END_DOCUMENT:
            // this is the end of the document
            // close parser and exit while loop
            parser.close();
            stop = true;
            break;
          case XMLStreamConstants.START_ELEMENT:
            // start of an xml tag
            // determine if we need to add or delete a document
            String currTag = parser.getLocalName();
            if ("doc".equals(currTag)) {
              // add a document
              Map<String, Object> doc = parseXmlDoc(parser);
              if (doc != null) {
                bulkRequest.add(getIndexRequest(doc, request));
              }
            } else if ("delete".equals(currTag)) {
              // delete a document
              String docid = parseXmlDelete(parser);
              if (docid != null) {
                bulkRequest.add(getDeleteRequest(docid, request));
              }
            }
            break;
          }
        }
      } catch (Exception e) {
        // some sort of error processing the xml input
        try {
          logger.error("Error processing xml input", e);
          channel.sendResponse(new XContentThrowableRestResponse(request, e));
        } catch (IOException e1) {
          logger.error("Failed to send error response", e1);
        }
      }
    } else if (handler.equals("javabin")) {
      // JavaBin Content
      try {
        // We will use the JavaBin codec from solrj
        // unmarshal the input to a SolrUpdate request
        JavaBinUpdateRequestCodec codec = new JavaBinUpdateRequestCodec();
        UpdateRequest req = codec.unmarshal(new ByteArrayInputStream(request.contentByteArray()), null);

        // Get the list of documents to index out of the UpdateRequest
        // Add each document to the bulk request
        // convert the SolrInputDocument into a map which will be used as the ES source field
        List<SolrInputDocument> docs = req.getDocuments();
        if (docs != null) {
          for (SolrInputDocument doc : docs) {
            bulkRequest.add(getIndexRequest(convertToMap(doc), request));
          }
        }

        // See if we have any documents to delete
        // if yes, add them to the bulk request
        if (req.getDeleteById() != null) {
          for (String id : req.getDeleteById()) {
            bulkRequest.add(getDeleteRequest(id, request));
          }
        }
      } catch (Exception e) {
        // some sort of error processing the javabin input
        try {
          logger.error("Error processing javabin input", e);
          channel.sendResponse(new XContentThrowableRestResponse(request, e));
        } catch (IOException e1) {
          logger.error("Failed to send error response", e1);
        }
      }
    }

    // only submit the bulk request if there are index/delete actions
    // it is possible not to have any actions when parsing xml due to the
    // commit and optimize messages that will not generate documents
    if (bulkRequest.numberOfActions() > 0) {
      client.bulk(bulkRequest, new ActionListener<BulkResponse>() {

        // successful bulk request
        public void onResponse(BulkResponse response) {
          logger.info("Bulk request completed");
          for (BulkItemResponse itemResponse : response) {
            if (itemResponse.failed()) {
              logger.error("Index request failed {index:{}, type:{}, id:{}, reason:{}}", itemResponse.index(), itemResponse.type(), itemResponse.id(), itemResponse.failure().message());
            }
          }
        }

        // failed bulk request
        public void onFailure(Throwable e) {
          logger.error("Bulk request failed", e);
        }
      });
    }

    // send dummy response to Solr so the clients don't choke
    sendResponse(request, channel);
  }

  /**
   * Sends a dummy response to the Solr client
   *
   * @param request ES rest request
   * @param channel ES rest channel
   */
  private void sendResponse(RestRequest request, RestChannel channel) {
    // create NamedList with dummy Solr response
    NamedList<Object> solrResponse = new SimpleOrderedMap<Object>();
    NamedList<Object> responseHeader = new SimpleOrderedMap<Object>();
    responseHeader.add("status", 0);
    responseHeader.add("QTime", 5);
    solrResponse.add("responseHeader", responseHeader);

    // send the dummy response
    solrResponseWriter.writeResponse(solrResponse, request, channel);
  }

  /**
   * Generates an ES DeleteRequest object based on the Solr document id
   *
   * @param id the Solr document id
   * @param request the ES rest request
   * @return the ES delete request
   */
  private DeleteRequest getDeleteRequest(String id, RestRequest request) {
    // get the index and type we want to execute this delete request on
    final String index = request.hasParam("index") ? request.param("index") : "solr";
    final String type = request.hasParam("type") ? request.param("type") : "docs";

    // create the delete request object
    DeleteRequest deleteRequest = new DeleteRequest(index, type, getMD5(id));
    deleteRequest.parent(request.param("parent"));

    // TODO: this was causing issues, do we need it?
    // deleteRequest.version(RestActions.parseVersion(request));
    // deleteRequest.versionType(VersionType.fromString(request.param("version_type"),
    // deleteRequest.versionType()));

    deleteRequest.routing(request.param("routing"));

    return deleteRequest;
  }

  /**
   * Converts a SolrInputDocument into an ES IndexRequest
   *
   * @param solrDoc the Solr input document to convert
   * @param request the ES rest request
   * @return the ES index request object
   */
  private IndexRequest getIndexRequest(Map<String, Object> doc, RestRequest request) {
    // get the index and type we want to index the document in
    final String index = request.hasParam("index") ? request.param("index") : "solr";
    final String type = request.hasParam("type") ? request.param("type") : "docs";

    // generate an id for the document
    String id = getIdForDoc(doc);

    // create an IndexRequest for this document
    IndexRequest indexRequest = new IndexRequest(index, type, id);
    indexRequest.routing(request.param("routing"));
    indexRequest.parent(request.param("parent"));
    indexRequest.source(doc);
    indexRequest.timeout(request.paramAsTime("timeout", IndexRequest.DEFAULT_TIMEOUT));
    indexRequest.refresh(request.paramAsBoolean("refresh", indexRequest.refresh()));

    // TODO: this caused issues, do we need it?
    // indexRequest.version(RestActions.parseVersion(request));
    // indexRequest.versionType(VersionType.fromString(request.param("version_type"),
    // indexRequest.versionType()));

    indexRequest.percolate(request.param("percolate", null));
    indexRequest.opType(IndexRequest.OpType.INDEX);

    // TODO: force creation of index, do we need it?
    // indexRequest.create(true);

    String replicationType = request.param("replication");
    if (replicationType != null) {
      indexRequest.replicationType(ReplicationType.fromString(replicationType));
    }

    String consistencyLevel = request.param("consistency");
    if (consistencyLevel != null) {
      indexRequest.consistencyLevel(WriteConsistencyLevel.fromString(consistencyLevel));
    }

    // we just send a response, no need to fork
    indexRequest.listenerThreaded(true);

    // we don't spawn, then fork if local
    indexRequest.operationThreaded(true);

    return indexRequest;
  }

  /**
   * Generates document id. A Solr document id may not be a valid ES id, so we
   * attempt to find the Solr document id and convert it into a valid ES
   * document id. We keep the original Solr id so the document can be found
   * and deleted later if needed.
   *
   * We check for Solr document id's in the following fields: id, docid,
   * documentid, contentid, uuid, url
   *
   * If no id is found, we generate a random one.
   *
   * @param doc the input document
   * @return the generated document id
   */
  private String getIdForDoc(Map<String, Object> doc) {
    // start with a random id
    String id = UUID.randomUUID().toString();

    // scan the input document for an id
    for (String idField : idFields) {
      if (doc.containsKey(idField)) {
        id = doc.get(idField).toString();
        break;
      }
    }

    // always store the id back into the "id" field
    // so we can get it back in results
    doc.put("id", id);
   
    // return the id which is the md5 of either the
    // random uuid or id found in the input document.
    return getMD5(id);
  }

  /**
   * Calculates the md5 hex digest of the given input string
   *
   * @param input the string to md5
   * @return the md5 hex digest
   */
  private String getMD5(String input) {
    String id = "";
    MessageDigest md;
    try {
      md = MessageDigest.getInstance("MD5");
      id = new String(Hex.encodeHex(md.digest(input.getBytes())));
    } catch (NoSuchAlgorithmException e) {
      id = input;
    }

    return id;
  }

  /**
   * Converts a SolrInputDocument into a Map
   *
   * @param doc the SolrInputDocument to convert
   * @return the input document as a map
   */
  private Map<String, Object> convertToMap(SolrInputDocument doc) {
    // create the Map we will put the fields in
    Map<String, Object> newDoc = new HashMap<String, Object>();

    // loop though all the fields and insert them into the map
    Collection<SolrInputField> fields = doc.values();
    if (fields != null) {
      for (SolrInputField field : fields) {
        newDoc.put(field.getName(), field.getValue());
      }
    }

    return newDoc;
  }

  /**
   * Reads a SolrXML document into a map of fields
   *
   * @param parser the xml parser
   * @return the document as a map
   * @throws XMLStreamException
   */
  private Map<String, Object> parseXmlDoc(XMLStreamReader parser) throws XMLStreamException {
    Map<String, Object> doc = new HashMap<String, Object>();
    StringBuilder buf = new StringBuilder();
    String name = null;
    boolean stop = false;
    // infinite loop until we are done parsing the document or an error occurs
    while (!stop) {
      int event = parser.next();
      switch (event) {
      case XMLStreamConstants.START_ELEMENT:
        buf.setLength(0);
        String localName = parser.getLocalName();
        // we are looking for field elements only
        if (!"field".equals(localName)) {
          logger.warn("unexpected xml tag /doc/" + localName);
          doc = null;
          stop = true;
        }

        // get the name attribute of the field
        String attrName = "";
        String attrVal = "";
        for (int i = 0; i < parser.getAttributeCount(); i++) {
          attrName = parser.getAttributeLocalName(i);
          attrVal = parser.getAttributeValue(i);
          if ("name".equals(attrName)) {
            name = attrVal;
          }
        }
        break;
      case XMLStreamConstants.END_ELEMENT:
        if ("doc".equals(parser.getLocalName())) {
          // we are done parsing the doc
          // break out of loop
          stop = true;
        } else if ("field".equals(parser.getLocalName())) {
          // put the field value into the map
          // handle multiple values by putting them into a list
          if (doc.containsKey(name) && (doc.get(name) instanceof List)) {
            List<String> vals = (List<String>) doc.get(name);
            vals.add(buf.toString());
            doc.put(name, vals);
          } else if (doc.containsKey(name)) {
            List<String> vals = new ArrayList<String>();
            vals.add((String) doc.get(name));
            vals.add(buf.toString());
            doc.put(name, vals);
          } else {
            doc.put(name, buf.toString());
          }
        }
        break;
      case XMLStreamConstants.SPACE:
      case XMLStreamConstants.CDATA:
      case XMLStreamConstants.CHARACTERS:
        // save all text data
        buf.append(parser.getText());
        break;
      }
    }

    // return the parsed doc
    return doc;
  }

  /**
   * Parse the document id out of the SolrXML delete command
   *
   * @param parser the xml parser
   * @return the document id to delete
   * @throws XMLStreamException
   */
  private String parseXmlDelete(XMLStreamReader parser) throws XMLStreamException {
    String docid = null;
    StringBuilder buf = new StringBuilder();
    boolean stop = false;
    // infinite loop until we get docid or error
    while (!stop) {
      int event = parser.next();
      switch (event) {
      case XMLStreamConstants.START_ELEMENT:
        // we just want the id node
        String mode = parser.getLocalName();
        if (!"id".equals(mode)) {
          logger.warn("unexpected xml tag /delete/" + mode);
          stop = true;
        }
        buf.setLength(0);
        break;
      case XMLStreamConstants.END_ELEMENT:
        String currTag = parser.getLocalName();
        if ("id".equals(currTag)) {
          // we found the id
          docid = buf.toString();
        } else if ("delete".equals(currTag)) {
          // done parsing, exit loop
          stop = true;
        } else {
          logger.warn("unexpected xml tag /delete/" + currTag);
        }
        break;
      case XMLStreamConstants.SPACE:
      case XMLStreamConstants.CDATA:
      case XMLStreamConstants.CHARACTERS:
        // save all text data (this is the id)
        buf.append(parser.getText());
        break;
      }
    }

    // return the extracted docid
    return docid;
  }
}
// TOP
//
// Related Classes of co.diji.rest.SolrUpdateHandlerRestAction
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.