Package gc.solr.task

Source Code of gc.solr.task.Gptdb2SolrTask

/* See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* Esri Inc. licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gc.solr.task;
import gc.base.sql.SqlQuery;
import gc.base.sql.SqlRowHandler;
import gc.base.task.Task;
import gc.base.task.TaskContext;
import gc.base.task.TaskStats;
import gc.base.util.UuidUtil;
import gc.base.xmltypes.XmlTypes;
import gc.gpt.db.GptCollections;
import gc.gpt.db.GptResource;
import gc.gpt.db.GptResourceXml;
import gc.gpt.db.GptUser;
import gc.solr.publish.DocBuilder;
import gc.solr.publish.DocInfo;
import gc.solr.publish.DocPublisher;
import gc.solr.publish.FieldConstants;

import java.io.IOException;
import java.net.URLEncoder;
import java.sql.Connection;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;

public class Gptdb2SolrTask extends Task implements SqlRowHandler {
 
  /*
   TODO
     - collections deletes GPT_
   */

  private boolean      approvedOnly = true;
  private boolean      docsOnly = true;
  private boolean      emptyAclOnly = false;
  private boolean      checkForDeletes = true;
 
  private DocPublisher docPublisher;
  private String       foreignInstanceId;
  private String       foreignInstanceUrl;
  private SolrServer   queryServer;
  private String       solrCollectionUrl;
  private String       syncType = FieldConstants.Val_Sync_Type_Gptdb2Solr;
  private XmlTypes     xmlTypes;
  private Gptdb2SolrInstance gptdb2SolrInstance;
 
  private Map<String,String> okIds = new HashMap<String,String>();
  private List<String> delIds = new ArrayList<String>();
  private int maxIdsInMap = 1000000;
 
 
  public Gptdb2SolrTask(TaskContext context,
      XmlTypes xmlTypes, Gptdb2SolrInstance gptdb2SolrInstance) {
    super(context);
    this.xmlTypes = xmlTypes;
    this.gptdb2SolrInstance = gptdb2SolrInstance;
    this.foreignInstanceId = this.gptdb2SolrInstance.getGptInstanceId();
    this.foreignInstanceUrl = this.gptdb2SolrInstance.getGptInstanceUrl();
    this.solrCollectionUrl = this.gptdb2SolrInstance.getSolrCollectionUrl();
  }

  @Override
  protected void executeTask() throws Exception {
    TaskContext context = this.getContext();
    Connection con = null;
    try {
      // TODO: dbconnection, gptinstance name, gcinstancename
      queryServer = new HttpSolrServer(solrCollectionUrl);
      docPublisher = new DocPublisher(context,solrCollectionUrl,5000,10,1);
      this.docPublisher.startup();
     
      // TODO remove this
      //deleteDocs();
      //if (true) return;
     
      //okIds = null;
      con = gptdb2SolrInstance.makeSqlConnection();
      GptResource r = new GptResource();
      SqlQuery q = new SqlQuery();
      q.query(context,con,r.getSqlQInfo(),this);
      this.walkSolrDocs();
     
    } finally {
      try {if (con != null) con.close();}
      catch (Exception ef) {ef.printStackTrace();}
      try {
        if (queryServer != null) {
          queryServer.shutdown();
        }
      } finally {
        if (docPublisher != null) {
          try {
            docPublisher.commit();
          } finally {
            docPublisher.shutdown();
          }
        }
      }
    }
  }

  @Override
  public void handleSqlRow(TaskContext context, Connection con,
      ResultSet rs, long rowNum) throws Exception {
    try {
      this._handleSqlRow(context,con,rs,rowNum);
    } catch (Exception e) {
      TaskStats stats = context.getStats();
      String tn = context.getTaskName()+".sync";
      stats.incrementCount(tn+".exceptions");
      System.err.println(e.toString());
      //e.printStackTrace(System.err);
    }
  }
 
  private void _handleSqlRow(TaskContext context, Connection con,
      ResultSet rs, long rowNum) throws Exception {
    TaskStats stats = context.getStats();
    String tn = context.getTaskName()+".sync";
    GptResource resource = new GptResource();
    resource.readFields(rs);

    String[] result = queryDoc(resource);
    String id = result[0];
    String fsMatched = result[1];
   
    if (this.approvedOnly) {
      String s = resource.approvalstatus;
      // handles an earlier problem with publishing "posted" docs
      if ((s != null) && s.equals("posted")) {
        fsMatched = null;
      }
    }
   
    if (fsMatched != null) {
      if ((okIds != null) && (okIds.size() <= this.maxIdsInMap)) {
        okIds.put(id,"");
      } else if (okIds != null) {
        okIds = null;
      }
      stats.incrementCount(tn+".noChange");
    } else {
     
      String s;
      boolean bContinue = true;
      if (bContinue && this.approvedOnly) {
        s = resource.approvalstatus;
        if (s == null) s = "";
        if (!s.equals("approved") && !s.equals("reviewed")) {
          stats.incrementCount(tn+".ignore.notApproved");
          bContinue = false;
        }
      }
      if (bContinue && this.emptyAclOnly) {
        s = resource.acl;
        if (s == null) s = "";
        if (s.trim().length() > 0) {
          stats.incrementCount(tn+".ignore.nonEmptyAcl");
          bContinue = false;
        }
      }
      if (bContinue && this.docsOnly) {
        if (resource.isHarvestingSite) {
          stats.incrementCount(tn+".ignore.harvestingSite");
          bContinue = false;
        }
      }
     
      GptResourceXml resourceXml = new GptResourceXml();
      if (bContinue) {
        resourceXml.querySqlDB(context,con,resource.docuuid);
        s = resourceXml.xml;
        if ((s == null) || (s.length() == 0)) {
          stats.incrementCount(tn+".ignore.noResourceXml");
          bContinue = false;
        }
      }
     
      if (bContinue) {
        if (id == null) {
          stats.incrementCount(tn+".insertRequired");
        } else {
          stats.incrementCount(tn+".updateRequired");
        }
       
        GptUser user = new GptUser();
        user.querySqlDB(context,con,resource.owner);
       
        GptResource parentSite = null;
        if (resource.isHarvestedDocument) {
          parentSite = new GptResource();
          parentSite.querySqlDB(context,con,resource.siteuuid);
        }
               
                GptCollections gptCollections = null;
                if (gptdb2SolrInstance.isGptIncludeCollections()) {
                  gptCollections = new GptCollections();
                  gptCollections.querySqlDB(context,con,resource.docuuid);
                }
 
        SolrInputDocument doc = makeDoc(id,resource,user,resourceXml,parentSite,gptCollections);
        //System.err.println(doc);
        updateDoc(doc);
        stats.incrementCount(tn+".solr.sent");
      }
     
      if (!bContinue && this.checkForDeletes) {
        if (id != null) {
          this.docPublisher.getUpdateServer().deleteById(id);
          stats.incrementCount(tn+".solr.sentForDelete");
        }
      }
    }
  }
 
  private SolrInputDocument makeDoc(String id, GptResource resource, GptUser user,
      GptResourceXml resourceXml, GptResource parentSite, GptCollections collections) throws Exception {
   
    /*
     - Collections? Acls?
     - Parent site info in not within the foreign stamp,
       if changed the item will not be updated
     - Owner needs a realm?
     - errors from ConcurrentUpdateSolrServer?
     - store the XML? link to the XML?
     - gpt fields?
     - tags?
   */
   
    TaskContext context = getContext();
    DocBuilder builder = new DocBuilder();
    DocInfo info = new DocInfo();
    SolrInputDocument doc = new SolrInputDocument();
    String s;
   
    if (id == null) id = UuidUtil.normalizeGptUuid(resource.docuuid);
    info.Id = id;
    info.Id_Table = FieldConstants.Val_Id_Table_DocIndex;
   
    //info.Owner_Dn = user.dn;
    info.Owner_Username = user.username;
   
    String sItemUrl = null;
    s = resource.sourceuri;
    if ((s != null) && (s.startsWith("http:") || s.startsWith("https:") ||
         s.startsWith("ftp:") || s.startsWith("ftps:"))) {
      sItemUrl = resource.sourceuri;
    }
    info.Src_Item_Http_ContentType = null;
    info.Src_Item_Http_ForeignStamp = null;
    info.Src_Item_Http_LastModified = null;
    info.Src_Item_LastModified = resource.updatedate;
    info.Src_Item_Uri = resource.sourceuri;
    info.Src_Item_Url = sItemUrl;
    if (parentSite != null) {
      info.Src_Site_Id = resource.siteuuid;
      //info.Src_Site_Name = parentSite.title;
      info.Src_Site_Protocol = parentSite.protocol_type;
      info.Src_Site_Url = parentSite.host_url;
    }
   
    String fs = makeForeignStamp(resource);
    info.Sync_Foreign_Id = resource.docuuid;
    info.Sync_Foreign_InstanceId = this.foreignInstanceId;
    info.Sync_Foreign_InstanceUrl = this.foreignInstanceUrl;
    info.Sync_Foreign_Stamp = fs;
    info.Sync_Type = this.syncType;
   
    if (!resource.isHarvestingSite) {
      String sMetadataUrl = null;
      s = this.foreignInstanceUrl;
      if ((s != null) && (s.length() > 0) && (!s.contains("?"))) {
        if (!s.endsWith("/")) s += "/";
        sMetadataUrl = s+"rest/document?id="+URLEncoder.encode(resource.docuuid,"UTF-8");
      }
      info.Url_Metadata = sMetadataUrl;
    }
    info.Xml_Metadata = resourceXml.xml;
   
    builder.prepare(context, xmlTypes, doc, info);
    if (!this.approvedOnly) {
      builder.setField(doc,"gpt.doc.approvalstatus_s",resource.approvalstatus);
    }
        if (collections!=null) {
          for (String shortName: collections.getShortNames()) {
            builder.addField(doc,FieldConstants.Sys_Src_Collections,shortName);
            builder.addField(doc,FieldConstants.Sys_Src_Collections_ss,shortName);
          }
        }
    //System.err.println(doc);
   
    if ((okIds != null) && (okIds.size() <= this.maxIdsInMap)) {
      okIds.put(id,"");
    } else if (okIds != null) {
      okIds = null;
    }
   
    return doc;
  }
 
  private String makeForeignStamp(GptResource resource) {
    String acl = resource.acl;
    if (acl == null) acl = "";
    String fs = resource.approvalstatus+
                ("."+resource.owner)+
                ("."+acl)+
                ("."+resource.updatedate.getTime());
    return fs;
  }
 
  private String[] queryDoc(GptResource resource)
      throws SolrServerException, IOException {
    String[] result = new String[]{null,null};
    String fldId = FieldConstants.Id;
    String fldForeignStamp = FieldConstants.Sync_Foreign_Stamp;
   
    /* TODO can same id from different GPTs cause a problem? */
   
    String k = UuidUtil.normalizeGptUuid(resource.docuuid);
    String fs = makeForeignStamp(resource);
   
    String fl = fldId+","+fldForeignStamp;
    String q = fldId+":"+k;
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("q",q);
    params.set("fl",fl);
   
    QueryResponse response = queryServer.query(params);
    SolrDocumentList docs = response.getResults();
    if (docs != null) {
      int nDocs = docs.size();
      if (nDocs == 1) {
        SolrDocument doc = docs.get(0);
        String sId = (String)doc.getFieldValue(fldId);
        String sFs = (String)doc.getFieldValue(fldForeignStamp);
        result[0] = sId;
        if (sFs.equals(fs) && !gptdb2SolrInstance.isGptIncludeCollections()) {
          result[1] = "fsMatched";
        }
      } else if (nDocs > 1) {
        // TODO: exception here?, fix the problem?
      }
    }
    return result;
  }
 
  private void updateDoc(SolrInputDocument doc) throws SolrServerException, IOException {
    this.docPublisher.updateDoc(doc);
  }
 
  private void deleteDocs() throws SolrServerException, IOException {
    String q = FieldConstants.Id_Table+":"+FieldConstants.Val_Id_Table_DocIndex;
    q += " AND "+FieldConstants.Sync_Type+":"+this.syncType;
    q += " AND "+FieldConstants.Sync_Foreign_InstanceId+":"+this.foreignInstanceId;
    //q = "*:*";
    this.docPublisher.getUpdateServer().deleteByQuery(q);
    this.docPublisher.commit();
  }
 
  private void walkSolrDocs() throws SolrServerException, IOException {
    if (!checkForDeletes) return;
    if ((okIds == null) || (okIds.size() == 0)) return;
   
    TaskContext context = this.getContext();
    TaskStats stats = context.getStats();
    String tn = context.getTaskName()+".walkSolrDocs";
    stats.setString(tn,"...");
   
    String fl = FieldConstants.Id;
    String q = FieldConstants.Id_Table+":"+FieldConstants.Val_Id_Table_DocIndex;
    q += " AND "+FieldConstants.Sync_Type+":"+this.syncType;
    q += " AND "+FieldConstants.Sync_Foreign_InstanceId+":"+this.foreignInstanceId;
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("q",q);
    params.set("fl",fl);
    stats.setString(tn+".q",q);
   
    boolean bContinue = true;
    long nDeepTotal = 0;
    long nFetched = 0;
    long nHits = 0;
    int nDocs = 0;
    long nStart = 0;
    int nRows = 1000;
    long nNextStart = 0;
    while (bContinue) {
      bContinue = false;
      params.set("start",""+nStart);
      params.set("rows",""+nRows);
      QueryResponse response = queryServer.query(params);
      SolrDocumentList docs = response.getResults();
      if (docs != null) {
        nHits = docs.getNumFound();
        nDocs = docs.size();
        nNextStart = nStart+nDocs;
        if ((nDocs > 0) && (nNextStart < nHits)) {
          bContinue = true;
        }
        for (int i=0;i<nDocs;i++) {
          SolrDocument doc = docs.get(i);
          String id = (String)doc.getFieldValue(FieldConstants.Id);
          nFetched++;
          stats.incrementCount(tn+".fetched");
          if (okIds.get(id) != null) {
            stats.incrementCount(tn+".idOk");
          } else {
            stats.incrementCount(tn+".idRequiresDelete");
            if ((delIds != null) && (delIds.size() <= this.maxIdsInMap)) {
              delIds.add(id);
            } else if (delIds != null) {
              delIds = null;
              bContinue = false;
              break;
            }
          }
          if ((nDeepTotal > 0) && (nFetched >= nDeepTotal)) {
            bContinue = false;
            break;
          }
        }
        nStart = nNextStart;
      }
    }
   
    if ((delIds != null) && (delIds.size() > 0)) {
      stats.incrementCount(context.getTaskName()+".solr.sentForDelete",delIds.size());
      this.docPublisher.getUpdateServer().deleteById(delIds);
    }
   
  }

}
TOP

Related Classes of gc.solr.task.Gptdb2SolrTask

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.