Package com.github.le11.nls.solr

Source Code of com.github.le11.nls.solr.UIMAAsyncUpdateRequestProcessor

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.github.le11.nls.solr;

import com.github.le11.nls.lucene.UIMAAnalyzersUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uima.processor.SolrUIMAConfiguration;
import org.apache.solr.uima.processor.UIMAToSolrMapper;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.uima.jcas.JCas;

import java.io.IOException;
import java.io.StringReader;
import java.util.Map;

/**
* @author tommaso
*/
public class UIMAAsyncUpdateRequestProcessor extends UpdateRequestProcessor {

  public UIMAAsyncUpdateRequestProcessor(UpdateRequestProcessor next, SolrCore solrCore, SolrUIMAConfiguration config) {
    super(next);
    initialize(solrCore, config);
  }

  SolrUIMAConfiguration solrUIMAConfiguration;

  private SolrCore solrCore;


  private void initialize(SolrCore solrCore, SolrUIMAConfiguration config) {
    this.solrCore = solrCore;
    solrUIMAConfiguration = config;
  }

  @Override
  public void processAdd(AddUpdateCommand cmd) throws IOException {
    String text = null;
    try {
      /* get Solr document */
      SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();

      /* get the fields to analyze */
      String[] texts = getTextsToAnalyze(solrInputDocument);
      for (int i = 0; i < texts.length; i++) {
        text = texts[i];
        if (text != null && text.length() > 0) {
          /* process the text value */
          JCas jcas = UIMAAnalyzersUtils.getInstance().analyzeAsynchronously(new StringReader(text),
                  solrUIMAConfiguration.getAePath()).getJCas();

          UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
          /* get field mapping from config */
          /* map type features on fields */
          for (String typeFQN : solrUIMAConfiguration.getTypesFeaturesFieldsMapping().keySet()) {
            uimaToSolrMapper.map(typeFQN, solrUIMAConfiguration.getTypesFeaturesFieldsMapping().get(typeFQN));
          }
        }
      }
    } catch (Exception e) {
      String logField = solrUIMAConfiguration.getLogField();
      if (logField == null) {
        SchemaField uniqueKeyField = solrCore.getSchema().getUniqueKeyField();
        if (uniqueKeyField != null) {
          logField = uniqueKeyField.getName();
        }
      }
      String optionalFieldInfo = logField == null ? "." :
              new StringBuilder(". ").append(logField).append("=")
                      .append((String) cmd.getSolrInputDocument().getField(logField).getValue())
                      .append(", ").toString();
      int len = Math.min(text.length(), 100);
      if (solrUIMAConfiguration.isIgnoreErrors()) {
        log.warn(new StringBuilder("skip the text processing due to ")
                .append(e.getLocalizedMessage()).append(optionalFieldInfo)
                .append(" text=\"").append(text.substring(0, len)).append("...\"").toString());
      } else {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                new StringBuilder("processing error: ")
                        .append(e.getLocalizedMessage()).append(optionalFieldInfo)
                        .append(" text=\"").append(text.substring(0, len)).append("...\"").toString(), e);
      }
    }
    super.processAdd(cmd);
  }

  /*
   * get the texts to analyze from the corresponding fields
   */
  private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
    String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze();
    boolean merge = solrUIMAConfiguration.isFieldsMerging();
    String[] textVals;
    if (merge) {
      StringBuilder unifiedText = new StringBuilder("");
      for (int i = 0; i < fieldsToAnalyze.length; i++) {
        unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i])));
      }
      textVals = new String[1];
      textVals[0] = unifiedText.toString();
    } else {
      textVals = new String[fieldsToAnalyze.length];
      for (int i = 0; i < fieldsToAnalyze.length; i++) {
        textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]));
      }
    }
    return textVals;
  }

}
TOP

Related Classes of com.github.le11.nls.solr.UIMAAsyncUpdateRequestProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.