// Package fr.univnantes.lina.uima.dictionaryAnnotator

// Source Code of fr.univnantes.lina.uima.dictionaryAnnotator.DictionaryAnnotatorAE$Branch

/**
* UIMA dictionary annotator
* Copyright (C) 2011  Nicolas Hernandez
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*     http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package fr.univnantes.lina.uima.dictionaryAnnotator;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.AnalysisComponent;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;

import fr.univnantes.lina.uima.common.AnnotationCollectionUtils;
import fr.univnantes.lina.uima.common.AnnotationUtils;
import fr.univnantes.lina.uima.common.CommonAE;
import fr.univnantes.lina.uima.common.FeatureUtils;
import fr.univnantes.lina.uima.common.ResourceUtils;
import fr.univnantes.lina.uima.dataModels.PrefixTree;
import fr.univnantes.lina.uima.resources.CSVDictionaryResource;

/**
* Annotates the text offsets at which expressions of a dictionary are matched.
* The expressions come from a dictionary.
* It is possible to search the value of any feature.
* The processing consists of:
* loading the resource (a resource can be loaded either by a resource declaration and binding, or via a parameter setting and consequently a call to a doLoad method),
* parsing the resource,
* building the prefix tree from a specific part of each entry,
* recognizing the entries of the resource in a view,
* removing covered annotations.
*
* @author rocheteau,
* @author revised by hernandez to get more abstraction (commonAE), handle csv resource file, bug correction (last char to tokenize for simple and compound words, char to int (unicode))
*/
public class DictionaryAnnotatorAE extends CommonAE {

  /*
   * PARAMETERS NAMES
   */

  /**
   * Parameter name for the name of the annotation type to create for each line of the csv files
   */
  public static final String LINE_TO_ANNOTATION_PARAM = "LineToAnnotation";

  /**
   * Parameter name for the csv column id and the feature names to
   * associate with from the OutputAnnotation
   */
  public static final String PARAM_COLUMNS_TO_FEATURES = "ColumnsToFeatures";

  /**
   * Parameter name for associating the CSV column values to a single list feature
   * It is an alternative or a complementary approach to the ColumnsToFeatures
   */
  public static final String PARAM_FEATURES_TO_A_LIST_FEATURE = "ColumnsToAListFeature";

  /**
   * Parameter name of the CSV separator
   * by default a tabulation, '\t', but can be set to comma ',' ...
   */
  public static final String CSV_SEPARATOR_PARAM = "CSVSeparator";

  /**
   * Parameter name of the feature path to search
   */
  public static final String FEATUREPATH_TO_SEARCH_PARAM = "FeaturePathToSearch";

  /**
   * Parameter name of the feature path to search
   */
  public static final String EXACT_MATCH_PARAM = "ExactMatch";

  /**
   * Parameter name of AnnotateAnnotationWhereFeaturePathBelongs (see the descriptor)
   */
  public static final String ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE_PARAM = "AnnotateAnnotationWhereFeaturePathBelongs" ;

  /*
   * PARAMETERS DEFAULT VALUES
   */

  /**
   * Name of the default separator between columnId and the feature name associated
   */
  private static String DEFAULT_COLUMNID_AND_FEATURENAME_SEPARATOR = "->";

  /**
   * Name of the default csv separator
   */
  private static String DEFAULT_CSV_SEPARATOR = "\t";

  /**
   * Name of the default feature path to search
   */
  private static String DEFAULT_FEATUREPATH_TO_SEARCH = "uima.tcas.DocumentAnnotation:coveredText";
 
  private static Boolean DEFAULT_EXACT_MATCH_VALUE = false;

  /**
   * Name of the default for AnnotateAnnotationWhereFeaturePathBelongs
   */
  private static Boolean DEFAULT_ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE  = false;

  /*
   * LOCAL VARIABLES 
   */
  private CSVDictionaryResource resource;
  /**
   * Name of the annotation type to create for each line of the csv files
   */
  private String annotationToProcessString = null;

  /**
   * list couples of csv column id and feature names to associate with from the OutputAnnotation
   * the syntax is
   * 0 -> begin
   * if the values of the first column should be set to the begin feature of the OutputAnnotation
   */
  private HashMap<Integer,String> colIdFeatNameHashMap = null;

  /**
   *
   */
  private String arrayFeatureName = null;


    private Boolean debug = true;

  private String csvSeparatorString = "";

  /**
   * Feature path to search
   */
  private String annotationToSearch = "";
  private String featureToSearch = "";

  private Boolean exactMatchBoolean = false;

  private Boolean annotateAnnotationWhereFeaturePathBelongs = false;

  /*
   * GETTER and SETTER
   */
  private PrefixTree getRootNode() {
    return this.resource.getRoot();
  }




  /**
   * Get the values of the configuration parameters
   *
   * @see AnalysisComponent#initialize(UimaContext)
   */
  @Override
  public void initialize(UimaContext aContext) throws ResourceInitializationException {           

    // SUPER PARAMATERS
    super.initialize(aContext);

    // CURRENT PARAMETERS

    // Get the input csv separator
    csvSeparatorString  = (String) aContext.getConfigParameterValue(CSV_SEPARATOR_PARAM);
    if (csvSeparatorString == null) {
      csvSeparatorString = DEFAULT_CSV_SEPARATOR;
    }

    // Get the annotation name
    annotationToProcessString = (String) aContext.getConfigParameterValue(LINE_TO_ANNOTATION_PARAM)
    //annotationFromLineString = (String) aContext.getConfigParameterValue(PARAM_NAME_OUTPUT_ANNOTATION);   
    //if (debug) System.out.println("Warning: annotation to create/update : "+ annotationToProcessString ); 


    // Get the features name
    String[] featuresFromColumnsStringArray =  (String[]) aContext.getConfigParameterValue(PARAM_COLUMNS_TO_FEATURES);
    colIdFeatNameHashMap = new HashMap<Integer, String>();
    if (featuresFromColumnsStringArray != null)
      for (int i = 0;  i < featuresFromColumnsStringArray.length ; i++) {
        String[] ColIdFeatNameCouple = featuresFromColumnsStringArray[i].split(DEFAULT_COLUMNID_AND_FEATURENAME_SEPARATOR);
        if (ColIdFeatNameCouple.length == 2) colIdFeatNameHashMap.put(Integer.parseInt(ColIdFeatNameCouple[0].trim()), ColIdFeatNameCouple[1].trim());
        else {
          System.err.println("Warning: Wrong syntax to the "+PARAM_COLUMNS_TO_FEATURES+" paramater with the following line "+ ColIdFeatNameCouple.toString() )
          //String errorMsg =  "Warning: Wrong syntax to the "+PARAM_FEATURES_TO_COLUMNS+" paramater with the following line "+ ColIdFeatNameCouple.toString();
          //throw new AnnotatorInitializationException(errorMsg,new Object[]{errorMsg});
        }
      }
    //    for (int i = 0;  i < featuresToColumnsStringArray.length ; i++) {
    //      System.out.println("Debug: fc"+ featuresToColumnsStringArray[i] + " i"+ i);   
    //    } 

    //
    arrayFeatureName = (String) aContext.getConfigParameterValue(PARAM_FEATURES_TO_A_LIST_FEATURE)

    exactMatchBoolean = (Boolean) aContext.getConfigParameterValue(EXACT_MATCH_PARAM)
    if (exactMatchBoolean == null) exactMatchBoolean = DEFAULT_EXACT_MATCH_VALUE;

    annotateAnnotationWhereFeaturePathBelongs = (Boolean) aContext.getConfigParameterValue(ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE_PARAM)
    if (annotateAnnotationWhereFeaturePathBelongs == null) annotateAnnotationWhereFeaturePathBelongs = DEFAULT_ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE;


    // Get the featurepath to search
    String featurePathToSearch = (String) aContext.getConfigParameterValue(FEATUREPATH_TO_SEARCH_PARAM)
    if (featurePathToSearch == null) featurePathToSearch = DEFAULT_FEATUREPATH_TO_SEARCH;

    annotationToSearch =  featurePathToSearch.substring(0,featurePathToSearch.lastIndexOf(":"));
    featureToSearch =  featurePathToSearch.substring(featurePathToSearch.lastIndexOf(":")+1,featurePathToSearch.length());
    //System.out.println("Debug: annotationTypeToSearch>"+annotationToSearch+ "< featureToSearch>"+featureToSearch+ "<" );

    //
    this.resource = new CSVDictionaryResource();
    ResourceUtils.loadAResource( this.resource,  aContext,  "DictionaryResource", "DictionaryResourceFile");
  }

  /**
   * The actual process
   *
   * @return the created text to be the dataString of the future created outputView
   */
  @Override
  protected String processContextAnnotation(JCas inputViewJCas,
      FSIterator contextAnnotationsFSIter, Annotation contextAnnotation,
      FSIterator contextualizedInputAnnotationsFSIter,
      String inputFeatureString, JCas outputViewJCas,
      String outputAnnotationString, String ouputFeatureString)
          throws AnalysisEngineProcessException {

    System.out.println("Debug: inputViewJCas.getViewName() "+inputViewJCas.getViewName());

   
    FSIterator subContextAnnotationIterator = AnnotationCollectionUtils.subiterator(
        inputViewJCas, contextAnnotation);

    Boolean atLeastOneAnnotationToSearchFoundAmoungTheSubContextAnnotation = false;
    // for each annotation to search under the context annotation
    while (subContextAnnotationIterator.hasNext()) {
      Annotation aSubAnnotation = (Annotation) subContextAnnotationIterator
          .next();

      System.out.println("Debug: annotationToSearch "+annotationToSearch);
      System.out.println("Debug: aSubAnnotation.getClass().getName() "+aSubAnnotation.getClass().getName());

      //System.out.println("Debug: annotationToSearch.getClass().getName() "+annotationToSearch.getClass().getName()+" wordTypeName"+ wordTypeName);
      if (aSubAnnotation.getClass().getName().equalsIgnoreCase(annotationToSearch)) {
        atLeastOneAnnotationToSearchFoundAmoungTheSubContextAnnotation = true;

        System.out.println("Debug: if (aSubAnnotation.getClass().getName().equalsIgnoreCase(annotationToSearch)) {");

        System.out.println("Debug: anAnnotationToSearch.getBegin()>"+aSubAnnotation.getBegin()+ "< anAnnotationToSearch.getEnd()>"+aSubAnnotation.getEnd()+ "<" );
        //
        String featurePathValue = (String) FeatureUtils.getFeatureValue(aSubAnnotation, featureToSearch); // contextAnnotation.getCoveredText();

        // dictionary word recognizer
        this.recognize(inputViewJCas,this.getRootNode(),aSubAnnotation.getBegin(), aSubAnnotation.getEnd(), featurePathValue);
      }
    }
    if (!atLeastOneAnnotationToSearchFoundAmoungTheSubContextAnnotation) {
      System.err.println("WARNING: the specified context annotation "+annotationToSearch+" has not be found in the annotation index. Check the "+FEATUREPATH_TO_SEARCH_PARAM+" parameter. By default the process has been performed on the whole document text of the current view");
      this.recognize(inputViewJCas,this.getRootNode(), 0, inputViewJCas.getDocumentText().length(), "coveredText");
      //getView(inputViewJCas.getViewName()).

    }
    // keep both or remove the embedded simple ones
    //if (this.isCleaningRequired()) {
    //  AnnotationCollectionUtils.removeSubsumedAnnotation(inputViewJCas,this.compoundWordAnnotationType, this.simpleWordAnnotationType);
    //}
    return contextAnnotation.getCoveredText();
  }


  /**
   *
   * @param aJCas
   * @param root
   * @param relativeBegin TODO
   * @param relativeEnd TODO
   * @param text
   * @throws AnalysisEngineProcessException
   */
  public void recognize(JCas aJCas, PrefixTree root, int relativeBegin, int relativeEnd, String text) throws AnalysisEngineProcessException{
    Type annotationToCreateType = aJCas.getTypeSystem().getType(annotationToProcessString);

    //
    Map<PrefixTree,Branch> currentExploredBranches = new HashMap<PrefixTree,Branch>();
    if (debug)    System.out.println("Debug: Start a new exploration of the tree with the first char");
    currentExploredBranches.put(root,new Branch(annotationToCreateType,0 + relativeBegin));

    int length = text.length();

    //if exactMatchBoolean create only if size to create = length


    // for each character of the text
    for (int index = 0; index < length; index++) {
      //int index = 0;
      //while (index < length)  {

      // get the current text character
      int currentCodePoint = text.codePointAt(index);

      Map<PrefixTree,Branch> nextExploredBranches = new HashMap<PrefixTree,Branch>();
      //if (debug) System.out.println("Debug: for each currently explored branch");
      for (PrefixTree currentExploredBranch : currentExploredBranches.keySet()) {
        PrefixTree nextExploredBranch = currentExploredBranch.getChild(currentCodePoint);

        //
        Branch currentBranch = currentExploredBranches.get(currentExploredBranch);

        //
        if (currentExploredBranch.isLeaf()) {
          //if (debug) System.out.println("Debug: The current explored branch node is a leaf, so we create a corresponding annotation");
          currentBranch.update((List<List<String>>)currentExploredBranch.getValues());

          // for the matched occurrences which do not start at the beginning of the search text, we have to remove the index size at the end
          if (currentBranch.begin != relativeBegin) currentBranch.end =  currentBranch.end - (currentBranch.begin -relativeBegin);

          if (((exactMatchBoolean) && (currentBranch.end - currentBranch.begin == length)) || (!exactMatchBoolean)) {
           
            if (annotateAnnotationWhereFeaturePathBelongs) {
              currentBranch.begin = relativeBegin;
              currentBranch.end = relativeEnd;
            }
            currentBranch.fire(aJCas);
          }
        }

        //
        if (nextExploredBranch != null) {
          //if (debug) System.out.println("Debug: More following nodes with this char at this point in the tree");
          // update the end
          currentBranch.update(index+1);
          // add to next
          nextExploredBranches.put(nextExploredBranch,currentBranch);
        }
        else {
          //if (debug) System.out.println("Debug: No more following nodes with this char at this point in the tree");
        }

        //
        //if (debug) System.out.println("Debug: Start a new exploration of the tree with the following char");
        nextExploredBranches.put(root,new Branch(annotationToCreateType,index + 1 + relativeBegin ));

      }
      //if (debug) System.out.println("Debug: Reset the current explored nodes with the next which have children with the matched the char");
      currentExploredBranches.clear();
      currentExploredBranches.putAll(nextExploredBranches)
      //if (!exactMatchBoolean)
      //index ++;
      //else index = length;
    }

    //if (debug) System.out.println("Debug: for the last char, may be need to create annotations");
    for (PrefixTree currentExploredBranch : currentExploredBranches.keySet()) {
      //
      Branch currentBranch = currentExploredBranches.get(currentExploredBranch);
      //
      if (currentExploredBranch.isLeaf()) {
        //if (debug) System.out.println("Debug: The current explored branch node is a leaf, so we create a corresponding annotation");
        currentBranch.update((List<List<String>>)currentExploredBranch.getValues());
        //if (exactMatchBoolean) if (currentBranch.begin)

        // for the matched occurrences which do not start at the beginning of the search text, we have to remove the index size at the end
        if (currentBranch.begin != relativeBegin) currentBranch.end =  currentBranch.end - (currentBranch.begin -relativeBegin);

        //System.out.println("Debug: currentBranch.begin>" + currentBranch.begin + "< currentBranch.end>" + currentBranch.end + "< (currentBranch.end - currentBranch.begin)>"+ (currentBranch.end - currentBranch.begin) +"< length>" + length + "< ");
        if (((exactMatchBoolean) && ((currentBranch.end - currentBranch.begin) == length)) || (!exactMatchBoolean))
        {
          if (annotateAnnotationWhereFeaturePathBelongs) {
            currentBranch.begin = relativeBegin;
            currentBranch.end = relativeEnd;
          }
          currentBranch.fire(aJCas);
        }
      }
    }
  }




  /**
   * Temporary  structure to handle the range of text
   * currently matched in the tree
   */
  private class Branch {

    private int begin;
    private int end;
    private Type type;
    private List<List<String>> valuesList;

    public Branch(Type type,int begin) {
      this(type, begin, begin);
    }

    public Branch(Type type,int begin,int end) {
      this.type = type;
      this.begin = begin;
      this.end = end;
    }

    public void update(int end) {
      this.end = this.begin + end ;
    }

    public void update(List<List<String>> valuesList) {
      this.valuesList = valuesList;
    }

    /**
     * Launch the creation of the annotations
     * @param aJCas
     */
    public void fire(JCas aJCas) throws AnalysisEngineProcessException{

      if (this.begin < this.end) {
        //System.out.println("Debug: Creating an annotation " );

        // patch
        //if (this.begin != relativeBegin) this.end =  this.end - (this.begin -relativeBegin);

        // a distinct annotation will be created for each list of values
        // and in any case, even if there is no list of values, an annotation will be created
        if (this.valuesList.isEmpty()) {
          ArrayList<String> currentValues = new ArrayList<String> ();
          this.create(aJCas, currentValues);
        }
        else {
          Iterator<List<String>> values = this.valuesList.iterator();
          while (values.hasNext()) {
            this.create(aJCas, values.next());
          }
        }
      }
      else {
        //System.out.println("Debug: Attempt for creating an annotation but this.begin >= this.end");

      }
    }

    /**
     * Create an annotation
     * (at least a sequence of two nodes including the root node and the final node is a leaf)
     * @param aJCas
     */
    public void create(JCas aJCas, List<String> values) throws AnalysisEngineProcessException {


      // For each considered column in the descriptor file
      // Get the features to set
      HashMap<String, Object> featuresHashMap = new HashMap<String, Object>();

      // Add begin and end features
      featuresHashMap.put("begin", Integer.toString(this.begin));
      featuresHashMap.put("end", Integer.toString(this.end));

      // Add a StringArray feature with all the values
      if (arrayFeatureName != null) {
        //List<String> valuesStringArray = new ArrayList<String>();

        // http://objectmix.com/apache/682381-fslists.html
        // In UIMA There's support for the fixed-length arrays, but not for the variable length lists.
        // in fact the UIMA API gives you all the functionality you need for coding what you need with these structures.

        String[] valuesClassicStringArray = new String[values.size()];
        Iterator<String> valuesIterator = values.iterator();
        int i = 0;
        while (valuesIterator.hasNext()) {
          String valueString = valuesIterator.next();
          //System.out.println("Debug: valuesIterator.next()>"+valueString+"<");
          valuesClassicStringArray[i] = valueString;
          i++;
        }
        //System.out.println("Debug: this.values.size()>"+this.values.size()+"< valuesClassicStringArray.length>"+valuesClassicStringArray.length+"<");
        StringArray valuesStringArray = new StringArray( aJCas, i);
        valuesStringArray.copyFromArray(valuesClassicStringArray, 0, 0, i) ;
        featuresHashMap.put(arrayFeatureName, valuesStringArray);
      }

      //
      Set<Integer> colIdFeatNameKeySet = colIdFeatNameHashMap.keySet();
      Iterator<Integer> colIdFeatNameKeySetIter = colIdFeatNameKeySet.iterator();
      //if (debug) if (this.values.size() != 0) {System.out.println("Debug: the values to set are >"+this.values+"<");  }

      //if (debug)  if (colIdFeatNameKeySetIter.hasNext()) {System.out.println("Debug: some colums have been assigned to some features by parameters");  }
      while (colIdFeatNameKeySetIter.hasNext()){
        int  colId = (Integer) colIdFeatNameKeySetIter.next() -1;
        if (colId < values.size() ) {
          //if (debug)  {System.out.println("Debug: the current temporary expression has a value >"+this.values.get(colId)+"< for that feature>"+colIdFeatNameHashMap.get(colId+1));  }
          featuresHashMap.put(colIdFeatNameHashMap.get(colId +1), values.get(colId));
        }
        else {
          //if (debug)  {System.out.println("Debug: the current temporary expression has not a value for that ");  }
          //if (debug) System.out.println("Warning: The column id "+colId+" of the "+PARAM_FEATURES_TO_COLUMNS+" paramater is out of range of the current resource line"); 
          //String errorMsg =  "Warning: Wrong syntax to the "+PARAM_FEATURES_TO_COLUMNS+" paramater with the following line "+ ColIdFeatNameCouple.toString();
          //throw new AnnotatorInitializationException(errorMsg,new Object[]{errorMsg});
        }
      }

      //if (debug) System.out.println("Debug: annotationFromLineString"+annotationToProcessString); 
      //if (debug) System.out.println("Debug: featuresHashMap"+featuresHashMap); 

      AnnotationUtils.createAnnotation( aJCas,  this.type.getName(), featuresHashMap);
      //

    }

  }



}
// TOP

// Related Classes of fr.univnantes.lina.uima.dictionaryAnnotator.DictionaryAnnotatorAE$Branch

// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.