Source Code of fr.univnantes.lina.uima.dictionaryAnnotator.DictionaryAnnotatorAE$Branch

/** 
 * UIMA dictionary annotator
 * Copyright (C) 2011  Nicolas Hernandez
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package fr.univnantes.lina.uima.dictionaryAnnotator;




import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;


import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.AnalysisComponent;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;


import fr.univnantes.lina.uima.common.AnnotationCollectionUtils;
import fr.univnantes.lina.uima.common.AnnotationUtils;
import fr.univnantes.lina.uima.common.CommonAE;
import fr.univnantes.lina.uima.common.FeatureUtils;
import fr.univnantes.lina.uima.common.ResourceUtils;
import fr.univnantes.lina.uima.dataModels.PrefixTree;
import fr.univnantes.lina.uima.resources.CSVDictionaryResource;


/**
 * Annotates the text offsets where expressions coming from a dictionary are matched.
 * It is possible to search in any feature value.
 * The processing consists of:
 * loading the resource (a resource can be loaded either by a resource declaration and binding, or via a parameter setting and consequently a call to a doLoad method),
 * parsing the resource, 
 * building the prefix tree from a specific part of each entry, 
 * recognizing the entries of the resource in a view, 
 * removing covered annotations.
 * 
 * @author rocheteau, 
 * @author revised by hernandez to get more abstraction (commonAE), handle csv resource file, bug corrections (last char to tokenize for simple and compound words, char to int (unicode))
 */
public class DictionaryAnnotatorAE extends CommonAE {


  /*
   * PARAMETERS NAMES
   */


  /**
   * Parameter name for the name of the annotation type to create for each line of the csv files
   */
  public static final String LINE_TO_ANNOTATION_PARAM = "LineToAnnotation";


  /**
   * Parameter name for the csv column id and the feature names to 
   * associate with from the OutputAnnotation
   */
  public static final String PARAM_COLUMNS_TO_FEATURES = "ColumnsToFeatures";


  /**
   * Parameter name for associating the CSV column values to a single list feature 
   * It is an alternative or a complementary approach to the ColumnsToFeatures
   */
  public static final String PARAM_FEATURES_TO_A_LIST_FEATURE = "ColumnsToAListFeature";


  /**
   * Parameter name of the CSV separator 
   * by default a tabulation, '\t', but can be set to comma ',' ... 
   */
  public static final String CSV_SEPARATOR_PARAM = "CSVSeparator";


  /**
   * Parameter name of the feature path to search
   */
  public static final String FEATUREPATH_TO_SEARCH_PARAM = "FeaturePathToSearch";


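  /**
   * Parameter name of the flag which, when true, restricts the created annotations
   * to the dictionary entries whose match has the same length as the searched text
   */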
  public static final String EXACT_MATCH_PARAM = "ExactMatch";


  /**
   * Parameter name of AnnotateAnnotationWhereFeaturePathBelongs (see the descriptor)
   */
  public static final String ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE_PARAM = "AnnotateAnnotationWhereFeaturePathBelongs" ;


  /*
   * PARAMETERS DEFAULT VALUES
   */


  /**
   * Name of the default separator between columnId and the feature name associated
   */
  private static String DEFAULT_COLUMNID_AND_FEATURENAME_SEPARATOR = "->";


  /**
   * Name of the default csv separator
   */
  private static String DEFAULT_CSV_SEPARATOR = "\t";


  /**
   * Name of the default feature path to search
   */
  private static String DEFAULT_FEATUREPATH_TO_SEARCH = "uima.tcas.DocumentAnnotation:coveredText";
  
  private static Boolean DEFAULT_EXACT_MATCH_VALUE = false;


  /**
   * Name of the default for AnnotateAnnotationWhereFeaturePathBelongs
   */
  private static Boolean DEFAULT_ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE  = false;


  /*
   * LOCAL VARIABLES  
   */
  private CSVDictionaryResource resource;
  /**
   * Name of the annotation type to create for each line of the csv files
   */
  private String annotationToProcessString = null;


  /**
   * Couples of csv column id and feature name of the OutputAnnotation to associate with.
   * The syntax is 
   * 1 -> begin
   * if the first value of an entry should be set to the begin feature of the OutputAnnotation
   * (column ids start at 1: the id minus one is used as the index in the list of values of an entry)
   */
  private HashMap<Integer,String> colIdFeatNameHashMap = null; 


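  /**
   * Name of the feature which receives, as a StringArray, all the values of a matched entry
   * (null when the ColumnsToAListFeature parameter is not set)
   */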
  private String arrayFeatureName = null;




    private Boolean debug = true;


  private String csvSeparatorString = "";


  /**
   * Feature path to search
   */
  private String annotationToSearch = "";
  private String featureToSearch = "";


  private Boolean exactMatchBoolean = false;


  private Boolean annotateAnnotationWhereFeaturePathBelongs = false;


  /*
   * GETTER and SETTER
   */
  private PrefixTree getRootNode() {
    return this.resource.getRoot();
  }








  /**
   * Get the values of the configuration parameters
   * 
   * @see AnalysisComponent#initialize(UimaContext)
   */
  @Override
  public void initialize(UimaContext aContext) throws ResourceInitializationException {            


    // SUPER PARAMATERS
    super.initialize(aContext);


    // CURRENT PARAMETERS


    // Get the input csv separator
    csvSeparatorString  = (String) aContext.getConfigParameterValue(CSV_SEPARATOR_PARAM);
    if (csvSeparatorString == null) {
      csvSeparatorString = DEFAULT_CSV_SEPARATOR;
    }


    // Get the annotation name 
    annotationToProcessString = (String) aContext.getConfigParameterValue(LINE_TO_ANNOTATION_PARAM);  
    //annotationFromLineString = (String) aContext.getConfigParameterValue(PARAM_NAME_OUTPUT_ANNOTATION);    
    //if (debug) System.out.println("Warning: annotation to create/update : "+ annotationToProcessString );  




    // Get the features name
    String[] featuresFromColumnsStringArray =  (String[]) aContext.getConfigParameterValue(PARAM_COLUMNS_TO_FEATURES);
    colIdFeatNameHashMap = new HashMap<Integer, String>();
    if (featuresFromColumnsStringArray != null)
      for (int i = 0;  i < featuresFromColumnsStringArray.length ; i++) {
        String[] ColIdFeatNameCouple = featuresFromColumnsStringArray[i].split(DEFAULT_COLUMNID_AND_FEATURENAME_SEPARATOR);
        if (ColIdFeatNameCouple.length == 2) colIdFeatNameHashMap.put(Integer.parseInt(ColIdFeatNameCouple[0].trim()), ColIdFeatNameCouple[1].trim());
        else {
          System.err.println("Warning: Wrong syntax to the "+PARAM_COLUMNS_TO_FEATURES+" paramater with the following line "+ ColIdFeatNameCouple.toString() );  
          //String errorMsg =  "Warning: Wrong syntax to the "+PARAM_FEATURES_TO_COLUMNS+" paramater with the following line "+ ColIdFeatNameCouple.toString();
          //throw new AnnotatorInitializationException(errorMsg,new Object[]{errorMsg}); 
        }
      }
    //    for (int i = 0;  i < featuresToColumnsStringArray.length ; i++) {
    //      System.out.println("Debug: fc"+ featuresToColumnsStringArray[i] + " i"+ i);    
    //    }  


    //
    arrayFeatureName = (String) aContext.getConfigParameterValue(PARAM_FEATURES_TO_A_LIST_FEATURE);  


    exactMatchBoolean = (Boolean) aContext.getConfigParameterValue(EXACT_MATCH_PARAM);  
    if (exactMatchBoolean == null) exactMatchBoolean = DEFAULT_EXACT_MATCH_VALUE;


    annotateAnnotationWhereFeaturePathBelongs = (Boolean) aContext.getConfigParameterValue(ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE_PARAM);  
    if (annotateAnnotationWhereFeaturePathBelongs == null) annotateAnnotationWhereFeaturePathBelongs = DEFAULT_ANNOTATE_ANNOTATION_AND_NOT_FEATURE_VALUE;




    // Get the featurepath to search 
    String featurePathToSearch = (String) aContext.getConfigParameterValue(FEATUREPATH_TO_SEARCH_PARAM);  
    if (featurePathToSearch == null) featurePathToSearch = DEFAULT_FEATUREPATH_TO_SEARCH;


    annotationToSearch =  featurePathToSearch.substring(0,featurePathToSearch.lastIndexOf(":"));
    featureToSearch =  featurePathToSearch.substring(featurePathToSearch.lastIndexOf(":")+1,featurePathToSearch.length());
    //System.out.println("Debug: annotationTypeToSearch>"+annotationToSearch+ "< featureToSearch>"+featureToSearch+ "<" );


    //
    this.resource = new CSVDictionaryResource();
    ResourceUtils.loadAResource( this.resource,  aContext,  "DictionaryResource", "DictionaryResourceFile");
  }


  /**
   * The actual process
   * 
   * @return the created text to be the dataString of the future created outputView
   */
  @Override
  protected String processContextAnnotation(JCas inputViewJCas,
      FSIterator contextAnnotationsFSIter, Annotation contextAnnotation,
      FSIterator contextualizedInputAnnotationsFSIter,
      String inputFeatureString, JCas outputViewJCas,
      String outputAnnotationString, String ouputFeatureString)
          throws AnalysisEngineProcessException {


    System.out.println("Debug: inputViewJCas.getViewName() "+inputViewJCas.getViewName());


    
    FSIterator subContextAnnotationIterator = AnnotationCollectionUtils.subiterator(
        inputViewJCas, contextAnnotation);


    Boolean atLeastOneAnnotationToSearchFoundAmoungTheSubContextAnnotation = false;
    // for each annotation to search under the context annotation
    while (subContextAnnotationIterator.hasNext()) {
      Annotation aSubAnnotation = (Annotation) subContextAnnotationIterator
          .next();


      System.out.println("Debug: annotationToSearch "+annotationToSearch);
      System.out.println("Debug: aSubAnnotation.getClass().getName() "+aSubAnnotation.getClass().getName());


      //System.out.println("Debug: annotationToSearch.getClass().getName() "+annotationToSearch.getClass().getName()+" wordTypeName"+ wordTypeName);
      if (aSubAnnotation.getClass().getName().equalsIgnoreCase(annotationToSearch)) {
        atLeastOneAnnotationToSearchFoundAmoungTheSubContextAnnotation = true;


        System.out.println("Debug: if (aSubAnnotation.getClass().getName().equalsIgnoreCase(annotationToSearch)) {");


        System.out.println("Debug: anAnnotationToSearch.getBegin()>"+aSubAnnotation.getBegin()+ "< anAnnotationToSearch.getEnd()>"+aSubAnnotation.getEnd()+ "<" );
        //
        String featurePathValue = (String) FeatureUtils.getFeatureValue(aSubAnnotation, featureToSearch); // contextAnnotation.getCoveredText();


        // dictionary word recognizer
        this.recognize(inputViewJCas,this.getRootNode(),aSubAnnotation.getBegin(), aSubAnnotation.getEnd(), featurePathValue);
      }
    }
    if (!atLeastOneAnnotationToSearchFoundAmoungTheSubContextAnnotation) {
      System.err.println("WARNING: the specified context annotation "+annotationToSearch+" has not be found in the annotation index. Check the "+FEATUREPATH_TO_SEARCH_PARAM+" parameter. By default the process has been performed on the whole document text of the current view");
      this.recognize(inputViewJCas,this.getRootNode(), 0, inputViewJCas.getDocumentText().length(), "coveredText");
      //getView(inputViewJCas.getViewName()).


    }
    // keep both or remove the embedded simple ones
    //if (this.isCleaningRequired()) {
    //  AnnotationCollectionUtils.removeSubsumedAnnotation(inputViewJCas,this.compoundWordAnnotationType, this.simpleWordAnnotationType);
    //}
    return contextAnnotation.getCoveredText();
  }




  /**
   * 
   * @param aJCas
   * @param root
   * @param relativeBegin TODO
   * @param relativeEnd TODO
   * @param text
   * @throws AnalysisEngineProcessException
   */
  public void recognize(JCas aJCas, PrefixTree root, int relativeBegin, int relativeEnd, String text) throws AnalysisEngineProcessException{
    Type annotationToCreateType = aJCas.getTypeSystem().getType(annotationToProcessString);


    //
    Map<PrefixTree,Branch> currentExploredBranches = new HashMap<PrefixTree,Branch>();
    if (debug)    System.out.println("Debug: Start a new exploration of the tree with the first char");
    currentExploredBranches.put(root,new Branch(annotationToCreateType,0 + relativeBegin));


    int length = text.length();


    //if exactMatchBoolean create only if size to create = length




    // for each character of the text
    for (int index = 0; index < length; index++) {
      //int index = 0; 
      //while (index < length)  {


      // get the current text character
      int currentCodePoint = text.codePointAt(index);


      Map<PrefixTree,Branch> nextExploredBranches = new HashMap<PrefixTree,Branch>();
      //if (debug) System.out.println("Debug: for each currently explored branch");
      for (PrefixTree currentExploredBranch : currentExploredBranches.keySet()) {
        PrefixTree nextExploredBranch = currentExploredBranch.getChild(currentCodePoint);


        // 
        Branch currentBranch = currentExploredBranches.get(currentExploredBranch);


        // 
        if (currentExploredBranch.isLeaf()) {
          //if (debug) System.out.println("Debug: The current explored branch node is a leaf, so we create a corresponding annotation");
          currentBranch.update((List<List<String>>)currentExploredBranch.getValues());


          // for the matched occurrences which do not start at the beginning of the search text, we have to remove the index size at the end
          if (currentBranch.begin != relativeBegin) currentBranch.end =  currentBranch.end - (currentBranch.begin -relativeBegin);


          if (((exactMatchBoolean) && (currentBranch.end - currentBranch.begin == length)) || (!exactMatchBoolean)) {
            
            if (annotateAnnotationWhereFeaturePathBelongs) {
              currentBranch.begin = relativeBegin;
              currentBranch.end = relativeEnd;
            }
            currentBranch.fire(aJCas);
          }
        }


        //
        if (nextExploredBranch != null) {
          //if (debug) System.out.println("Debug: More following nodes with this char at this point in the tree");
          // update the end
          currentBranch.update(index+1);
          // add to next
          nextExploredBranches.put(nextExploredBranch,currentBranch);
        }
        else {
          //if (debug) System.out.println("Debug: No more following nodes with this char at this point in the tree");
        }


        //
        //if (debug) System.out.println("Debug: Start a new exploration of the tree with the following char");
        nextExploredBranches.put(root,new Branch(annotationToCreateType,index + 1 + relativeBegin ));


      }
      //if (debug) System.out.println("Debug: Reset the current explored nodes with the next which have children with the matched the char");
      currentExploredBranches.clear();
      currentExploredBranches.putAll(nextExploredBranches);  
      //if (!exactMatchBoolean) 
      //index ++; 
      //else index = length;
    }


    //if (debug) System.out.println("Debug: for the last char, may be need to create annotations");
    for (PrefixTree currentExploredBranch : currentExploredBranches.keySet()) {
      // 
      Branch currentBranch = currentExploredBranches.get(currentExploredBranch);
      // 
      if (currentExploredBranch.isLeaf()) {
        //if (debug) System.out.println("Debug: The current explored branch node is a leaf, so we create a corresponding annotation");
        currentBranch.update((List<List<String>>)currentExploredBranch.getValues());
        //if (exactMatchBoolean) if (currentBranch.begin) 


        // for the matched occurrences which do not start at the beginning of the search text, we have to remove the index size at the end
        if (currentBranch.begin != relativeBegin) currentBranch.end =  currentBranch.end - (currentBranch.begin -relativeBegin);


        //System.out.println("Debug: currentBranch.begin>" + currentBranch.begin + "< currentBranch.end>" + currentBranch.end + "< (currentBranch.end - currentBranch.begin)>"+ (currentBranch.end - currentBranch.begin) +"< length>" + length + "< ");
        if (((exactMatchBoolean) && ((currentBranch.end - currentBranch.begin) == length)) || (!exactMatchBoolean))
        {
          if (annotateAnnotationWhereFeaturePathBelongs) {
            currentBranch.begin = relativeBegin;
            currentBranch.end = relativeEnd;
          }
          currentBranch.fire(aJCas);
        }
      }
    }
  }








  /**
   * Temporary  structure to handle the range of text 
   * currently matched in the tree
   */
  private class Branch {


    private int begin;
    private int end;
    private Type type;
    private List<List<String>> valuesList; 


    public Branch(Type type,int begin) {
      this(type, begin, begin);
    }


    public Branch(Type type,int begin,int end) {
      this.type = type;
      this.begin = begin;
      this.end = end;
    }


    public void update(int end) {
      this.end = this.begin + end ;
    }


    public void update(List<List<String>> valuesList) {
      this.valuesList = valuesList;
    }


    /**
     * Launch the creation of the annotations 
     * @param aJCas
     */
    public void fire(JCas aJCas) throws AnalysisEngineProcessException{


      if (this.begin < this.end) {
        //System.out.println("Debug: Creating an annotation " );


        // patch 
        //if (this.begin != relativeBegin) this.end =  this.end - (this.begin -relativeBegin);


        // a distinct annotation will be created for each list of values
        // and in any case, even if there is no list of values, an annotation will be created
        if (this.valuesList.isEmpty()) {
          ArrayList<String> currentValues = new ArrayList<String> ();
          this.create(aJCas, currentValues);
        }
        else {
          Iterator<List<String>> values = this.valuesList.iterator();
          while (values.hasNext()) {
            this.create(aJCas, values.next());
          }
        }
      }
      else {
        //System.out.println("Debug: Attempt for creating an annotation but this.begin >= this.end");


      }
    }


    /**
     * Create an annotation 
     * (at least a sequence of two nodes including the root node and the final node is a leaf)
     * @param aJCas
     */
    public void create(JCas aJCas, List<String> values) throws AnalysisEngineProcessException {




      // For each considered column in the descriptor file
      // Get the features to set 
      HashMap<String, Object> featuresHashMap = new HashMap<String, Object>();


      // Add begin and end features
      featuresHashMap.put("begin", Integer.toString(this.begin));
      featuresHashMap.put("end", Integer.toString(this.end));


      // Add a StringArray feature with all the values
      if (arrayFeatureName != null) {
        //List<String> valuesStringArray = new ArrayList<String>();


        // http://objectmix.com/apache/682381-fslists.html
        // In UIMA There's support for the fixed-length arrays, but not for the variable length lists.
        // in fact the UIMA API gives you all the functionality you need for coding what you need with these structures.


        String[] valuesClassicStringArray = new String[values.size()];
        Iterator<String> valuesIterator = values.iterator();
        int i = 0;
        while (valuesIterator.hasNext()) {
          String valueString = valuesIterator.next();
          //System.out.println("Debug: valuesIterator.next()>"+valueString+"<");
          valuesClassicStringArray[i] = valueString;
          i++;
        }
        //System.out.println("Debug: this.values.size()>"+this.values.size()+"< valuesClassicStringArray.length>"+valuesClassicStringArray.length+"<");
        StringArray valuesStringArray = new StringArray( aJCas, i);
        valuesStringArray.copyFromArray(valuesClassicStringArray, 0, 0, i) ;
        featuresHashMap.put(arrayFeatureName, valuesStringArray);
      }


      //
      Set<Integer> colIdFeatNameKeySet = colIdFeatNameHashMap.keySet();
      Iterator<Integer> colIdFeatNameKeySetIter = colIdFeatNameKeySet.iterator();
      //if (debug) if (this.values.size() != 0) {System.out.println("Debug: the values to set are >"+this.values+"<");  }


      //if (debug)  if (colIdFeatNameKeySetIter.hasNext()) {System.out.println("Debug: some colums have been assigned to some features by parameters");  }
      while (colIdFeatNameKeySetIter.hasNext()){
        int  colId = (Integer) colIdFeatNameKeySetIter.next() -1;
        if (colId < values.size() ) {
          //if (debug)  {System.out.println("Debug: the current temporary expression has a value >"+this.values.get(colId)+"< for that feature>"+colIdFeatNameHashMap.get(colId+1));  }
          featuresHashMap.put(colIdFeatNameHashMap.get(colId +1), values.get(colId));
        }
        else {
          //if (debug)  {System.out.println("Debug: the current temporary expression has not a value for that ");  }
          //if (debug) System.out.println("Warning: The column id "+colId+" of the "+PARAM_FEATURES_TO_COLUMNS+" paramater is out of range of the current resource line");  
          //String errorMsg =  "Warning: Wrong syntax to the "+PARAM_FEATURES_TO_COLUMNS+" paramater with the following line "+ ColIdFeatNameCouple.toString();
          //throw new AnnotatorInitializationException(errorMsg,new Object[]{errorMsg}); 
        }
      }


      //if (debug) System.out.println("Debug: annotationFromLineString"+annotationToProcessString);  
      //if (debug) System.out.println("Debug: featuresHashMap"+featuresHashMap);  


      AnnotationUtils.createAnnotation( aJCas,  this.type.getName(), featuresHashMap);
      //


    }


  }






}
Source Code of fr.univnantes.lina.uima.dictionaryAnnotator.DictionaryAnnotatorAE$Branch

Related Classes of fr.univnantes.lina.uima.dictionaryAnnotator.DictionaryAnnotatorAE$Branch