Package org.mitre.medfacts.i2b2.processors

Examples of org.mitre.medfacts.i2b2.processors.FileProcessor


      AssertionAnnotation assertForTI = trainingInstance.getAssertAnnotateForTI();
      //Count number of enclosing negation and speculation scopes
      for (ScopeAnnotation enclosingScope : assertForTI.getEnclosingScopes())
      {
        CueAnnotation cueForScope = enclosingScope.getCueForScope();
        CueSubType scopeType = cueForScope.getCueSubType();
        if (scopeType == CueSubType.NEGATION) enclosingNegationScopeCnt++;
        else if (scopeType == CueSubType.SPECULATION) enclosingSpeculationScopeCnt++;
        else logger.info(String.format("WARNING: CUE %s%n  FOR SCOPE %s%n ENCLOSING %s%n is neither a negation nor speculation cue%n", cueForScope, enclosingScope,assertForTI));
      }
      if (checkForEnabledFeature("statusRuleMixNMatchFeature"))
View Full Code Here


        if (annotationsAtCurrentPosition != null)
        for (Annotation a : annotationsAtCurrentPosition)
        {
          if (a instanceof CueWordAnnotation)
          {
            CueWordAnnotation cueWord = (CueWordAnnotation)a;
            String cueWordType = cueWord.getCueWordType().toString();
            if (checkForEnabledFeature("cueWord"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWord_" + cueWordType + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWord_" + cueWordType + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWord_" + cueWordType + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWord_" + cueWordType + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWord_" + cueWordType + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordValue"))
            {
              trainingInstance.addFeature("cueword_" + cueWord.getCueWordText());
            }
          }
          if (checkForEnabledFeature("concepts")) {
            if (a instanceof ConceptAnnotation) {
                ConceptAnnotation concept = (ConceptAnnotation) a;
View Full Code Here

      if (checkForEnabledFeature("cueWordOrderingsLeft")) {
          List<CueWordAnnotation> annots = new ArrayList<CueWordAnnotation>();
          for (Annotation a : allLineAnnotations) {
      if (a instanceof CueWordAnnotation) {
        CueWordAnnotation an = (CueWordAnnotation)a;
        if ((inCWSCueWordSet(an.getCueWordType())) && (an.getBegin().getTokenOffset() < conceptBeginTokenOffset)) {
                  annots.add(an);
              }
      }
          }
    if (annots.size() > 0) {
      Collections.sort(annots);
      StringBuilder str = new StringBuilder("CWS_left");
      for (CueWordAnnotation a : annots) {
        str.append("_");
        str.append(a.getCueWordType());
      }
      trainingInstance.addFeature(str.toString());
    }
      }

      if (checkForEnabledFeature("cueWordOrderingsRight")) {
          List<CueWordAnnotation> annots = new ArrayList<CueWordAnnotation>();
          for (Annotation a : allLineAnnotations) {
      if (a instanceof CueWordAnnotation) {
        CueWordAnnotation an = (CueWordAnnotation)a;
        if ((inCWSCueWordSet(an.getCueWordType())) && (an.getBegin().getTokenOffset() > conceptEndTokenOffset)) {
    annots.add(an);
              }
      }
          }
          Collections.sort(annots);
    if (annots.size() > 0) {
      StringBuilder str = new StringBuilder("CWS_right");
      for (CueWordAnnotation a : annots) {
              str.append("_");
              str.append(a.getCueWordType());
      }
      trainingInstance.addFeature(str.toString());
    }
      }

      //logger.info(String.format("lineNumber: %d%n", lineNumber));
      String tokensOnCurrentLine[] = textLookup[lineNumber-1];
      for (int currentTokenOffset=0; currentTokenOffset < tokensOnCurrentLine.length; currentTokenOffset++)
      {
        String currentToken = tokensOnCurrentLine[currentTokenOffset];
        List<Annotation> annotationsAtCurrentPosition = indexer.findAnnotationsForPosition(lineNumber, currentTokenOffset);

        int scopeCount = 0;
        if (annotationsAtCurrentPosition != null)
        for (Annotation a : annotationsAtCurrentPosition)
        {
            if (checkForEnabledFeature("concepts")) {
            if (a instanceof ConceptAnnotation) {
                ConceptAnnotation concept = (ConceptAnnotation) a;

                String conceptType = concept.getConceptType().toString();
                int thisConceptBegin = concept.getBegin().getTokenOffset();
                int thisConceptEnd = concept.getEnd().getTokenOffset();
                if (concept.getBegin().getTokenOffset() < conceptBeginTokenOffset) {
                    trainingInstance.addFeature("concept_" + conceptType + "_left");
                    if ((conceptBeginTokenOffset - thisConceptEnd) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_left_3");
                    }
                } else {
                    if ((thisConceptBegin - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_right_3");
                    }
                    trainingInstance.addFeature("concept_" + conceptType + "_right");
                }
            }
            }

          if (a instanceof ScopeAnnotation)
          {
            ScopeAnnotation scope = (ScopeAnnotation)a;
            scopeCount++;
            if (checkForEnabledFeature("scope"))
            {
              trainingInstance.addFeature("scope");
            }
            if (checkForEnabledFeature("inScope"))
            {
              trainingInstance.addFeature("in_scope_" + currentToken);
            }
            if (checkForEnabledFeature("inScopeId"))
            {
              trainingInstance.addFeature("in_scope_id_" + scope.getScopeId() + "_" + currentToken);
            }
          }

          if (a instanceof CueAnnotation)
          {
            CueAnnotation cue = (CueAnnotation)a;
            if (checkForEnabledFeature("cue"))
            {
              String cueType = cue.getCueSubType().toString();
              int cueBegin = cue.getBegin().getTokenOffset();
              if (cueBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cue_" + cueType + "_left");
                  if ((conceptBeginTokenOffset - cueBegin) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_left_3");
                  }
              } else {
                  int cueEnd = cue.getEnd().getTokenOffset();
                  trainingInstance.addFeature("cue_" + cueType + "_right");
                  if ((cueEnd - conceptEndTokenOffset) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_right_3");
                  }
              }
            }
            if (checkForEnabledFeature("inCue"))
            {
              trainingInstance.addFeature("in_cue_" + currentToken);
            }
            if (checkForEnabledFeature("inCueForScopeId"))
            {
              trainingInstance.addFeature("in_cue_for_scope_id_" + cue.getScopeIdReference() + "_" + currentToken);
            }
          }

          if (a instanceof CueWordAnnotation)
          {
            CueWordAnnotation cueWord = (CueWordAnnotation)a;
            String cueWordType = cueWord.getCueWordType().toString();
            String cueWordText = cueWord.getCueWordText();
            String escapedCueWordText = escapeFeatureName(cueWordText);
            String escapedCueWordClass = null;
            String cueWordClass = cueWord.getCueWordClass();
            boolean cueWordClassIsNotEmpty = (cueWordClass != null) && (!cueWordClass.isEmpty());
            if (checkForEnabledFeature("cueWordClassValue") && cueWordClassIsNotEmpty)
            {
              escapedCueWordClass = escapeFeatureName(cueWordClass);
            }

            if (checkForEnabledFeature("cueWordTextPositional"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordTypePositional"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordClassPositional") && cueWordClassIsNotEmpty)
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordTextValue"))
            {
              trainingInstance.addFeature("cueWordTextValue_" + escapedCueWordText);
            }
            if (checkForEnabledFeature("cueWordTypeValue") && cueWord.getCueWordType() != null)
            {
              trainingInstance.addFeature("cueWordTypeValue_" + cueWord.getCueWordType().toString());
            }
            if (checkForEnabledFeature("cueWordClassValue") && cueWord.getCueWordClass() != null && !cueWord.getCueWordClass().isEmpty())
            {
              trainingInstance.addFeature("cueWordClassValue_" + escapedCueWordClass);
            }
          }
View Full Code Here

        {
          boolean matched = compareForCueMatch(cueItem, currentLine, tokenOffset);
          if (matched)
          {
            logger.finest(String.format("MATCHES!!!%n  BEGIN MATCH%n  cue: %s%n  inputLine: %s%n  position: %d%n  END MATCH%n%n", cueItem.toString(), convertLineToString(currentLine), tokenOffset));
            CueWordAnnotation a = new CueWordAnnotation();
            int beginOffset = tokenOffset;
            int endOffset = tokenOffset + size - 1;
            Location beginLocation = new Location();
            beginLocation.setLine(lineNumberOffset + 1);
            beginLocation.setTokenOffset(beginOffset);
            Location endLocation = new Location();
            endLocation.setLine(lineNumberOffset + 1);
            endLocation.setTokenOffset(endOffset);
            a.setBegin(beginLocation);
            a.setEnd(endLocation);
            a.setCueWordType(getCueWordType());
            a.setCueWordClass(cueItem.getCueWordClass());
            a.setCueWordText(constructWordSequenceText(currentLine, beginOffset, endOffset));
            annotationList.add(a);
          }
        }

        // generate alternative sentences for the current line with any
View Full Code Here

    try
    {
      for (Object[] current : temp)
      {
        CueWordType cueWordType = (CueWordType)current[0];
        String cueFilename = (String)current[1];
        MedFactsRunner.processCueList(cueFilename, cueWordType, textLookup, allAnnotationList, annotationsByType);
      }
    }
    catch (URISyntaxException e)
View Full Code Here

    {
      List<Annotation> annotationsAtCurrentPosition = indexer.findAnnotationsForPosition(lineNumber, i);
      if (annotationsAtCurrentPosition != null) {
        for (Annotation a : annotationsAtCurrentPosition) {
    if (a instanceof PartOfSpeechAnnotation) {
      PartOfSpeechAnnotation posAn = (PartOfSpeechAnnotation)a;
      sb.append(posAn.getPartOfSpeech());
          }
  }
      }
    }
    return StringHandling.escapeStringForFeatureName(sb.toString());
View Full Code Here

    {
      List<Annotation> annotationsAtCurrentPosition = indexer.findAnnotationsForPosition(lineNumber, i);
      if (annotationsAtCurrentPosition != null) {
        for (Annotation a : annotationsAtCurrentPosition) {
    if (a instanceof PartOfSpeechAnnotation) {
      PartOfSpeechAnnotation posAn = (PartOfSpeechAnnotation)a;
      sb.append(posAn.getPartOfSpeech());
          }
  }
      }
    }
    return StringHandling.escapeStringForFeatureName(sb.toString());
View Full Code Here

      ScopeParser scopeParser = new ScopeParser(scopeModelFilePath,
          cueModelFilePath);
      assertionDecoderConfiguration.setScopeParser(scopeParser);

      logger.info(String.format("pos model file: %s", posModelFilePath));
      PartOfSpeechTagger posTagger = new PartOfSpeechTagger(posModelFilePath);
      assertionDecoderConfiguration.setPosTagger(posTagger);

      Set<String> enabledFeatureIdSet = null;
      enabledFeatureIdSet = BatchRunner
          .loadEnabledFeaturesFromFile(enabledFeaturesFile);
View Full Code Here

      ScopeParser scopeParser = new ScopeParser(scopeModelFilePath,
          cueModelFilePath);
      assertionDecoderConfiguration.setScopeParser(scopeParser);

      logger.info(String.format("pos model file: %s", posModelFilePath));
      PartOfSpeechTagger posTagger = new PartOfSpeechTagger(posModelFilePath);
      assertionDecoderConfiguration.setPosTagger(posTagger);

      Set<String> enabledFeatureIdSet = null;
      enabledFeatureIdSet = BatchRunner
          .loadEnabledFeaturesFromFile(enabledFeaturesFile);
View Full Code Here

    }

    logger.info(String.format("cue model file: %s%n", cueModelFile.getAbsolutePath()));
    //initialize scope/cue parser
    ScopeParser scopeParser = new ScopeParser(scopeModelFile.getAbsolutePath(), cueModelFile.getAbsolutePath());
    PartOfSpeechTagger posTagger = new PartOfSpeechTagger(posModelFile.getAbsolutePath());

//    String baseDirectory = args[0];
//    logger.info(String.format("base directory: %s%n", baseDirectory));

    BatchRunner batchRunner = new BatchRunner();
View Full Code Here

TOP

Related Classes of org.mitre.medfacts.i2b2.processors.FileProcessor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.