Package org.mitre.medfacts.i2b2.annotation

Examples of org.mitre.medfacts.i2b2.annotation.PartOfSpeechTagger


    //ArrayList<ArrayList<String>> textLookup = new ArrayList<ArrayList<String>>();
    ArrayList<String[]> textLookupTemp = new ArrayList<String[]>();
    int lineNumber = 1;
    while ((currentLine = br.readLine()) != null)
    {
      Annotation c = processAnnotationLine(currentLine, pattern);
      c.setAnnotationFileLineNumber(lineNumber);
      annotationList.add(c);
      lineNumber++;
    }

    br.close();
View Full Code Here


      String inputText = "c=\"\" 10:4 10:7||t=\"xcope\"||id=\"1\"";

      ScopeFileProcessor processor = new ScopeFileProcessor();

      Pattern pattern = Pattern.compile(processor.getPatternString());
      Annotation a = processor.processAnnotationLine(inputText, pattern);
    }
View Full Code Here

      String inputText = "c=\"\" 10:4 10:4||t=\"cue\"||sub_t=\"negation\" ref=\"1\"";

      ScopeFileProcessor processor = new ScopeFileProcessor();

      Pattern pattern = Pattern.compile(processor.getPatternString());
      Annotation a = processor.processAnnotationLine(inputText, pattern);
    }
View Full Code Here

    return tempZoneAnnotationList;
  }

  public AnnotationType getAnnotationTypeFromFilename(String currentFilename)
  {
    AnnotationType currentAnnotationType = null;
    if (currentFilename.endsWith(Constants.FILE_EXTENSION_CONCEPT_FILE))
    {
      currentAnnotationType = AnnotationType.CONCEPT;
    } else if (currentFilename.endsWith(Constants.FILE_EXTENSION_ASSERTION_FILE))
    {
View Full Code Here

//    FileProcessor scopeFileProcessor = new ScopeFileProcessor();

    for (String currentFilename : getAnnotationFilenameList())
    {
      logger.info(String.format("processing annotation file \"%s\"...%n", currentFilename));
      AnnotationType currentAnnotationType = getAnnotationTypeFromFilename(currentFilename);
      //logger.info(String.format(" - annotationType of file \"%s\" is %s%n", currentFilename, currentAnnotationType);

      List<Annotation> currentAnnotationList = null;
      switch (currentAnnotationType)
      {
View Full Code Here

    String endCharacter = matcher.group(5);
    String conceptTypeText = matcher.group(6);
    String assertionValue = matcher.group(7);
//    System.out.format("    concept text: %s%n", conceptText);
//    System.out.format("    concept type text: %s%n", conceptTypeText);
    AssertionAnnotation a = new AssertionAnnotation();
    a.setConceptText(conceptText);
    a.setBegin(new Location(beginLine, beginCharacter));
    a.setEnd(new Location(endLine, endCharacter));
    a.setConceptType(ConceptType.valueOf(conceptTypeText.toUpperCase()));
    a.setAssertionValue(AssertionValue.valueOf(assertionValue.toUpperCase()));
//    System.out.format("    ASSERTION ANNOTATION OBJECT: %s%n", a);
//    System.out.format("    ASSERTION ANNOTATION OBJECT i2b2: %s%n", a.toI2B2String());
    return a;
  }
View Full Code Here

      Location annotationEnd = current.getEnd();
      // Assume that 'annotationEnd.getLine()' will return the same line number as 'beginLine'
      int endTokenOffset = annotationEnd.getTokenOffset();
      List<Annotation> annotationsForFirstToken = indexer.findAnnotationsForPosition(beginLine, beginTokenOffset);
      List<ScopeAnnotation> enclosingScopesFound = new ArrayList<ScopeAnnotation>();
      AssertionAnnotation assertion = (AssertionAnnotation)current;

//      logger.info(String.format("   ASRT: %s => %s annotations for 1st token%n", assertion.toString(), annotationsForFirstToken.size()); //for testing
      for (Annotation annotationForFirstToken : annotationsForFirstToken)
      {
        if ((annotationForFirstToken instanceof ScopeAnnotation) &&
                (endTokenOffset <= annotationForFirstToken.getEnd().getTokenOffset()))
        {
          //This annotation containing the first token of the current assertion annotation
          // is a ScopeAnnotation that contains all the tokens of the current assertion annotation.
          //Add it to the list of enclosing scopes.
          ScopeAnnotation scope = (ScopeAnnotation)annotationForFirstToken;
          enclosingScopesFound.add(scope);
        }
      }
      assertion.setEnclosingScopes(enclosingScopesFound);
    }
    //For testing -Alex Yeh
//    for (Annotation current : assertionFileAnnotationList)
//    {
//      AssertionAnnotation assertion = (AssertionAnnotation)current;
View Full Code Here

    for (Annotation a : conceptList)
    {
      ConceptAnnotation concept = (ConceptAnnotation)a;
      if (concept.getConceptType() == ConceptType.PROBLEM)
      {
        AssertionAnnotation assertion = new AssertionAnnotation();
        assertion.setAssertionValue(null);
        assertion.setBegin(concept.getBegin());
        assertion.setEnd(concept.getEnd());
        assertion.setConceptText(concept.getConceptText());
        assertion.setConceptType(concept.getConceptType());
        assertionList.add(assertion);
      }
    }
    return assertionList;
  }
View Full Code Here

    {
      Set<String> featureSet = currentEvalInstance.getFeatureSet();
      List<String> featureList = new ArrayList<String>(featureSet);
      String actualAssertionValueString = decoder.classifyInstance(featureList);

      AssertionAnnotation originalAssertion = currentEvalInstance.getAssertAnnotateForTI();
      AssertionAnnotation resultAssertion = new AssertionAnnotation();

      AssertionValue actualAssertionValue = null;
      if (actualAssertionValueString != null)
      {
        actualAssertionValue = AssertionValue.valueOf(actualAssertionValueString.toUpperCase());
      }
      resultAssertion.setAssertionValue(actualAssertionValue);
      resultAssertion.setBegin(originalAssertion.getBegin());
      resultAssertion.setEnd(originalAssertion.getEnd());
      resultAssertion.setConceptText(originalAssertion.getConceptText());
      resultAssertion.setConceptType(originalAssertion.getConceptType());

      List<AssertionAnnotation> listOfResultAssertions = mapOfResultBySourceFile.get(currentEvalInstance.getFilename());

      if (listOfResultAssertions == null)
      {
View Full Code Here

    //int lineNumber = 1;
    for (Annotation currentAnnotation : getAnnotationsByType().get(AnnotationType.ASSERTION))
    {
      final int lineNumber = currentAnnotation.getBegin().getLine();
      AssertionAnnotation currentAssertionAnnotation = (AssertionAnnotation)currentAnnotation;
      if (!currentAssertionAnnotation.getConceptType().equals(ConceptType.PROBLEM))
      {
        // skip this one
        continue;
      }

      TrainingInstance trainingInstance = new TrainingInstance();
      List<Annotation> allLineAnnotations = indexer.getAnnotationByLine().get((long) lineNumber);
      trainingInstance.setFilename(getTextFilename());
      trainingInstance.setLineNumber(lineNumber);
      trainingInstance.setAssertAnnotateForTI(currentAssertionAnnotation); //link training instance to corresponding assertion
      trainingInstance.setAnnotationsForLine(allLineAnnotations); //list of annotations for the line this training instance is on
      trainingInstance.setTokensForLine(textLookup[lineNumber-1]); //token string for the line this training instance is on

      AssertionValue assertionValue = currentAssertionAnnotation.getAssertionValue();
      String assertionValueString = (assertionValue == null) ? "" : assertionValue.toString().toLowerCase();
      trainingInstance.setExpectedValue(assertionValueString);

      String conceptText = currentAssertionAnnotation.getConceptText();
      if (checkForEnabledFeature("conceptTextFeature"))
      {
        String conceptTextFeature = MedFactsRunner.constructConceptPhraseFeature(conceptText);
        trainingInstance.addFeature(conceptTextFeature);
      }

      if (checkForEnabledFeature("conceptPseudoHeadFeature"))
      {
          int ln = currentAssertionAnnotation.getEnd().getLine();
          int pos = currentAssertionAnnotation.getEnd().getTokenOffset();
//          logger.finest(String.format("conceptPseudoHeadFeature:: ln == %d; pos == %d", ln, pos));
//          logger.finest(String.format("textLookup's size: %d", textLookup.length));
//          logger.finest(String.format("textLookup[ln-1]'s size: %d", textLookup[ln-1].length));
//          logger.finest(ZonerCli.printOutLineOfTokens(textLookup[ln-1]));

          String conceptHead = textLookup[ln-1][pos];
          trainingInstance.addFeature(constructConceptHeadFeature(conceptHead));
        //Matcher conceptHeadMatcher = conceptHeadPattern.matcher(conceptText);
        //if (conceptHeadMatcher.find())
        //{
        //  String conceptHeadText = conceptHeadMatcher.group(1);
        //  String conceptHeadFeature = constructConceptHeadFeature(conceptHeadText);
        //  trainingInstance.addFeature(conceptHeadFeature);
        //}
      }


      Location conceptBeginLocation = currentAssertionAnnotation.getBegin();
      int conceptBeginLine = conceptBeginLocation.getLine();
      int conceptBeginTokenOffset = conceptBeginLocation.getTokenOffset();
      Location conceptEndLocation = currentAssertionAnnotation.getEnd();
      int conceptEndTokenOffset = conceptEndLocation.getTokenOffset();
      String currentLine[] = textLookup[conceptBeginLine-1];

      if (checkForEnabledFeature("conceptUnigrams")) {
          for (int k = conceptBeginTokenOffset; k <= conceptEndTokenOffset; k++) {
              trainingInstance.addFeature("concept_unigram_" + StringHandling.escapeStringForFeatureName(currentLine[k]));
          }
      }

      if (checkForEnabledFeature("wordLeftFeature"))
      {
        List<String> wordLeftFeatureList = FeatureUtility.constructWordLeftFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, currentLine);
        for (String currentFeature : wordLeftFeatureList)
        {
          trainingInstance.addFeature(currentFeature);
        }
      }

      if (checkForEnabledFeature("wordRightFeature"))
      {
        List<String> wordRightFeatureList = FeatureUtility.constructWordRightFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, currentLine);
        for (String currentFeature : wordRightFeatureList)
        {
          trainingInstance.addFeature(currentFeature);
        }
      }

      if (checkForEnabledFeature("posRightFeature")){
  trainingInstance.addFeature(FeatureUtility.constructPosRightFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, (conceptBeginLine-1), currentLine, indexer));
      }

      if (checkForEnabledFeature("posLeftFeature")){
  trainingInstance.addFeature(FeatureUtility.constructPosLeftFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, (conceptBeginLine-1), currentLine, indexer));
      }

      if (checkForEnabledFeature("cueWord_DEFINITE_left_2")) {
  for (int i = 1; i < 3; i++) {
    int relPos = conceptBeginTokenOffset - i;
    if (relPos >= 0) {
      if (currentLine[relPos].equals("her") || currentLine[relPos].equals("his") || currentLine[relPos].equals("patient's") ||
    currentLine[relPos].equals("your") || currentLine[relPos].equals("this")) {
        trainingInstance.addFeature("cueWord_DEFINITE_left_2");
      }
    }
  }
      }

      if (checkForEnabledFeature("cueWordOrderingsLeft")) {
          List<CueWordAnnotation> annots = new ArrayList<CueWordAnnotation>();
          for (Annotation a : allLineAnnotations) {
      if (a instanceof CueWordAnnotation) {
        CueWordAnnotation an = (CueWordAnnotation)a;
        if ((inCWSCueWordSet(an.getCueWordType())) && (an.getBegin().getTokenOffset() < conceptBeginTokenOffset)) {
                  annots.add(an);
              }
      }
          }
    if (annots.size() > 0) {
      Collections.sort(annots);
      StringBuilder str = new StringBuilder("CWS_left");
      for (CueWordAnnotation a : annots) {
        str.append("_");
        str.append(a.getCueWordType());
      }
      trainingInstance.addFeature(str.toString());
    }
      }

      if (checkForEnabledFeature("cueWordOrderingsRight")) {
          List<CueWordAnnotation> annots = new ArrayList<CueWordAnnotation>();
          for (Annotation a : allLineAnnotations) {
      if (a instanceof CueWordAnnotation) {
        CueWordAnnotation an = (CueWordAnnotation)a;
        if ((inCWSCueWordSet(an.getCueWordType())) && (an.getBegin().getTokenOffset() > conceptEndTokenOffset)) {
    annots.add(an);
              }
      }
          }
          Collections.sort(annots);
    if (annots.size() > 0) {
      StringBuilder str = new StringBuilder("CWS_right");
      for (CueWordAnnotation a : annots) {
              str.append("_");
              str.append(a.getCueWordType());
      }
      trainingInstance.addFeature(str.toString());
    }
      }

      //logger.info(String.format("lineNumber: %d%n", lineNumber);
      String tokensOnCurrentLine[] = textLookup[lineNumber-1];
      for (int currentTokenOffset=0; currentTokenOffset < tokensOnCurrentLine.length; currentTokenOffset++)
      {
        String currentToken = tokensOnCurrentLine[currentTokenOffset];
        List<Annotation> annotationsAtCurrentPosition = indexer.findAnnotationsForPosition(lineNumber, currentTokenOffset);

        int scopeCount = 0;
        if (annotationsAtCurrentPosition != null)
        for (Annotation a : annotationsAtCurrentPosition)
        {
            if (checkForEnabledFeature("concepts")) {
            if (a instanceof ConceptAnnotation) {
                ConceptAnnotation concept = (ConceptAnnotation) a;

                String conceptType = concept.getConceptType().toString();
                int thisConceptBegin = concept.getBegin().getTokenOffset();
                int thisConceptEnd = concept.getEnd().getTokenOffset();
                if (concept.getBegin().getTokenOffset() < conceptBeginTokenOffset) {
                    trainingInstance.addFeature("concept_" + conceptType + "_left");
                    if ((conceptBeginTokenOffset - thisConceptEnd) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_left_3");
                    }
                } else {
                    if ((thisConceptBegin - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_right_3");
                    }
                    trainingInstance.addFeature("concept_" + conceptType + "_right");
                }
            }
            }

          if (a instanceof ScopeAnnotation)
          {
            ScopeAnnotation scope = (ScopeAnnotation)a;
            scopeCount++;
            if (checkForEnabledFeature("scope"))
            {
              trainingInstance.addFeature("scope");
            }
            if (checkForEnabledFeature("inScope"))
            {
              trainingInstance.addFeature("in_scope_" + currentToken);
            }
            if (checkForEnabledFeature("inScopeId"))
            {
              trainingInstance.addFeature("in_scope_id_" + scope.getScopeId() + "_" + currentToken);
            }
          }

          if (a instanceof CueAnnotation)
          {
            CueAnnotation cue = (CueAnnotation)a;
            if (checkForEnabledFeature("cue"))
            {
              String cueType = cue.getCueSubType().toString();
              int cueBegin = cue.getBegin().getTokenOffset();
              if (cueBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cue_" + cueType + "_left");
                  if ((conceptBeginTokenOffset - cueBegin) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_left_3");
                  }
              } else {
                  int cueEnd = cue.getEnd().getTokenOffset();
                  trainingInstance.addFeature("cue_" + cueType + "_right");
                  if ((cueEnd - conceptEndTokenOffset) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_right_3");
                  }
              }
            }
            if (checkForEnabledFeature("inCue"))
            {
              trainingInstance.addFeature("in_cue_" + currentToken);
            }
            if (checkForEnabledFeature("inCueForScopeId"))
            {
              trainingInstance.addFeature("in_cue_for_scope_id_" + cue.getScopeIdReference() + "_" + currentToken);
            }
          }

          if (a instanceof CueWordAnnotation)
          {
            CueWordAnnotation cueWord = (CueWordAnnotation)a;
            String cueWordType = cueWord.getCueWordType().toString();
            String cueWordText = cueWord.getCueWordText();
            String escapedCueWordText = escapeFeatureName(cueWordText);
            String escapedCueWordClass = null;
            String cueWordClass = cueWord.getCueWordClass();
            boolean cueWordClassIsNotEmpty = (cueWordClass != null) && (!cueWordClass.isEmpty());
            if (checkForEnabledFeature("cueWordClassValue") && cueWordClassIsNotEmpty)
            {
              escapedCueWordClass = escapeFeatureName(cueWordClass);
            }

            if (checkForEnabledFeature("cueWordTextPositional"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordTypePositional"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordClassPositional") && cueWordClassIsNotEmpty)
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordTextValue"))
            {
              trainingInstance.addFeature("cueWordTextValue_" + escapedCueWordText);
            }
            if (checkForEnabledFeature("cueWordTypeValue") && cueWord.getCueWordType() != null)
            {
              trainingInstance.addFeature("cueWordTypeValue_" + cueWord.getCueWordType().toString());
            }
            if (checkForEnabledFeature("cueWordClassValue") && cueWord.getCueWordClass() != null && !cueWord.getCueWordClass().isEmpty())
            {
              trainingInstance.addFeature("cueWordClassValue_" + escapedCueWordClass);
            }
          }

          if (checkForEnabledFeature("zone"))
          {
            if (a instanceof ZoneAnnotation)
            {
              ZoneAnnotation zone = (ZoneAnnotation)a;
              final String zoneFeatureName = "zone_" + escapeFeatureName(zone.getZoneName());
              //logger.info("### zone feature: " + zoneFeatureName);
              trainingInstance.addFeature(zoneFeatureName);
            }
          }
        }
        if (scopeCount > 0)
        {
          if (checkForEnabledFeature("scopeCountNumber"))
          {
            trainingInstance.addFeature("scope_count_" + scopeCount);
          }
          if (checkForEnabledFeature("scopeCountEvenOrOdd"))
          {
            boolean scopeCountIsEven = (scopeCount % 2) == 0;
            trainingInstance.addFeature("scope_count_" + (scopeCountIsEven ? "even" : "odd"));
          }
        }
      }

      //Features based on negation and speculation scopes enclosing the text of the entire training instance -Alex Yeh
      int enclosingNegationScopeCnt = 0;
      int enclosingSpeculationScopeCnt = 0;
      AssertionAnnotation assertForTI = trainingInstance.getAssertAnnotateForTI();
      //Count number of enclosing negation and speculation scopes
      for (ScopeAnnotation enclosingScope : assertForTI.getEnclosingScopes())
      {
        CueAnnotation cueForScope = enclosingScope.getCueForScope();
        CueSubType scopeType = cueForScope.getCueSubType();
        if (scopeType == CueSubType.NEGATION) enclosingNegationScopeCnt++;
        else if (scopeType == CueSubType.SPECULATION) enclosingSpeculationScopeCnt++;
View Full Code Here

TOP

Related Classes of org.mitre.medfacts.i2b2.annotation.PartOfSpeechTagger

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.