Package org.mitre.medfacts.i2b2.processors

Examples of org.mitre.medfacts.i2b2.processors.ScopeFileProcessor


      File outFile = new java.io.File(outFilePath);
      String assertionFilePath = assertionDir + "/" + file.getName().substring(0, fname.length() - 3) + "ast";
      System.out.println("Assertion file path: " + assertionFilePath);
      File assertionFile = new java.io.File(assertionFilePath);
      LineTokenToCharacterOffsetConverter converter = new LineTokenToCharacterOffsetConverter(contents);
      AssertionFileProcessor assertionProcessor = new AssertionFileProcessor();
      List<Annotation> assertions = assertionProcessor.processAnnotationFile(assertionFile);

      JCas jcas = cas.getJCas();
      SingleDocumentProcessor p = new SingleDocumentProcessor();
      p.setContents(contents);
      p.preprocess();
View Full Code Here


      File outFile = new java.io.File(outFilePath);
      String assertionFilePath = assertionDir + "/" + file.getName().substring(0, fname.length() - 3) + "ast";
      System.out.println("Assertion file path: " + assertionFilePath);
      File assertionFile = new java.io.File(assertionFilePath);
      LineTokenToCharacterOffsetConverter converter = new LineTokenToCharacterOffsetConverter(contents);
      AssertionFileProcessor assertionProcessor = new AssertionFileProcessor();
      List<Annotation> assertions = assertionProcessor.processAnnotationFile(assertionFile);

      JCas jcas = cas.getJCas();
      SingleDocumentProcessor p = new SingleDocumentProcessor();
      p.setContents(contents);
      p.preprocess();
View Full Code Here

    @Test
    public void testSingleScopeLine()
    {
      String inputText = "c=\"\" 10:4 10:7||t=\"xcope\"||id=\"1\"";

      ScopeFileProcessor processor = new ScopeFileProcessor();

      Pattern pattern = Pattern.compile(processor.getPatternString());
      Annotation a = processor.processAnnotationLine(inputText, pattern);
    }
View Full Code Here

    @Test
    public void testSingleCueLine()
    {
      String inputText = "c=\"\" 10:4 10:4||t=\"cue\"||sub_t=\"negation\" ref=\"1\"";

      ScopeFileProcessor processor = new ScopeFileProcessor();

      Pattern pattern = Pattern.compile(processor.getPatternString());
      Annotation a = processor.processAnnotationLine(inputText, pattern);
    }
View Full Code Here

       URI negationCueFileUri = negationCueFileUrl.toURI();
       System.out.format("negation cue list uri: %s%n", negationCueFileUri);
       File negationCueFile = new File(negationCueFileUri);
       System.out.format("negation cue list url: %s%n", negationCueFile);

       CueListScanner scanner = new CueListScanner(negationCueFile, CueWordType.NEGATION);
       scanner.setTextLookup(textLookup);
       scanner.execute();
       List<Annotation> annotationList = scanner.getAnnotationList();
       System.out.println("ANNOTATIONS:");
       if (annotationList == null)
       {
         System.out.println("no annotations returned.");
       } else
View Full Code Here

       URI negationCueFileUri = negationCueFileUrl.toURI();
       System.out.format("negation cue list uri: %s%n", negationCueFileUri);
       File negationCueFile = new File(negationCueFileUri);
       System.out.format("negation cue list url: %s%n", negationCueFile);

       CueListScanner scanner = new CueListScanner(negationCueFile, CueWordType.SPECULATION);
       scanner.setTextLookup(textLookup);
       scanner.execute();
       List<Annotation> annotationList = scanner.getAnnotationList();
       System.out.println("ANNOTATIONS:");
       if (annotationList == null)
       {
         System.out.println("no annotations returned.");
       } else
View Full Code Here

      logger.fine(String.format("inside processCueList working with cue file: %s", cueListFilename));
      InputStream cueFileInputStream = classLoader.getResourceAsStream(cueListFilename);
      InputStreamReader inputStreamReader = new InputStreamReader(cueFileInputStream);
      BufferedReader bufferedReader = new BufferedReader(inputStreamReader);

      CueListScanner scanner = new CueListScanner(bufferedReader, cueWordType);
      scanner.setTextLookup(textLookup);
      scanner.execute();
      annotationList = scanner.getAnnotationList();

      bufferedReader.close();
      inputStreamReader.close();
      cueFileInputStream.close();
    } catch (IOException ex)
View Full Code Here

      } else
      {
        logger.fine("enabled feature id set is set; using selected feature set");
      }

      TrainingInstance trainingInstance = new TrainingInstance();

      if (checkForEnabledFeature("conceptUnigrams")) {
          for (int k = conceptBeginTokenOffset; k <= conceptEndTokenOffset && k < lineLength; k++) {
              trainingInstance.addFeature("concept_unigram_" + StringHandling.escapeStringForFeatureName(currentLine[k]));
          }
      }

      if (checkForEnabledFeature("wordLeftFeature"))
      {
        List<String> wordLeftFeatureList = FeatureUtility.constructWordLeftFeatureList(problemBegin.getTokenOffset(), problemEnd.getTokenOffset(), currentLine);
        for (String currentFeature : wordLeftFeatureList)
        {
          trainingInstance.addFeature(currentFeature);
        }
      }

      if (checkForEnabledFeature("wordRightFeature"))
      {
        List<String> wordRightFeatureList = FeatureUtility.constructWordRightFeatureList(problemBegin.getTokenOffset(), problemEnd.getTokenOffset(), currentLine);
        for (String currentFeature : wordRightFeatureList)
        {
          trainingInstance.addFeature(currentFeature);
        }
      }

      Set<String> featureSet = trainingInstance.getFeatureSet();
      if (featureSet != null && featureSet.size() > 0)
      {
        trainingInstanceMap.put(index, trainingInstance);
      }

      String conceptText = problem.getText();
      if (checkForEnabledFeature("conceptTextFeature"))
      {
        String conceptTextFeature = MedFactsRunner.constructConceptPhraseFeature(conceptText);
        trainingInstance.addFeature(conceptTextFeature);
      }

      if (checkForEnabledFeature("conceptPseudoHeadFeature"))
      {
          int tokenOffset = problemEnd.getTokenOffset();
          //logger.info(String.format("before creating pseudo head; token offset: %d; # tokens on line: %d, tokens: %s", tokenOffset, currentLine.length, Arrays.toString(currentLine)));
          String conceptHead = currentLine[tokenOffset];
          trainingInstance.addFeature(MedFactsRunner.constructConceptHeadFeature(conceptHead));
      }

      /////
      String tokensOnCurrentLine[] = currentLine;
      for (int currentTokenOffset=0; currentTokenOffset < tokensOnCurrentLine.length; currentTokenOffset++)
      {
        String currentToken = tokensOnCurrentLine[currentTokenOffset];
        List<Annotation> annotationsAtCurrentPosition = indexer.findAnnotationsForPosition(lineNumber, currentTokenOffset);

        int scopeCount = 0;
        if (annotationsAtCurrentPosition != null)
        for (Annotation a : annotationsAtCurrentPosition)
        {
          if (a instanceof CueWordAnnotation)
          {
            CueWordAnnotation cueWord = (CueWordAnnotation)a;
            String cueWordType = cueWord.getCueWordType().toString();
            if (checkForEnabledFeature("cueWord"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWord_" + cueWordType + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWord_" + cueWordType + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWord_" + cueWordType + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWord_" + cueWordType + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWord_" + cueWordType + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordValue"))
            {
              trainingInstance.addFeature("cueword_" + cueWord.getCueWordText());
            }
          }
          if (checkForEnabledFeature("concepts")) {
            if (a instanceof ConceptAnnotation) {
                ConceptAnnotation concept = (ConceptAnnotation) a;
                if ((concept.getConceptType() != null) && (concept.getConceptText() != null)) {
                //System.err.println("concept: " + concept);
                //System.err.println("concept type = " + concept.getConceptType());
                String conceptType = concept.getConceptType().toString();
                int thisConceptBegin = concept.getBegin().getTokenOffset();
                int thisConceptEnd = concept.getEnd().getTokenOffset();
                if (concept.getBegin().getTokenOffset() < conceptBeginTokenOffset) {
                    trainingInstance.addFeature("concept_" + conceptType + "_left");
                    if ((conceptBeginTokenOffset - thisConceptEnd) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_left_3");
                    }
                } else {
                    if ((thisConceptBegin - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_right_3");
                    }
                    trainingInstance.addFeature("concept_" + conceptType + "_right");
                }
            } 
            }
            }

          if (a instanceof ScopeAnnotation)
          {
            ScopeAnnotation scope = (ScopeAnnotation)a;
            scopeCount++;
            if (checkForEnabledFeature("scope"))
            {
              trainingInstance.addFeature("scope");
            }
            if (checkForEnabledFeature("inScope"))
            {
              trainingInstance.addFeature("in_scope_" + currentToken);
            }
            if (checkForEnabledFeature("inScopeId"))
            {
              trainingInstance.addFeature("in_scope_id_" + scope.getScopeId() + "_" + currentToken);
            }
          }

          if (a instanceof CueAnnotation)
          {
            CueAnnotation cue = (CueAnnotation)a;
            if (checkForEnabledFeature("cue"))
            {
              String cueType = cue.getCueSubType().toString();
              int cueBegin = cue.getBegin().getTokenOffset();
              if (cueBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cue_" + cueType + "_left");
                  if ((conceptBeginTokenOffset - cueBegin) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_left_3");
                  }
              } else {
                  int cueEnd = cue.getEnd().getTokenOffset();
                  trainingInstance.addFeature("cue_" + cueType + "_right");
                  if ((cueEnd - conceptEndTokenOffset) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_right_3");
                  }
              }
            }
            if (checkForEnabledFeature("inCue"))
            {
              trainingInstance.addFeature("in_cue_" + currentToken);
            }
            if (checkForEnabledFeature("inCueForScopeId"))
            {
              trainingInstance.addFeature("in_cue_for_scope_id_" + cue.getScopeIdReference() + "_" + currentToken);
            }
          }

          if (checkForEnabledFeature("zone"))
          {
            if (a instanceof ZoneAnnotation)
            {
              ZoneAnnotation zone = (ZoneAnnotation)a;
              trainingInstance.addFeature("zone_" + MedFactsRunner.escapeFeatureName(zone.getZoneName()));
            }
          }

        } // end of for loop over a : annotationsAtCurrentPosition
      }
      /////

      logger.fine(String.format("TRAINING INSTANCE (%d): %s", index, trainingInstance.toString()));
    }

    return trainingInstanceMap;
  }
View Full Code Here

    Map<Integer, String> assertionMap = new TreeMap<Integer, String>();

    for (Entry<Integer, TrainingInstance> currentEntrySet : trainingInstanceMap.entrySet())
    {
      Integer index = currentEntrySet.getKey();
      TrainingInstance trainingInstance = currentEntrySet.getValue();

      Set<String> featureSet = trainingInstance.getFeatureSet();
      List<String> featureList = new ArrayList<String>(featureSet);

      JarafeMEDecoder assertionDecoder = assertionDecoderConfiguration.getAssertionDecoder();
      String assertionType = assertionDecoder.classifyInstance(featureList);
      logger.fine(String.format("ASSERTION OUTPUT: %d/%s [%s]", index, assertionType, apiConceptList.get(index)));
View Full Code Here

      {
        // skip this one
        continue;
      }

      TrainingInstance trainingInstance = new TrainingInstance();
      List<Annotation> allLineAnnotations = indexer.getAnnotationByLine().get((long) lineNumber);
      trainingInstance.setFilename(getTextFilename());
      trainingInstance.setLineNumber(lineNumber);
      trainingInstance.setAssertAnnotateForTI(currentAssertionAnnotation); //link training instance to corresponding assertion
      trainingInstance.setAnnotationsForLine(allLineAnnotations); //list of annotations for the line this training instance is on
      trainingInstance.setTokensForLine(textLookup[lineNumber-1]); //token string for the line this training instance is on

      AssertionValue assertionValue = currentAssertionAnnotation.getAssertionValue();
      String assertionValueString = (assertionValue == null) ? "" : assertionValue.toString().toLowerCase();
      trainingInstance.setExpectedValue(assertionValueString);

      String conceptText = currentAssertionAnnotation.getConceptText();
      if (checkForEnabledFeature("conceptTextFeature"))
      {
        String conceptTextFeature = MedFactsRunner.constructConceptPhraseFeature(conceptText);
        trainingInstance.addFeature(conceptTextFeature);
      }

      if (checkForEnabledFeature("conceptPseudoHeadFeature"))
      {
          int ln = currentAssertionAnnotation.getEnd().getLine();
          int pos = currentAssertionAnnotation.getEnd().getTokenOffset();
//          logger.finest(String.format("conceptPseudoHeadFeature:: ln == %d; pos == %d", ln, pos));
//          logger.finest(String.format("textLookup's size: %d", textLookup.length));
//          logger.finest(String.format("textLookup[ln-1]'s size: %d", textLookup[ln-1].length));
//          logger.finest(ZonerCli.printOutLineOfTokens(textLookup[ln-1]));

          String conceptHead = textLookup[ln-1][pos];
          trainingInstance.addFeature(constructConceptHeadFeature(conceptHead));
        //Matcher conceptHeadMatcher = conceptHeadPattern.matcher(conceptText);
        //if (conceptHeadMatcher.find())
        //{
        //  String conceptHeadText = conceptHeadMatcher.group(1);
        //  String conceptHeadFeature = constructConceptHeadFeature(conceptHeadText);
        //  trainingInstance.addFeature(conceptHeadFeature);
        //}
      }


      Location conceptBeginLocation = currentAssertionAnnotation.getBegin();
      int conceptBeginLine = conceptBeginLocation.getLine();
      int conceptBeginTokenOffset = conceptBeginLocation.getTokenOffset();
      Location conceptEndLocation = currentAssertionAnnotation.getEnd();
      int conceptEndTokenOffset = conceptEndLocation.getTokenOffset();
      String currentLine[] = textLookup[conceptBeginLine-1];

      if (checkForEnabledFeature("conceptUnigrams")) {
          for (int k = conceptBeginTokenOffset; k <= conceptEndTokenOffset; k++) {
              trainingInstance.addFeature("concept_unigram_" + StringHandling.escapeStringForFeatureName(currentLine[k]));
          }
      }

      if (checkForEnabledFeature("wordLeftFeature"))
      {
        List<String> wordLeftFeatureList = FeatureUtility.constructWordLeftFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, currentLine);
        for (String currentFeature : wordLeftFeatureList)
        {
          trainingInstance.addFeature(currentFeature);
        }
      }

      if (checkForEnabledFeature("wordRightFeature"))
      {
        List<String> wordRightFeatureList = FeatureUtility.constructWordRightFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, currentLine);
        for (String currentFeature : wordRightFeatureList)
        {
          trainingInstance.addFeature(currentFeature);
        }
      }

      if (checkForEnabledFeature("posRightFeature")){
  trainingInstance.addFeature(FeatureUtility.constructPosRightFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, (conceptBeginLine-1), currentLine, indexer));
      }

      if (checkForEnabledFeature("posLeftFeature")){
  trainingInstance.addFeature(FeatureUtility.constructPosLeftFeatureList(conceptBeginTokenOffset, conceptEndTokenOffset, (conceptBeginLine-1), currentLine, indexer));
      }

      if (checkForEnabledFeature("cueWord_DEFINITE_left_2")) {
  for (int i = 1; i < 3; i++) {
    int relPos = conceptBeginTokenOffset - i;
    if (relPos >= 0) {
      if (currentLine[relPos].equals("her") || currentLine[relPos].equals("his") || currentLine[relPos].equals("patient's") ||
    currentLine[relPos].equals("your") || currentLine[relPos].equals("this")) {
        trainingInstance.addFeature("cueWord_DEFINITE_left_2");
      }
    }
  }
      }

      if (checkForEnabledFeature("cueWordOrderingsLeft")) {
          List<CueWordAnnotation> annots = new ArrayList<CueWordAnnotation>();
          for (Annotation a : allLineAnnotations) {
      if (a instanceof CueWordAnnotation) {
        CueWordAnnotation an = (CueWordAnnotation)a;
        if ((inCWSCueWordSet(an.getCueWordType())) && (an.getBegin().getTokenOffset() < conceptBeginTokenOffset)) {
                  annots.add(an);
              }
      }
          }
    if (annots.size() > 0) {
      Collections.sort(annots);
      StringBuilder str = new StringBuilder("CWS_left");
      for (CueWordAnnotation a : annots) {
        str.append("_");
        str.append(a.getCueWordType());
      }
      trainingInstance.addFeature(str.toString());
    }
      }

      if (checkForEnabledFeature("cueWordOrderingsRight")) {
          List<CueWordAnnotation> annots = new ArrayList<CueWordAnnotation>();
          for (Annotation a : allLineAnnotations) {
      if (a instanceof CueWordAnnotation) {
        CueWordAnnotation an = (CueWordAnnotation)a;
        if ((inCWSCueWordSet(an.getCueWordType())) && (an.getBegin().getTokenOffset() > conceptEndTokenOffset)) {
    annots.add(an);
              }
      }
          }
          Collections.sort(annots);
    if (annots.size() > 0) {
      StringBuilder str = new StringBuilder("CWS_right");
      for (CueWordAnnotation a : annots) {
              str.append("_");
              str.append(a.getCueWordType());
      }
      trainingInstance.addFeature(str.toString());
    }
      }

      //logger.info(String.format("lineNumber: %d%n", lineNumber);
      String tokensOnCurrentLine[] = textLookup[lineNumber-1];
      for (int currentTokenOffset=0; currentTokenOffset < tokensOnCurrentLine.length; currentTokenOffset++)
      {
        String currentToken = tokensOnCurrentLine[currentTokenOffset];
        List<Annotation> annotationsAtCurrentPosition = indexer.findAnnotationsForPosition(lineNumber, currentTokenOffset);

        int scopeCount = 0;
        if (annotationsAtCurrentPosition != null)
        for (Annotation a : annotationsAtCurrentPosition)
        {
            if (checkForEnabledFeature("concepts")) {
            if (a instanceof ConceptAnnotation) {
                ConceptAnnotation concept = (ConceptAnnotation) a;

                String conceptType = concept.getConceptType().toString();
                int thisConceptBegin = concept.getBegin().getTokenOffset();
                int thisConceptEnd = concept.getEnd().getTokenOffset();
                if (concept.getBegin().getTokenOffset() < conceptBeginTokenOffset) {
                    trainingInstance.addFeature("concept_" + conceptType + "_left");
                    if ((conceptBeginTokenOffset - thisConceptEnd) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_left_3");
                    }
                } else {
                    if ((thisConceptBegin - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("concept_" + conceptType + "_right_3");
                    }
                    trainingInstance.addFeature("concept_" + conceptType + "_right");
                }
            }
            }

          if (a instanceof ScopeAnnotation)
          {
            ScopeAnnotation scope = (ScopeAnnotation)a;
            scopeCount++;
            if (checkForEnabledFeature("scope"))
            {
              trainingInstance.addFeature("scope");
            }
            if (checkForEnabledFeature("inScope"))
            {
              trainingInstance.addFeature("in_scope_" + currentToken);
            }
            if (checkForEnabledFeature("inScopeId"))
            {
              trainingInstance.addFeature("in_scope_id_" + scope.getScopeId() + "_" + currentToken);
            }
          }

          if (a instanceof CueAnnotation)
          {
            CueAnnotation cue = (CueAnnotation)a;
            if (checkForEnabledFeature("cue"))
            {
              String cueType = cue.getCueSubType().toString();
              int cueBegin = cue.getBegin().getTokenOffset();
              if (cueBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cue_" + cueType + "_left");
                  if ((conceptBeginTokenOffset - cueBegin) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_left_3");
                  }
              } else {
                  int cueEnd = cue.getEnd().getTokenOffset();
                  trainingInstance.addFeature("cue_" + cueType + "_right");
                  if ((cueEnd - conceptEndTokenOffset) < 4) {
                      trainingInstance.addFeature("cue_" + cueType + "_right_3");
                  }
              }
            }
            if (checkForEnabledFeature("inCue"))
            {
              trainingInstance.addFeature("in_cue_" + currentToken);
            }
            if (checkForEnabledFeature("inCueForScopeId"))
            {
              trainingInstance.addFeature("in_cue_for_scope_id_" + cue.getScopeIdReference() + "_" + currentToken);
            }
          }

          if (a instanceof CueWordAnnotation)
          {
            CueWordAnnotation cueWord = (CueWordAnnotation)a;
            String cueWordType = cueWord.getCueWordType().toString();
            String cueWordText = cueWord.getCueWordText();
            String escapedCueWordText = escapeFeatureName(cueWordText);
            String escapedCueWordClass = null;
            String cueWordClass = cueWord.getCueWordClass();
            boolean cueWordClassIsNotEmpty = (cueWordClass != null) && (!cueWordClass.isEmpty());
            if (checkForEnabledFeature("cueWordClassValue") && cueWordClassIsNotEmpty)
            {
              escapedCueWordClass = escapeFeatureName(cueWordClass);
            }

            if (checkForEnabledFeature("cueWordTextPositional"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordTextPositional_" + escapedCueWordText + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordTypePositional"))
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordTypePositional_" + cueWordType + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordClassPositional") && cueWordClassIsNotEmpty)
            {
              int cueWordBegin = cueWord.getBegin().getTokenOffset();
              int cueWordEnd = cueWord.getEnd().getTokenOffset();
              if (cueWordBegin < conceptBeginTokenOffset) {
                  trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_left");
                  if ((conceptBeginTokenOffset - cueWordBegin) < 4) {
                      trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_left_3");
                  }
              } else if (cueWordBegin > conceptEndTokenOffset) {
                    trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_right");
                    if ((cueWordEnd - conceptEndTokenOffset) < 4) {
                        trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_right_3");
                    }
              } else {
                  trainingInstance.addFeature("cueWordClassPositional_" + escapedCueWordClass + "_within");
              }
            }
            if (checkForEnabledFeature("cueWordTextValue"))
            {
              trainingInstance.addFeature("cueWordTextValue_" + escapedCueWordText);
            }
            if (checkForEnabledFeature("cueWordTypeValue") && cueWord.getCueWordType() != null)
            {
              trainingInstance.addFeature("cueWordTypeValue_" + cueWord.getCueWordType().toString());
            }
            if (checkForEnabledFeature("cueWordClassValue") && cueWord.getCueWordClass() != null && !cueWord.getCueWordClass().isEmpty())
            {
              trainingInstance.addFeature("cueWordClassValue_" + escapedCueWordClass);
            }
          }

          if (checkForEnabledFeature("zone"))
          {
            if (a instanceof ZoneAnnotation)
            {
              ZoneAnnotation zone = (ZoneAnnotation)a;
              final String zoneFeatureName = "zone_" + escapeFeatureName(zone.getZoneName());
              //logger.info("### zone feature: " + zoneFeatureName);
              trainingInstance.addFeature(zoneFeatureName);
            }
          }
        }
        if (scopeCount > 0)
        {
          if (checkForEnabledFeature("scopeCountNumber"))
          {
            trainingInstance.addFeature("scope_count_" + scopeCount);
          }
          if (checkForEnabledFeature("scopeCountEvenOrOdd"))
          {
            boolean scopeCountIsEven = (scopeCount % 2) == 0;
            trainingInstance.addFeature("scope_count_" + (scopeCountIsEven ? "even" : "odd"));
          }
        }
      }

      //Features based on negation and speculation scopes enclosing the text of the entire training instance -Alex Yeh
      int enclosingNegationScopeCnt = 0;
      int enclosingSpeculationScopeCnt = 0;
      AssertionAnnotation assertForTI = trainingInstance.getAssertAnnotateForTI();
      //Count number of enclosing negation and speculation scopes
      for (ScopeAnnotation enclosingScope : assertForTI.getEnclosingScopes())
      {
        CueAnnotation cueForScope = enclosingScope.getCueForScope();
        CueSubType scopeType = cueForScope.getCueSubType();
        if (scopeType == CueSubType.NEGATION) enclosingNegationScopeCnt++;
        else if (scopeType == CueSubType.SPECULATION) enclosingSpeculationScopeCnt++;
        else logger.info(String.format("WARNING: CUE %s%n  FOR SCOPE %s%n ENCLOSING %s%n is neither a negation nor speculation cue%n", cueForScope, enclosingScope,assertForTI));
      }
      if (checkForEnabledFeature("statusRuleMixNMatchFeature"))
      {
        //Write out status rule features for this instance that are meant to be mixed with non status rule features
        trainingInstance.addFeature("status_rule_mix_n_match_" + enclosingNegationScopeCnt + "negation_" + enclosingSpeculationScopeCnt + "spec_enclosing_scopes");
      }
      if (checkForEnabledFeature("statusRuleStandAloneFeature"))
      {
        //Write out status rule features for this instance that are meant to stand by themselves
        switch (enclosingNegationScopeCnt)
        {
          case 0:
          {
            if (enclosingSpeculationScopeCnt == 0)
              trainingInstance.addFeature("status_rule_standAlone_present");
            else if (enclosingSpeculationScopeCnt == 1)
              trainingInstance.addFeature("status_rule_standAlone_possible");
            else trainingInstance.addFeature("status_rule_standAlone_unhandled_case");
            break;
          }
          case 1:
          {
            if (enclosingSpeculationScopeCnt == 0)
              trainingInstance.addFeature("status_rule_standAlone_absent");
            else trainingInstance.addFeature("status_rule_standAlone_unhandled_case");
            break;
          }
          case 2:
          {
            if (enclosingSpeculationScopeCnt == 0)
              trainingInstance.addFeature("status_rule_standAlone_present");
             else trainingInstance.addFeature("status_rule_standAlone_unhandled_case");
           break;
          }
          default: trainingInstance.addFeature("status_rule_standAlone_unhandled_case");
        }
      }
//      logger.info(String.format("TI on line %s with value %s%n  => %s%n     has %s neg and %s spec enclosing scopes%n", trainingInstance.getLineNumber(), trainingInstance.toStringWithExpectedValue(), assertForTI.toString(), enclosingNegationScopeCnt, enclosingSpeculationScopeCnt); //For testing
     

      String featureLine = trainingInstance.toStringWithExpectedValue();
      featuresPrinter.println(featureLine);
      getMapOfTrainingInstanceLists().get(AnnotationType.ASSERTION).add(trainingInstance);

      //lineNumber++;
    }
View Full Code Here

TOP

Related Classes of org.mitre.medfacts.i2b2.processors.ScopeFileProcessor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.