Package uk.ac.cam.ch.wwmm.oscar3.recogniser.document

Examples of uk.ac.cam.ch.wwmm.oscar3.recogniser.document.NamedEntity


    }   
  }

  private boolean resolveVsNewPubChem(ProcessState state, String name) {
    try {
      NewPubChem npc = NewPubChem.getInstance();
      if(npc != null) {
        String [] results = npc.getShortestSmilesAndInChI(name);
        if(results == null) return false;
        state.smiles = results[0];
        state.inchi = results[1];
        if(state.smiles != null) setNEAttribute(state.ne, "SMILES", state.smiles);
        if(state.inchi != null) setNEAttribute(state.ne, "InChI", state.inchi);
View Full Code Here


      }
      return;
    }
   
    if(mode.equals("BuildPubChem")) {
      new NewPubChem().initialise();
      return;
    }
   
    if(mode.equals("Server")) {
      if(Oscar3Props.getInstance().serverType.equals("none")) {
View Full Code Here

    String type = a.type;
    //System.out.println(surface + " " + a.type);
    if(type.contains("_")) {
      type = type.split("_")[0];
    }
    NamedEntity ne = new NamedEntity(t.getTokens(a.startToken, endToken), surface, type);
    assert(collector instanceof NECollector);
    ((NECollector)collector).collect(ne);
    //System.out.println(surface + ": " + a.reps);
    if(a.type.startsWith("ONT")) {
      Set<String> ontIds = runAutToStateToOntIds.get(a.type).get(a.state);
      String s = OntologyTerms.idsForTerm(surface);
      if(s != null && s.length() > 0) {
        if(ontIds == null) ontIds = new HashSet<String>();
        ontIds.addAll(StringTools.arrayToList(s.split("\\s+")));       
      }
      ne.addOntIds(ontIds);
      //System.out.println(surface + "\t" + ontIds);
    }
    if(a.type.startsWith("CUST")) {
      Set<String> custTypes = runAutToStateToOntIds.get(a.type).get(a.state);
      ne.addCustTypes(custTypes);
      //System.out.println(surface + "\t" + ontIds);
    }
    //ne.setPattern(StringTools.collectionToString(a.getReps(), "_"));
  }
View Full Code Here

    String type = a.type;
    //System.out.println(surface + " " + a.type);
    if(type.contains("_")) {
      type = type.split("_")[0];
    }
    NamedEntity ne = new NamedEntity(t.getTokens(a.startToken, endToken), surface, type);
    assert(collector instanceof NECollector);
    ((NECollector)collector).collect(ne);
    //System.out.println(surface + ": " + a.reps);
    if(a.type.startsWith("ONT")) {
      Set<String> ontIds = runAutToStateToOntIds.get(a.type).get(a.state);
      String s = OntologyTerms.idsForTerm(StringTools.normaliseName(surface));
      if(s != null && s.length() > 0) {
        if(ontIds == null) ontIds = new HashSet<String>();
        ontIds.addAll(StringTools.arrayToList(s.split("\\s+")));       
      }
      ne.addOntIds(ontIds);
      //System.out.println(surface + "\t" + ontIds);
    }
    if(a.type.startsWith("CUST")) {
      //System.out.println(runAutToStateToOntIds.get(a.type));
      Set<String> custTypes = runAutToStateToOntIds.get(a.type).get(a.state);
      ne.addCustTypes(custTypes);
      //System.out.println(surface + "\t" + ontIds);
    }

    //ne.setPattern(StringTools.collectionToString(a.getReps(), "_"));
  }
View Full Code Here

        String value = token.getValue();
        value = value.toLowerCase();
        if(prwStrings.contains(value)) {
          List<Token> neTokens = new ArrayList<Token>();
          neTokens.add(token);
          NamedEntity ne = new NamedEntity(neTokens, token.getValue(), "PRW");
          safholder.appendChild(ne.toSAF());
          //System.out.println("**********");
        }
      }
      //System.out.println();
    }
 
View Full Code Here

          } else {
            featuresForAbbrev = new ArrayList<String>();
            abbrevFeatures.put(ne.getSurface(), featuresForAbbrev);
          }
          if(neByLastToken.containsKey(prev2)) {
            NamedEntity maybeAbbrev = neByLastToken.get(prev2);
            String abbrMode = "abbr1:";
            if(StringTools.testForAcronym(surf, maybeAbbrev.getSurface())) {
              abbrMode = "abbr2:";
            }
            if(surf.matches(".*\\s.*")) abbrMode += "wws:";
            for(double lthresh = -5.0;lthresh < 5.05;lthresh += 0.5) {
              double thresh = logitToProb(lthresh);
              if(maybeAbbrev.getConfidence() > thresh) {
                featuresForAbbrev.add(abbrMode + "abbr>" + thresh);
              } else {
                featuresForAbbrev.add(abbrMode + "abbr<" + thresh);
              }             
            }
View Full Code Here

 
  private static NamedEntity makeNE(List<Token> neTokens, String neType) {
    Token firstToken = neTokens.get(0);
    Token lastToken = neTokens.get(neTokens.size()-1);
    String surf = firstToken.getTokenSequence().getSubstring(firstToken.getId(), lastToken.getId());
    return new NamedEntity(neTokens, surf, "GENIA-" + neType);
  }
View Full Code Here

          Token next = t.getNAfter(1);
          Token prevPrev = t.getNAfter(-2);
          if(prev.getValue().equals("(") && next.getValue().endsWith(")")) {
            //boolean matched = false;
            if(endToNe.containsKey(prevPrev.getEnd())) {
              NamedEntity acronymOf = endToNe.get(prevPrev.getEnd());
              if(StringTools.testForAcronym(ne.getSurface(), acronymOf.getSurface())) {
                //System.out.println(ne.getSurface() + " is " + acronymOf.getSurface());
                if(acronymOf.getType().equals(NETypes.ASE) || acronymOf.getType().equals(NETypes.ASES)) {
                  //System.out.println("Skip ASE acronym");
                } else {
                  //matched = true;
                  if (acroMap.containsKey(ne.getSurface())) {
                    String newValue = ne.getType();
                    String oldValue = acroMap.get(ne.getSurface());
                    if (newValue == NETypes.POLYMER) acroMap.put(ne.getSurface(), acronymOf.getType());
                    else if (newValue == NETypes.COMPOUND && !oldValue.equals(NETypes.POLYMER)) acroMap.put(ne.getSurface(), acronymOf.getType());
                  }
                  else {
                    acroMap.put(ne.getSurface(), acronymOf.getType());
                  }
                }
              }
            }
          }
        }

        /*int index = neList.indexOf(ne);
        if(index == 0) continue;
        NamedEntity previous = neList.get(index-1);
        int prevEnd = previous.getEnd();
        String inBetween = text.substring(prevEnd, start);
        try {
          String afterWards = text.substring(end);
          if(afterWards != null && afterWards.length() > 0 &&
              inBetween.matches("\\s*\\(\\s*") &&
              afterWards.startsWith(")") &&
              StringTools.testForAcronym(ne.getSurface(), previous.getSurface())) {
            System.out.println(ne.getSurface() + " is " + previous.getSurface());
            if(previous.getType(this).equals(NETypes.ASE) || previous.getType(this).equals(NETypes.ASES)) {
              System.out.println("Skip ASE acronym");
            } else {
              acroMap.put(ne.getSurface(), previous.getType(this));
            }
          }
        } catch (Exception ex) {
          ex.printStackTrace();
        }*/

      }
    }

    stopNeList = new ArrayList<NamedEntity>();

    int i = 0;
    while(i < neList.size()) {
      NamedEntity ne = neList.get(i);
      if(ne.getType().equals(NETypes.POTENTIALACRONYM)) {
        if(acroMap.containsKey(ne.getSurface())) {
          ne.setType(acroMap.get(ne.getSurface()));
          i++;
        } else {
          neList.remove(i);
        }
      } else if(ne.getType().equals(NETypes.STOP)) {
        //System.out.println("STOP: " + neList.get(i).getSurface());
        neList.remove(i);
        stopNeList.add(ne);
      } else {
        i++;
View Full Code Here

              int endOffset = tokSeq.getToken(i+j-1).getEnd();
              String entityStr = tokSeq.getStringAtOffsets(startOffset, endOffset);
              String finalEntityType = entityType;
              if(finalEntityType.equals("NCM")) entityType = "CM";
              if(finalEntityType.equals("NRN")) entityType = "RN";
              NamedEntity ne = new NamedEntity(tokSeq.getTokens(i,i+j-1), entityStr, entityType);
              ne.setConfidence(entityProb);
              entities.put(ne, entityProb);
            }
            entitiesProb -= entityProb;
            if(entitiesProb < threshold) break;
          }
View Full Code Here

      }
    }
  }
 
  Document annotateDoc(Document doc) throws Exception {
    ProcessingDocument procDoc = ProcessingDocumentFactory.getInstance().makeTokenisedDocument(doc, false, false, false);
    //NameRecogniser nr = new NameRecogniser();
    //nr.halfProcess(doc);
    //nr.makeTokenisers(false);
    Element safholder = new Element("saf");
    Document safDoc = new Document(safholder);
    for(TokenSequence t : procDoc.getTokenSequences()) {
      for(Token token : t.getTokens()) {
        //System.out.println(token.getValue());
        String value = token.getValue();
        value = value.toLowerCase();
        if(prwStrings.contains(value)) {
View Full Code Here

TOP

Related Classes of uk.ac.cam.ch.wwmm.oscar3.recogniser.document.NamedEntity

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.