Package uk.ac.cam.ch.wwmm.ptclib.scixml

Examples of uk.ac.cam.ch.wwmm.ptclib.scixml.SciXMLDocument


   * full web server (false) or the cut-down version (true).
   * @return The XML document thus produced.
   * @throws Exception
   */
  public static SciXMLDocument makeNEPage(String name, String type, String smiles, String inchi, String ontIDs, boolean cutDown) throws Exception {
    SciXMLDocument doc = new SciXMLDocument();
   
    Element ne = new Element("ne");
    ne.appendChild(name);
    ne.addAttribute(new Attribute("type", type));
    if(smiles != null) ne.addAttribute(new Attribute("SMILES", smiles));
    if(inchi != null) ne.addAttribute(new Attribute("InChI", inchi));
    doc.setTitle(ne);
    Element list = doc.addList();
         
    list.appendChild(doc.makeListItem("Name: " + name));
    list.appendChild(doc.makeListItem("Type: " + type));
    if(smiles != null) list.appendChild(doc.makeListItem("SMILES: " + smiles));
    if(inchi != null) list.appendChild(doc.makeListItem("InChI: " + inchi));
   
    if(inchi != null) {
      Collection<String> synonyms = ChemNameDictSingleton.getNamesFromInChI(inchi);
      if(synonyms != null) {
        Element li = doc.makeListItem("Synonyms:");
        li.appendChild(doc.makeList(synonyms));
        list.appendChild(li);
      }
    }
   
    String scrubbedName = name;
    if(scrubbedName.endsWith("s")) scrubbedName = scrubbedName.substring(0, scrubbedName.length()-1);
    scrubbedName = scrubbedName.replaceAll("\u00ad", "");
    String urlName = URLEncoder.encode(scrubbedName, "UTF-8");
   
    list.appendChild(doc.makeListItem(doc.makeLink("http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=search&amp;db=pccompound&amp;term=" +
        URLEncoder.encode("\"" + scrubbedName + "\"[CSYN]", "UTF-8"), "Search PubChem for " + scrubbedName)));
    //list.appendChild(doc.makeListItem(doc.makeLink("PubChem?name=" + urlName, "Fetch structures for " + scrubbedName + " from PubChem")));
    if(!cutDown) list.appendChild(doc.makeListItem(doc.makeLink("Search?query=" + urlName +
        "&type=word&resultsType=snippets", "Search local by name")));
    if(!cutDown) list.appendChild(doc.makeListItem(doc.makeLink("/ChemNameDict?action=stop&word=" + urlName, "Add " + scrubbedName + " as stopword")));

    if(inchi != null) {
      String urlInchi = URLEncoder.encode(inchi, "UTF-8");
      list.appendChild(doc.makeListItem(doc.makeLink("http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=search&amp;db=pccompound&amp;term=" +
            URLEncoder.encode("\"" + inchi + "\"[InChI]", "UTF-8"), "Search PubChem by InChI")));       
      if(!cutDown) list.appendChild(doc.makeListItem(doc.makeLink("Search?query=" + urlInchi +
          "&type=inchi&resultsType=snippets", "Search local by InChI")));
      if(!cutDown) list.appendChild(doc.makeListItem(doc.makeLink("Search?query=" + urlInchi +
          "&type=inchi&resultsType=compoundsList", "Search local for co-occuring compounds")));
    }
   
    if(ontIDs != null) {
      String [] oo = ontIDs.split("\\s+");
      for(int i=0;i<oo.length;i++) {
        String ontID = oo[i];
        String urlOntID = URLEncoder.encode(ontID, "UTF-8");
        list.appendChild(doc.makeListItem("Ontology ID: " + ontID));
        String def = OBOOntology.getInstance().getDefinitionForID(ontID);
        if(def != null) {
          list.appendChild(doc.makeListItem(def));
        }
       
        if (ontID.startsWith("PID")) {
         
          String pid = ontID.substring(4, ontID.length());
          String lastDigit = pid.substring(pid.length()-1, pid.length());
          String penultimateDigit = pid.substring(pid.length()-2, pid.length()-1);
         
          list.appendChild(doc.makeListItem(doc.makeLink("http://wwmm.ch.cam.ac.uk/polymers/polyinfo/" +
              penultimateDigit + "/" + lastDigit + "/" + pid + "/" + pid + ".html", "Get polymer data for " + ontID)));
         
        }
       
       
        else {

          list.appendChild(doc.makeListItem(doc.makeLink("http://www.ebi.ac.uk/ontology-lookup/browse.do?termId=" +
            urlOntID, "Look up " + ontID + " using EBI Ontology Lookup Service")));
        }
      }
    }
   
View Full Code Here


     
      return;
    }
   
    if(request.getParameter("selectcorpus") != null) {
      SciXMLDocument doc = new SciXMLDocument();
      doc.setTitle("Select corpus for search");
      Element list = doc.addList();
      File [] dirs = new File(Oscar3Props.getInstance().workspace, "corpora").listFiles();
      for(int i=0;i<dirs.length;i++) {
        String name = dirs[i].getName();
        if(dirs[i].isDirectory() && !name.startsWith("."))  {
          Element a = doc.makeLink("Search?setcorpus=" + URLEncoder.encode(name, "UTF-8"), name);
          list.appendChild(doc.makeListItem(a));
        }
      }

      Element a = doc.makeLink("Search?settoscrap", "ScrapBook");
      list.appendChild(doc.makeListItem(a));

      a = doc.makeLink("Search?indexall", "Index entire workspace");
      list.appendChild(doc.makeListItem(a));

      doc.addServerProcessingInstructions();
      response.setContentType("application/xml");
      new Serializer(response.getOutputStream()).write(doc);
      return;
    }
   
    if(request.getParameter("settoscrap") != null) {
      response.setContentType("text/plain");
      PrintWriter out = response.getWriter();
      try {
        indexerSearcher = new LuceneIndexerSearcher(true);
        indexerSearcher.addScrapBook();
        out.println("Corpus changed to scrapbook OK!");
      } catch (Exception e) {
        out.println("Yikes, that didn't work!");
      }
      return;   
    }
   
    if(request.getParameter("indexall") != null) {
      response.setContentType("text/plain");
      PrintWriter out = response.getWriter();
      try {
        indexerSearcher = new LuceneIndexerSearcher(true);
        indexerSearcher.addScrapBook();
        indexerSearcher.addDirectory(new File(Oscar3Props.getInstance().workspace, "corpora"), out);
        out.println("Indexed entire workspace OK");
      } catch (Exception e) {
        out.println("Yikes, that didn't work!");
      }
      return;   
     
    }
   
    if(request.getParameter("setcorpus") != null) {
      response.setContentType("text/plain");
      PrintWriter out = response.getWriter();
      try {
        File activeCorpus = new File(new File(Oscar3Props.getInstance().workspace, "corpora"), request.getParameter("setcorpus"));
        indexerSearcher = new LuceneIndexerSearcher(true);
        indexerSearcher.addDirectory(activeCorpus, new PrintWriter(System.out, true));
        out.println("Corpus changed to " + request.getParameter("setcorpus") + " OK!");
      } catch (Exception e) {
        e.printStackTrace();
        out.println("Yikes, that didn't work!");
      }
      return;
    }

    if(request.getParameter("concordanceresults") != null) {
      response.setContentType("text/plain");
      PrintWriter out = response.getWriter();
      int number = Integer.parseInt(request.getParameter("number"));
      for(int i=0;i<number;i++) {
        out.println(request.getParameter("s" + i));
        out.println(URLDecoder.decode(request.getParameter("s" + i + "start"), "UTF-8"));
        out.println(URLDecoder.decode(request.getParameter("s" + i + "end"), "UTF-8"));
        out.println(URLDecoder.decode(request.getParameter("s" + i + "file"), "UTF-8"));
        out.println();
      }
      return;
    }
   
    if(request.getParameter("concordance") != null) {
      try {
        String word = request.getParameter("concordance");
        String word1 = null;
        String word2 = null;
        if(word.matches("\\S+\\s+\\S+")) {
          String [] ww = word.split("\\s+");
          word1 = ww[0];
          word2 = ww[1];
        }
        String mode = request.getParameter("mode");
        if("form".equals(mode)) {
          String optstr = request.getParameter("types");
          List<String> options = StringTools.arrayToList(optstr.split("\\s+"));
          List<ConcordanceEntry> ces = Concordance.makeConcordance(indexerSearcher.filesForWord(word), word, 150, mode);                 
          response.setContentType("text/html");
          PrintWriter out = response.getWriter();
          out.println("<html><head><title>foo</title></head><body><form method='POST' action='Search'>");
          out.println("<input name='concordanceresults' type='hidden' value='foo'>");         
          out.println("<table>");
         
          boolean shade = false;
          int i=0;
          for(ConcordanceEntry ce : ces) {
            out.print("<tr" + (shade ? " style='background-color: lightgrey'" : "") + ">");
            out.print("<td><tt>" + ce.text + "</tt></td>");
            out.print("<td><select name='s" + i + "'>");
            for(String option : options) {
              out.print("<option value='" + StringTools.urlEncodeUTF8NoThrow(option) + "'>");
              out.print(option);
              out.print("</option>");
            }
            out.print("</select></td>");
            out.println("</tr>");
            out.println("<input type='hidden' name='s" + i + "start' value='" + StringTools.urlEncodeUTF8NoThrow(ce.start) + "'>");
            out.println("<input type='hidden' name='s" + i + "end' value='" + StringTools.urlEncodeUTF8NoThrow(ce.end) + "'>");
            out.println("<input type='hidden' name='s" + i + "file' value='" + StringTools.urlEncodeUTF8NoThrow(ce.file) + "'>");
            shade = !shade;
            i++;
          }     
          out.println("</table>");
          out.println("<input type='hidden' name='number' value='" + i + "'>");
          out.println("<input type='submit' value='Submit'>");
          out.println("</form></body></html>");
        } else {
          response.setContentType("text/plain")
          PrintWriter out = response.getWriter();
          long time = System.currentTimeMillis();
          List<String> concordance;
          if(word1 == null) {
            List<ConcordanceEntry> ces = Concordance.makeConcordance(indexerSearcher.filesForWord(word), word, 160, mode);                 
            concordance = new ArrayList<String>();
            for(ConcordanceEntry ce : ces) {
              concordance.add(ce.text);// + "\t" + ce.file + "\t" + ce.start + "\t" + ce.end);
            }
          } else {
            concordance = Concordance.biConcordance(indexerSearcher.filesForWordPair(word1, word2), word1, word2, 160, mode);                           
          }
          if(Oscar3Props.getInstance().verbose) System.out.println(System.currentTimeMillis() - time);
          for(String line : concordance) out.println(line);         
        }
      } catch (Exception e) {
        e.printStackTrace();
     
      return;
    }
   
    SciXMLDocument resultsDoc = null;
   
    if(request.getParameter("resultsType").equals("bigCompoundsList")) {
      try {
        resultsDoc = indexerSearcher.getBigCompoundsList();
      } catch (Exception e) {
        e.printStackTrace();
      }
    /*} else if(request.getParameter("resultsType").equals("pubchem")) {
      try {
        resultsDoc = indexerSearcher.getBigShoppingList();
        FetchFromPubChem.fillCNDFromCompoundsList(resultsDoc);
      } catch (Exception e) {
         e.printStackTrace();
      }*/
    } else {
      UserQuery.ResultsType rt;
     
      if(request.getParameter("resultsType").equals("compoundsList")) {
        rt = UserQuery.ResultsType.COMPOUNDSLIST;
      } else if(request.getParameter("resultsType").equals("hitsList")) {
        rt = UserQuery.ResultsType.HITSLIST;
      } else if(request.getParameter("resultsType").equals("assoc")) {
        rt = UserQuery.ResultsType.ASSOC;
      } else {
        rt = UserQuery.ResultsType.SNIPPETS;     
      }
     
      int size = 5;
      int skip = 0;
     
      if(request.getParameter("size") != null) size = Integer.parseInt(request.getParameter("size"));
      if(request.getParameter("skip") != null) skip = Integer.parseInt(request.getParameter("skip"));
     
      UserQuery uq = new UserQuery(rt, size, skip);
      if(request.getParameter("morelikethis") != null) {
        uq.setToMoreLikeThis(Integer.parseInt(request.getParameter("morelikethis")));
      }
      if(request.getParameter("query") != null) {
        String query = request.getParameter("query");
        String queryType = request.getParameter("type");
        String parameter = request.getParameter("parameter");
       
        uq.addTerm(query, queryType, parameter);
       
        for(int i=2;request.getParameter("query" + Integer.toString(i)) != null;i++) {
          query = request.getParameter("query" + Integer.toString(i));
          queryType = request.getParameter("type" + Integer.toString(i));
          parameter = request.getParameter("parameter" + Integer.toString(i));
          uq.addTerm(query, queryType, parameter);     
        }
       
      }
   
      try {
        resultsDoc = indexerSearcher.getResultsByUserQuery(uq);
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
   
    if(resultsDoc != null) {
      response.setContentType("application/xml");
      resultsDoc.addServerProcessingInstructions();
      new Serializer(response.getOutputStream()).write(resultsDoc);
    }
  }
View Full Code Here

        return name.endsWith(".src");
      }
    })) {
      try {
        String contents = FileUtils.readFileToString(f);
        SciXMLDocument doc = TextToSciXML.textToSciXML(contents);
        OscarFlow flow = new OscarFlow(doc);
        flow.runFlow("recognise resolve");
        if (++count > 200)
          break;
      } catch (Exception e) {
View Full Code Here

      if(assocs.get(a) > 3.8414589250873323) assocsList.add(a);
      //if(assocs.get(a) > 0.0) assocsList.add(a);
    }
    StringTools.sortStringList(assocsList, assocs);
   
    SciXMLDocument sxd = new SciXMLDocument();
    sxd.setTitle("Term Associations");
   
    Element list = sxd.addList();

   
    for(String a : assocsList) {
      //System.out.println(a + "\t" + assocs.get(a));
      Element li = new Element("LI");
View Full Code Here

   * @param args
   */
  public static void main(String[] args) {
    try {
      String contents = FileUtils.readFileToString(new File("test.html"));
      SciXMLDocument doc = TextToSciXML.textToSciXML(HtmlCleaner
          .cleanHTML(contents));
      OscarFlow flow = new OscarFlow(doc);
      flow.runFlow("recognise resolve");
    } catch (Exception e) {
      e.printStackTrace();
View Full Code Here

    return e;
  }
 
  @Override
  public Document startMakingDocument() {
    sciDoc = new SciXMLDocument();   
    return super.startMakingDocument();
  }
View Full Code Here

        Serializer ser = new Serializer(new FileOutputStream(new File(f, "source.xml")));
        ser.write(sciDoc);       
      } catch (Exception e) {
        e.printStackTrace();
      }
      sciDoc = new SciXMLDocument();
      System.out.println();
      System.out.println();
      //System.out.println(elem.toXML());
      return new Nodes();
    }
View Full Code Here

  public static void main(String[] args) throws Exception {
    OBOOntology ontology = new OBOOntology();
    ontology.read(new File("/home/ptc24/gene_ontology.obo"));
    System.out.println(ontology.getTerms().size());
    int termID = 0;
    SciXMLDocument sxd = new SciXMLDocument();
    sxd.setTitle("GO terms");
    int reacts = 0;
    for(String id : ontology.getTerms().keySet()) {
      OntologyTerm term = ontology.getTerms().get(id);
      //SciXMLDocument sxd = new SciXMLDocument();
      //sxd.setTitle(term.getName());
      sxd.getNewDiv(term.getId());
      sxd.addPara().appendChild(term.getName());
     
      if(term.getDef() != null) {
        sxd.addPara().appendChild(term.getDef());
        if(term.getDef().matches(".*[Rr]eaction.*")) reacts++;
      }
      termID++;
      if(termID % 100 == 0) {
        System.out.println(termID);
        //Serializer ser = new Serializer(new FileOutputStream(new File("/home/ptc24/tmp/goscixml", "g" + termID + ".xml")));
        //ser.write(sxd);
        sxd = new SciXMLDocument();
        sxd.setTitle("GO terms");
      }
    }
    System.out.println(termID);
    //Serializer ser = new Serializer(new FileOutputStream(new File("/home/ptc24/tmp/goscixml", "g" + termID + ".xml")));
    //ser.write(sxd);
View Full Code Here

   
    File sxDir = new File("/local/scratch/ptc24/biocreative2/bc2sciXML");
    if(true) {
      for(int i=0;i<150;i++) {
        List<Element> batch = sentenceElems.subList(i*100, (i+1)*100);
        SciXMLDocument sxd = new SciXMLDocument();
        Element div = sxd.getDiv();
        for(Element p : batch) {
          div.appendChild(p);
          div.appendChild("\n");
        }
        File outDir = new File(sxDir, i + "");
        outDir.mkdir();
        File outFile = new File(outDir, "markedup.xml");
        new Serializer(new FileOutputStream(outFile)).write(sxd);
        Nodes n = sxd.query("//ne");
        for(int j=0;j<n.size();j++) {
          XOMTools.removeElementPreservingText((Element)n.get(j));
        }
        outFile = new File(outDir, "source.xml");
        new Serializer(new FileOutputStream(outFile)).write(sxd);
View Full Code Here

      }     
      return;
    }
   
    if(request.getParameter("selectcorpus") != null) {
      SciXMLDocument doc = new SciXMLDocument();
      doc.setTitle("Select corpus to analyse");
      Element list = doc.addList();
      File [] dirs = new File(Oscar3Props.getInstance().workspace, "corpora").listFiles();
      for(int i=0;i<dirs.length;i++) {
        String name = dirs[i].getName();
        if(dirs[i].isDirectory() && !name.startsWith("."))  {
          Element a = doc.makeLink("TermSimilarity?setcorpus=" + URLEncoder.encode(name, "UTF-8"), name);
          list.appendChild(doc.makeListItem(a));
        }
      }

      //Element a = doc.makeLink("Search?settoscrap", "ScrapBook");
      //list.appendChild(doc.makeListItem(a));

      //a = doc.makeLink("Search?indexall", "Index entire workspace");
      //list.appendChild(doc.makeListItem(a));

      doc.addServerProcessingInstructions();
      response.setContentType("application/xml");
      new Serializer(response.getOutputStream()).write(doc);
      return;
    }
View Full Code Here

TOP

Related Classes of uk.ac.cam.ch.wwmm.ptclib.scixml.SciXMLDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.