Package it.eng.spagobi.engines.documentcomposition.configuration

Examples of it.eng.spagobi.engines.documentcomposition.configuration.DocumentCompositionConfiguration$Document


   * @return The HTML document.
   * @throws Exception
   */
  Document loadURL(URL url) throws Exception{
    SAXBuilder builder=new SAXBuilder();
    Document xml=null;
    HttpURLConnection con=(HttpURLConnection) url.openConnection();
    con.setInstanceFollowRedirects(true);
   
    InputStream in;
    try
View Full Code Here


   * @throws Exception
   */
  public Count getWikiCounts(String lemma) throws Exception{
    double w=0.0;
    URL count=new URL(this.path+"/w/index.php?title=Special%3ASearch&profile=default&search="+lemma.replace(" ", "%20")+"&fulltext=Search");   
    Document xml=this.loadURL(count)
    if(xml!=null)
    {
      String aux="mw-search-formheader";
      for(Element e:this.getContentNode(xml).getDescendants(new ElementFilter("div")))
      {
View Full Code Here

    {
      ArrayList<String> syns=new ArrayList<String>(1);
      syns.add(sid);
      s=new Sense(sid,"",syns);
      URL url=new URL(this.path+"wiki/"+sid.replace(" ", "_"));
      Document xml=this.loadURL(url);
      Element body=this.getContentNode(xml);
      //Check if definition is a section
      if(!xml.getRootElement().getValue().contains(this.redirect))
      {
        boolean start=false;
       
        for(Element e:body.getChildren())
        {
          if(e.getName().equals("p"))
          {
            start=true;
            s.addBagOfWords(e.getValue(),e.getValue().split(" "),this.name);
            for(Element ge:e.getChildren())
            {
              //Retrieve all the <a> for extracting the inGloss relation
              if(ge.getName().equals("a"))
              {
                String nurl=ge.getAttributeValue("href");
                s.addRelation("inGloss", new Relation("inGloss", nurl.replace("/wiki/", ""), ""));           
              }
            }
          }
          else
          {
            if(start)
              break;
          }
        }

      }
      else//Look for the start of the section
      {
        String all=xml.getRootElement().getValue();
        String section=all.substring(all.indexOf(this.redirect)+this.redirect.length());
        section=section.substring(0,section.indexOf("\")"));
       
        boolean start=false;
        for(Element e:body.getChildren())
        {
          if(start)
          {
            if(e.getName().equals("p"))
            {
              s.addBagOfWords(e.getValue(),e.getValue().split(" "),this.name);
              for(Element ge:e.getChildren())
              {
                //Retrieve all the <a> for extracting the inGloss relation
                if(ge.getName().equals("a"))
                {
                  String nurl=ge.getAttributeValue("href");
                  s.addRelation("inGloss", new Relation("inGloss", nurl.replace("/wiki/", ""), ""));           
                }
              }
            }
            else
            {
              break;
            }

          }
          else
          {
            if(e.getName().startsWith("h"))
            {
              for(Element es:e.getChildren())
              {
                if(es.getName().equals("span")&&section.equals(es.getAttributeValue("id")))
                {               
                  start=true;
                }
              }
            }
          }
         
        }
      }
      Element navbox=null;
      for(Element e:xml.getRootElement().getDescendants(new ElementFilter("table")))
      {
        if(e.getAttributeValue("class")!=null&&e.getAttributeValue("class").equals("navbox"))
        {
            navbox=e;
            break;
View Full Code Here

      File f=new File(path);
      f.mkdirs();
      FileWriter fout=new FileWriter(path+sid+".sgf");
      BufferedWriter out=new BufferedWriter(fout);
    URL url=new URL(this.path+"wiki/"+sid);
    Document xml=this.loadURL(url);
    Element body=this.getContentNode(xml);   
    DataBroker db=new DataBroker("gannuNLP.dictionaries.Wiki",this.language);
    db.setPath(this.path);   
    db.load("Glosses");
   
View Full Code Here

    if(!sense.equals(sense2))
    {
      qry+="+AND+"+"\""+sense2.getSid()+"\"";
    }
    URL count=new URL(this.path+"/w/index.php?title=Special%3ASearch&profile=default&search="+qry+"&fulltext=Search");   
    Document xml=this.loadURL(count)
    if(xml!=null)
    {
      String aux="mw-search-formheader";
      for(Element e:this.getContentNode(xml).getDescendants(new ElementFilter("div")))
      {
View Full Code Here

        System.out.println("-----------Checking file: "+file.getName()+ " "+String.valueOf(x)+"/"+String.valueOf(files.size())+"--------------");
        x++;
        try
        {
          SAXBuilder builder=new SAXBuilder();
          Document xml=(Document)builder.build(file);
          ArrayList<Element> words=new ArrayList<Element>();
          for(Element word:xml.getDescendants(new ElementFilter("wf")))
            words.add(word);
          for(int w=0;w<words.size();w++)
          {
            Element word=words.get(w);
            if(word.getAttribute("ot")!=null)
View Full Code Here

     * @throws IOException
     */
    private Element htmlStringToElement( String html ) throws JDOMException, IOException
    {
        SAXBuilder builder = new SAXBuilder( CYBERNEKO_PARSER, true );
        Document doc = builder.build( new StringReader( html ) );
        Element element = doc.getRootElement();
        return element;
    }
View Full Code Here

     @param element The element to get HTML from.
     *  @return HTML
     */
    public static String element2String( Element element )
    {
        Document document = new Document( element );
        XMLOutputter outputter = new XMLOutputter();
        return outputter.outputString( document );
    }
View Full Code Here

        Enumeration< URL > resources = XmlUtil.class.getClassLoader().getResources( xml );
        while( resources.hasMoreElements() ) {
                  URL resource = resources.nextElement();
                 
                  log.debug( "reading " + resource.toString() );
                  Document doc = builder.build( resource );
                  XPathFactory xpfac = XPathFactory.instance();
                  XPathExpression< Element > xp = xpfac.compile( requestedNodes, Filters.element() );
                  readed.addAll( xp.evaluate( doc ) ); // filter out repeated items
              }
        return new ArrayList< Element >( readed );
View Full Code Here

   */
  public static List< Element > parse( InputStream xmlStream, String requestedNodes ) {
    if( xmlStream != null && StringUtils.isNotEmpty( requestedNodes ) ) {
      SAXBuilder builder = new SAXBuilder();
      try {
                Document doc = builder.build( xmlStream );
                XPathFactory xpfac = XPathFactory.instance();
                XPathExpression< Element > xp = xpfac.compile( requestedNodes, Filters.element() );
               
        return xp.evaluate( doc );
      } catch ( IOException ioe ) {
View Full Code Here

TOP

Related Classes of it.eng.spagobi.engines.documentcomposition.configuration.DocumentCompositionConfiguration$Document

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.