Examples of fullSequentialParse()


Examples of net.htmlparser.jericho.Source.fullSequentialParse()

            if (MainFrame.downloadTomcatFlag.isSelected()) {

                Pattern pattern = Pattern.compile("^http://.*/tomcat/.*bin/apache-tomcat-[[0-9]+\\.]+zip");
                Source source = new Source(new URL("http://tomcat.apache.org/download-70.cgi"));
                source.setLogger(null);
                source.fullSequentialParse();
                List<Element> linkElements = source.getAllElements(HTMLElementName.A);

                for (Element linkElement : linkElements) {
                    String href = linkElement.getAttributeValue("href");
                    if (href != null) {
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

        List<DBpediaResource> entities = new ArrayList<DBpediaResource>();

        try {
            InputStream is = new ByteArrayInputStream(html.getBytes("UTF-8"));
            parser = new Source(is);
            parser.fullSequentialParse();
            parser.getElementById("div");
        } catch (IOException e) {
            throw new AnnotationException("Error reading output from WikiMachine ",e);
        }
        List<Element>KeywordElements=parser.getAllElementsByClass("keywords");
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

     * @return plain text
     */
    public static String getPlainText ( final String html, final String lineSeparator )
    {
        final Source source = new Source ( html );
        final Tag[] tags = source.fullSequentialParse ();
        if ( tags.length > 0 )
        {
            final Renderer renderer = source.getRenderer ();
            renderer.setIncludeHyperlinkURLs ( false );
            renderer.setIncludeAlternateText ( false );
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

    public static boolean hasTag ( final String text, final String tag )
    {
        if ( text != null && text.trim ().length () > 0 )
        {
            final Source source = new Source ( text );
            source.fullSequentialParse ();
            return source.getFirstElement ( tag ) != null;
        }
        else
        {
            return false;
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

    private void loadFirstResource ( final List<ResourceFile> resources, final List<String> xmlContent, final List<String> xmlNames,
                                     final List<ResourceFile> xmlFiles ) throws IOException
    {
        final ResourceFile rf = resources.get ( 0 );
        final Source xmlSource = new Source ( ReflectUtils.getClassSafely ( rf.getClassName () ).getResource ( rf.getSource () ) );
        xmlSource.fullSequentialParse ();

        final Element baseClassTag = xmlSource.getFirstElement ( SkinInfoConverter.CLASS_NODE );
        final String baseClass = baseClassTag != null ? baseClassTag.getContent ().toString () : null;

        for ( final Element includeTag : xmlSource.getAllElements ( SkinInfoConverter.INCLUDE_NODE ) )
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

    MicrosoftConditionalCommentTagTypes.register();
    PHPTagTypes.register();
    PHPTagTypes.PHP_SHORT.deregister(); // remove PHP short tags for this example otherwise they override processing instructions
    MasonTagTypes.register();
    Source source=new Source(rawPage);
    source.fullSequentialParse();

    if (depth==0 || depth==2) {
      List<Element> linkElements=source.getAllElements(HTMLElementName.FRAME);
      for (Element linkElement : linkElements) {
        String link=linkElement.getAttributeValue("src");
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  }

  public String changeTagCase(String contents, boolean uppercase) {
    Source source = new Source(contents);
    source.fullSequentialParse();
    OutputDocument outputDocument = new OutputDocument(source);
    List<Tag> tags = source.getAllTags();
    int pos = 0;
    for (Tag tag : tags) {
      Element tagElement = tag.getElement();
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  }

  @Test
  public void extractLinksWithText() throws IOException {
    Source source = new Source(TableOfLinks.getUrl());
    source.fullSequentialParse();
    List<Link> links = ScraperUtil.extractLinks(source.toString());
    log.info("found following links in table: {}", links);
  }

  @Test
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  @Test
  public void canParseLIWithStrong() {
    String li = "<li><strong> Minimum Term&nbsp;&nbsp;&nbsp;</strong> &nbsp;</li>";

    Source source = new Source(li);
    source.fullSequentialParse();

    String[] parsedOnClosingTag = source.toString().split("</");

    log.info("split on close tag: {} and {}", parsedOnClosingTag[0], parsedOnClosingTag[1]);
    Element liElement = source.getAllElements(HTMLElementName.LI).get(0);
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

   */
  public List<URL> getLinks() throws IOException {
    List<URL> links = new ArrayList<URL>();

    Source source = new Source(url);
    source.fullSequentialParse();
    List<Element> linkElements = source.getAllElements(HTMLElementName.A);
    for (Element linkElement : linkElements) {
      String href = linkElement.getAttributeValue("href");
      if (href == null) {
        continue;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.