Package com.flaptor.hounder.crawler.pagedb

Examples of com.flaptor.hounder.crawler.pagedb.Link


                        // so they can be fetched in the next cycle
                        if (links.length == 0) {
                            // We need to avoid dangling nodes.
                            // A simple way is to add a link to itself
                            links = new Link[1];
                            links[0] = new Link(pageurl, "");
                        }
                        for (Link link : links) {
                            try {
                                if (!(page.getDistance() > maxDistance && pageurl.equals(link.getUrl()))) { // dont add self-links in a discovery front page
                                    if (Crawler.urlFilter(link.getUrl()) != null) { // if the url is a valid web page (not an image, etc)
View Full Code Here


                links = new Link[children.length];
                for (int i=0; i<children.length; i++) {
                    String link = SimWeb.pageToUrl(children[i]);
// System.out.println("     outlink["+i+"]="+link);
                    String anchor = TestUtils.randomText(2,5);
                    links[i] = new Link(link, anchor);
                }
                text = TestUtils.randomText(5,50);
                title = TestUtils.randomText(2,5);
                content = text.getBytes();
                header.put("length",String.valueOf(content.length));
View Full Code Here

                this.title = out.getTitle();
                List<Pair<String,String>> ol = out.getLinks();
                links = new Link[ol.size()];
                int i = 0;
                for (Pair<String,String> lnk : ol) {
                    links[i++] = new Link(lnk.first(), lnk.last());
                }
                checkTextChanges();
            } catch (Exception e) {
                logger.error("Parsing content",e);
            }
View Full Code Here

TOP

Related Classes of com.flaptor.hounder.crawler.pagedb.Link

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.