Package bixo.urls

Examples of bixo.urls.SimpleUrlValidator


    public void importUrls(boolean debug) throws Exception {


        try {
            Tap urlSource = _platform.makeTap(_platform.makeTextScheme(), _inputFilePath);
            Pipe importPipe = new Each("url importer", new Fields("line"), new CreateUrlFromTextFunction(new SimpleUrlNormalizer(), new SimpleUrlValidator()));

            Tap urlSink = _platform.makeTap(_platform.makeBinaryScheme(CrawlDbDatum.FIELDS), _destDirPath, SinkMode.REPLACE);

            FlowConnector flowConnector = _platform.makeFlowConnector();
            Flow flow = flowConnector.connect(urlSource, urlSink, importPipe);
View Full Code Here


public class SimpleUrlValidatorTest {

    private SimpleUrlValidator _validator;
    @Before
    public void setup() {
       _validator = new SimpleUrlValidator();
    }
View Full Code Here

                WritableSequenceFile writableSeqScheme = new WritableSequenceFile(new Fields(CrawlConfig.WRITABLE_SEQ_FILE_KEY_FN, CrawlConfig.WRITABLE_SEQ_FILE_VALUE_FN), Text.class, Text.class);
                writableSeqFileSink = platform.makeTap(writableSeqScheme, writableSeqFileDataPath, SinkMode.REPLACE);
            }
       
        Pipe urlFromOutlinksPipe = new Pipe("url from outlinks", parsePipe.getTailPipe());
        urlFromOutlinksPipe = new Each(urlFromOutlinksPipe, new CreateUrlDatumFromOutlinksFunction(new SimpleUrlNormalizer(), new SimpleUrlValidator()));
        if (urlFilter != null) {
            urlFromOutlinksPipe = new Each(urlFromOutlinksPipe, new UrlFilter(urlFilter));
        }
       
        urlFromOutlinksPipe = TupleLogger.makePipe(urlFromOutlinksPipe, true);
View Full Code Here

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(FlowProcess process, OperationCall<NullContext> operationCall) {
        LOGGER.info("Starting creation of outlink URLs");
        _normalizer = new SimpleUrlNormalizer();
        _validator = new SimpleUrlValidator();
    }
View Full Code Here

TOP

Related Classes of bixo.urls.SimpleUrlValidator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.