Examples of openForRead()


Examples of cascading.tap.MultiSourceTap.openForRead()

    GlobHfs source1 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r]}.txt" );
    GlobHfs source2 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{owe?}.txt" );

    MultiSourceTap source = new MultiSourceTap( source1, source2 );

    validateLength( source.openForRead( getPlatform().getFlowProcess() ), 10 );

    GlobHfs sourceMulti = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r],owe?}.txt" );

    source = new MultiSourceTap( sourceMulti );
View Full Code Here

Examples of cascading.tap.MultiSourceTap.openForRead()

    GlobHfs sourceMulti = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r],owe?}.txt" );

    source = new MultiSourceTap( sourceMulti );

    validateLength( source.openForRead( getPlatform().getFlowProcess() ), 10, null );
    }

  @Test
  public void testCommitResource() throws Exception
    {
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

  @SuppressWarnings({ "unchecked", "rawtypes" })
    private static void processStatus(BasePlatform platform, BasePath curDirPath) throws Exception {
        BasePath statusPath = platform.makePath(curDirPath, CrawlConfig.STATUS_SUBDIR_NAME);
        Tap statusTap = platform.makeTap(platform.makeTextScheme(), statusPath);
       
        TupleEntryIterator iter = statusTap.openForRead(platform.makeFlowProcess());
       
        LOGGER.info("Analyzing: " +  CrawlConfig.STATUS_SUBDIR_NAME);
        UrlStatus[] statusValues = UrlStatus.values();
        int[] statusCounts = new int[statusValues.length];
        int totalEntries = 0;
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

    private static void processCrawlDb(BixoPlatform platform, BasePath latestCrawlDirPath, boolean exportDb) throws Exception {
        TupleEntryIterator iter;
        int totalEntries;
        BasePath crawlDbPath = platform.makePath(latestCrawlDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
        Tap crawldbTap = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), crawlDbPath);
        iter = crawldbTap.openForRead(platform.makeFlowProcess());
        totalEntries = 0;
        int fetchedUrls = 0;
        int unfetchedUrls = 0;
        LOGGER.info("Analyzing: " +  CrawlConfig.CRAWLDB_SUBDIR_NAME);
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

        flow.complete();
       
        // verify that the resulting pipe has the latest tuple
       
        Tap testSink = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), resultsPath);
        TupleEntryIterator reader = testSink.openForRead(platform.makeFlowProcess());
        int count = 0;
        long latest = 0;
        while (reader.hasNext()) {
            TupleEntry next = reader.next();
            UrlDatum datum = new UrlDatum(next);
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
        flow.complete();
       
        // Test for all valid fetches.
        Tap validate = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        while (tupleEntryIterator.hasNext()) {
            TupleEntry entry = tupleEntryIterator.next();
            StatusDatum sd = new StatusDatum(entry);
            if (sd.getStatus() != UrlStatus.FETCHED) {
                LOGGER.error(String.format("Fetched failed! Status is %s for %s", sd.getStatus(), sd.getUrl()));
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
        flow.complete();
       
        // Test for 10 good fetches.
        Tap validate = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        int fetchedPages = 0;
        while (tupleEntryIterator.hasNext()) {
            TupleEntry entry = tupleEntryIterator.next();
            new FetchedDatum(entry);
            fetchedPages += 1;
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

            // Now we should have an output/1-<timestamp>/ directory, where the
            // /urls dir has 11 entries with
            // one being previously crawled, and the other 10 being pending.

            Tap crawldbTap = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), crawlDbPath);
            TupleEntryIterator iter = crawldbTap.openForRead(platform.makeFlowProcess());

            int numFetched = 0;
            int numPending = 0;
            while (iter.hasNext()) {
                CrawlDbDatum datum = new CrawlDbDatum(iter.next());
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

            flow.complete();
            // Update crawldb path
            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            crawldbTap = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), crawlDbPath);
            iter = crawldbTap.openForRead(platform.makeFlowProcess());

            numFetched = 0;
            numPending = 0;
            int numDepth0 = 0;
            int numDepth1 = 0;
View Full Code Here

Examples of cascading.tap.Tap.openForRead()

            sourceTap = platform.makeTap(platform.makeTextScheme(), dataPath);
        } else {
            sourceTap = platform.makeTap(platform.makeBinaryScheme(fields), dataPath);
        }
       
        TupleEntryIterator tupleEntryIterator = sourceTap.openForRead(platform.makeFlowProcess());
        int numEntries = 0;
        while (tupleEntryIterator.hasNext()) {
          tupleEntryIterator.next();
          numEntries++;
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.