Package org.apache.flink.api.common.io.statistics

Examples of org.apache.flink.api.common.io.statistics.BaseStatistics


    try {
      final DummyFileInputFormat format = new DummyFileInputFormat();
      format.setFilePath("file:///some/none/existing/directory/");
      format.configure(new Configuration());
     
      BaseStatistics stats = format.getStatistics(null);
      Assert.assertNull("The file statistics should be null.", stats);
    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail(ex.getMessage());
    }
View Full Code Here


     
      final DummyFileInputFormat format = new DummyFileInputFormat();
      format.setFilePath(tempFile);
      format.configure(new Configuration());
     
      BaseStatistics stats = format.getStatistics(null);
      Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());
    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail(ex.getMessage());
    }
  }
View Full Code Here

     
      final DummyFileInputFormat format = new DummyFileInputFormat();
      format.setFilePath(tempDir);
      format.configure(new Configuration());
     
      BaseStatistics stats = format.getStatistics(null);
      Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());
    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail(ex.getMessage());
    }
  }
View Full Code Here

      format = new DummyFileInputFormat();
      format.setFilePath(tempFile);
      format.configure(new Configuration());
     
      FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
      BaseStatistics latest = format.getStatistics(fakeStats);
      Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
     
      // insert fake stats with the expired modification time. the call should return new accurate stats
      format = new DummyFileInputFormat();
      format.setFilePath(tempFile);
      format.configure(new Configuration());
     
      FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime()-1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
      BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
      Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());
     
    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail(ex.getMessage());
    }
View Full Code Here

      format = new DummyFileInputFormat();
      format.setFilePath(tempDir);
      format.configure(new Configuration());
     
      FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
      BaseStatistics latest = format.getStatistics(fakeStats);
      Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());
     
      // insert fake stats with the expired modification time. the call should return new accurate stats
      format = new DummyFileInputFormat();
      format.setFilePath(tempDir);
      format.configure(new Configuration());
     
      FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime()-1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
      BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
      Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());
     
    } catch (Exception ex) {
      ex.printStackTrace();
      Assert.fail(ex.getMessage());
    }
View Full Code Here

      final Configuration conf = new Configuration();
     
      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(tempFile);
      format.configure(conf);
      BaseStatistics stats = format.getStatistics(null);
     
      final int numLines = TEST_DATA_1_LINES;
      final float avgWidth = ((float) TEST_DATA1.length()) / TEST_DATA_1_LINES;
      Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 & stats.getNumberOfRecords() > numLines - 1);
      Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 & stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
View Full Code Here

      final Configuration conf = new Configuration();
     
      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(tempFile);
      format.configure(conf);
      BaseStatistics stats = format.getStatistics(null);
     
      final int maxNumLines = (int) Math.ceil(TOTAL_SIZE / ((double) Math.min(TEST_DATA_1_LINEWIDTH, TEST_DATA_2_LINEWIDTH)));
      final int minNumLines = (int) (TOTAL_SIZE / ((double) Math.max(TEST_DATA_1_LINEWIDTH, TEST_DATA_2_LINEWIDTH)));
      final float maxAvgWidth = ((float) (TOTAL_SIZE)) / minNumLines;
      final float minAvgWidth = ((float) (TOTAL_SIZE)) / maxNumLines;
     
      if (!(stats.getNumberOfRecords() <= maxNumLines  & stats.getNumberOfRecords() >= minNumLines)) {
        System.err.println("Records: " + stats.getNumberOfRecords() + " out of (" + minNumLines + ", " + maxNumLines + ").");
        Assert.fail("Wrong record count.");
      }
      if (!(stats.getAverageRecordWidth() <= maxAvgWidth & stats.getAverageRecordWidth() >= minAvgWidth)) {
        Assert.fail("Wrong avg record size.");
      }
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
View Full Code Here

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath(tempFile);
      format.setDelimiter(DELIMITER);
      format.configure(conf);
     
      BaseStatistics stats = format.getStatistics(null);
      final int numLines = TEST_DATA_1_LINES;
      final float avgWidth = ((float) testData.length()) / TEST_DATA_1_LINES;
     
      Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 & stats.getNumberOfRecords() > numLines - 1);
      Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 & stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
View Full Code Here

      final TestDelimitedInputFormat format = new TestDelimitedInputFormat();
      format.setFilePath("test://" + tempFile);
      format.configure(conf);
     
      TestFileSystem.resetStreamOpenCounter();
      BaseStatistics stats = format.getStatistics(null);
      Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());
     
      final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat();
      format2.setFilePath("test://" + tempFile);
      format2.configure(conf);
     
      TestFileSystem.resetStreamOpenCounter();
      BaseStatistics stats2 = format2.getStatistics(stats);
      Assert.assertTrue("Using cached statistics should cicumvent sampling.", 0 == TestFileSystem.getNumtimeStreamOpened());
      Assert.assertTrue("Using cached statistics should cicumvent sampling.", stats == stats2);
     
    } catch (Exception e) {
      e.printStackTrace();
View Full Code Here

      }
      catch (Throwable t) {}
     
      // first of all, get the statistics from the cache
      final String statisticsKey = getPactContract().getStatisticsKey();
      final BaseStatistics cachedStatistics = statistics.getBaseStatistics(statisticsKey);
     
      BaseStatistics bs = null;
      try {
        bs = format.getStatistics(cachedStatistics);
      }
      catch (Throwable t) {
        if (PactCompiler.LOG.isWarnEnabled()) {
          PactCompiler.LOG.warn("Error obtaining statistics from input format: " + t.getMessage(), t);
        }
      }
     
      if (bs != null) {
        final long len = bs.getTotalInputSize();
        if (len == BaseStatistics.SIZE_UNKNOWN) {
          if (PactCompiler.LOG.isInfoEnabled()) {
            PactCompiler.LOG.info("Compiler could not determine the size of input '" + inFormatDescription + "'. Using default estimates.");
          }
        }
        else if (len >= 0) {
          this.estimatedOutputSize = len;
        }
       
        final long card = bs.getNumberOfRecords();
        if (card != BaseStatistics.NUM_RECORDS_UNKNOWN) {
          this.estimatedNumRecords = card;
        }
      }
    }
View Full Code Here

TOP

Related Classes of org.apache.flink.api.common.io.statistics.BaseStatistics

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.