Class org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat (package org.apache.accumulo.examples.wikisearch.ingest)

Usage examples of the nested class org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit
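The snippets below all construct or consume WikipediaInputSplit instances, which wrap a Hadoop FileSplit together with a partition (group) index. The class definition itself is not reproduced on this page; the following is only a minimal sketch of the shape implied by the calls that do appear (a constructor taking a FileSplit and an int, plus getFileSplit() and getPartition() accessors). The real class presumably also serializes itself, Writable-style, so Hadoop can ship splits to tasks; that part is omitted here.

  import java.io.IOException;
  import org.apache.hadoop.mapreduce.InputSplit;
  import org.apache.hadoop.mapreduce.lib.input.FileSplit;

  // Sketch only -- shape inferred from the usage in the snippets below.
  public class WikipediaInputSplit extends InputSplit {
    private FileSplit fileSplit;
    private int partition;

    public WikipediaInputSplit() {} // no-arg constructor for deserialization

    public WikipediaInputSplit(FileSplit fileSplit, int partition) {
      this.fileSplit = fileSplit;
      this.partition = partition;
    }

    public FileSplit getFileSplit() { return fileSplit; }

    public int getPartition() { return partition; }

    @Override
    public long getLength() throws IOException, InterruptedException {
      return fileSplit.getLength(); // delegate to the wrapped FileSplit
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
      return fileSplit.getLocations();
    }
  }

The first two snippets come from mapper setup() methods that read the partition back out of the split; the remaining ones are tests that build splits over temporary XML files.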


 
  @Override
  public void setup(Context context) {
    Configuration conf = context.getConfiguration();
   
    WikipediaInputSplit wiSplit = (WikipediaInputSplit)context.getInputSplit();
    myGroup = wiSplit.getPartition();
    numGroups = WikipediaConfiguration.getNumGroups(conf);
   
    FileSplit split = wiSplit.getFileSplit();
    String fileName = split.getPath().getName();
    Matcher matcher = languagePattern.matcher(fileName);
    if (matcher.matches()) {
      language = matcher.group(1).replace('_', '-').toLowerCase();
    } else {
      // ...
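The partition carried by the split is typically used to shard the ingest: each mapper instance processes only the articles that fall into its group, so several mappers can walk the same file without duplicating work. The truncated snippet above does not show that check; the following is a hypothetical sketch of the idea (the class name and the stand-in article id are inventions for illustration, not the project's WikipediaMapper):

  import java.io.IOException;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Mapper;

  // Hypothetical sketch: only illustrates how myGroup/numGroups from setup()
  // can gate which records this mapper instance handles.
  public class GroupFilteringMapperSketch extends Mapper<LongWritable,Text,Text,Text> {

    private int myGroup = 0;   // would come from WikipediaInputSplit.getPartition()
    private int numGroups = 1; // would come from WikipediaConfiguration.getNumGroups(conf)

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      // Stand-in for the article id the real mapper parses out of the wiki XML.
      int articleId = value.toString().hashCode();
      if (Math.floorMod(articleId, numGroups) != myGroup) {
        return; // another group's mapper instance indexes this article
      }
      context.write(new Text(Integer.toString(articleId)), value);
    }
  }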


    tablename = new Text(WikipediaConfiguration.getTableName(conf));
    indexTableName = new Text(tablename + "Index");
    reverseIndexTableName = new Text(tablename + "ReverseIndex");
    metadataTableName = new Text(tablename + "Metadata");
   
    WikipediaInputSplit wiSplit = (WikipediaInputSplit)context.getInputSplit();
    myGroup = wiSplit.getPartition();
    numGroups = WikipediaConfiguration.getNumGroups(conf);
   
    FileSplit split = wiSplit.getFileSplit();
    String fileName = split.getPath().getName();
    Matcher matcher = languagePattern.matcher(fileName);
    if (matcher.matches()) {
      language = matcher.group(1).replace('_', '-').toLowerCase();
    } else {
      // ...
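This second setup() derives four table names from the configured base name (the base table plus Index, ReverseIndex, and Metadata). Those tables must exist before ingest writes to them; a hedged sketch of creating them up front with the standard Accumulo 1.x client API follows (the instance name, zookeepers, credentials, and the "wiki" base name are placeholders, not values taken from this project):

  import org.apache.accumulo.core.client.Connector;
  import org.apache.accumulo.core.client.Instance;
  import org.apache.accumulo.core.client.ZooKeeperInstance;
  import org.apache.accumulo.core.client.admin.TableOperations;

  // Sketch only: pre-create the base table and its three derived tables.
  public class CreateWikisearchTables {
    public static void main(String[] args) throws Exception {
      Instance inst = new ZooKeeperInstance("instance", "localhost:2181");
      Connector conn = inst.getConnector("root", "secret".getBytes());
      TableOperations ops = conn.tableOperations();
      String base = "wiki"; // must match WikipediaConfiguration.getTableName(conf)
      for (String table : new String[] {base, base + "Index", base + "ReverseIndex", base + "Metadata"}) {
        if (!ops.exists(table)) {
          ops.create(table);
        }
      }
    }
  }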

  public void testIncorrectArgs() throws Exception {
    File f = createFile(xml1);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
    AggregatingRecordReader reader = new AggregatingRecordReader();
    try {
      // Clear the values for START_TOKEN and END_TOKEN
      conf.set(AggregatingRecordReader.START_TOKEN, null);
      conf.set(AggregatingRecordReader.END_TOKEN, null);
      // ...
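The snippet above ends before its assertions. Presumably the point of testIncorrectArgs is that the reader cannot be used once the delimiting tokens have been cleared; a hedged sketch of how that failure could be asserted (JUnit 4, using org.junit.Assert.fail; the project's actual assertion may differ):

    try {
      reader.initialize(split, ctx);
      // Pull a record as well, in case the misconfiguration only surfaces lazily.
      reader.nextKeyValue();
      fail("Expected a failure when START_TOKEN/END_TOKEN are not set");
    } catch (Exception e) {
      // expected: the reader cannot aggregate records without its tokens
    }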

  public void testCorrectXML() throws Exception {
    File f = createFile(xml1);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
    // ...
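All of these tests lean on fixtures the snippets do not show: a createFile(...) helper that writes one of the xml1..xml5 sample strings to a temporary file, plus conf and ctx fields holding the Hadoop Configuration and TaskAttemptContext. The following is a hypothetical sketch of such a fixture; the token values and the temp-file handling are assumptions, and on Hadoop 2+ TaskAttemptContextImpl would be used in place of the Hadoop 1.x TaskAttemptContext constructor shown here.

  // Hypothetical test fixture sketch (fields and helpers the snippets rely on).
  private Configuration conf;
  private TaskAttemptContext ctx;

  @Before
  public void setUp() throws Exception {
    conf = new Configuration();
    // Assumed token values: tell the reader which element delimits one record.
    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
    ctx = new TaskAttemptContext(conf, new TaskAttemptID()); // Hadoop 1.x API
  }

  private File createFile(String xml) throws IOException {
    File f = File.createTempFile("aggregating-record-reader-test", ".xml");
    f.deleteOnExit();
    FileWriter writer = new FileWriter(f);
    writer.write(xml);
    writer.close();
    return f;
  }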

  public void testPartialXML() throws Exception {
    File f = createFile(xml2);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
    // ...

    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(false));
    File f = createFile(xml3);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
    // ...
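RETURN_PARTIAL_MATCHES presumably controls whether the reader still returns a record whose start token was seen but whose end token never arrived before the data ran out; the snippet above disables it over the xml3 sample. The complementary case can be exercised the same way (a sketch mirroring the snippet, not a quotation of the project's test):

    conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
    AggregatingRecordReader partialReader = new AggregatingRecordReader();
    partialReader.initialize(split, ctx);
    // With partial matches enabled, the unterminated record should surface as a value.
    assertTrue(partialReader.nextKeyValue());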

  public void testPartialXML2() throws Exception {
    File f = createFile(xml3);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
    // ...

  public void testLineSplitting() throws Exception {
    File f = createFile(xml4);
   
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
    // ...

  @Test
  public void testNoEndTokenHandling() throws Exception {
    File f = createFile(xml5);
    // Create FileSplit
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null),0);
   
    // Initialize the RecordReader
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue("Not enough records returned.", reader.nextKeyValue());
    // ...

    Assert.assertNotNull(url);
    File data = new File(url.toURI());
    Path tmpFile = new Path(data.getAbsolutePath());
   
    // Setup the Mapper
    WikipediaInputSplit split = new WikipediaInputSplit(new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null),0);
    AggregatingRecordReader rr = new AggregatingRecordReader();
    Path ocPath = new Path(tmpFile, "oc");
    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
    fs.deleteOnExit(ocPath);
    StandaloneStatusReporter sr = new StandaloneStatusReporter();
    // ...
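The rest of this standalone-mapper setup is truncated, but before any records can be fed to the mapper the reader still has to be initialized over the wrapped split, just as in the reader tests above. A minimal hedged sketch of that step:

    // Sketch: initialize the reader over the split and confirm the first
    // aggregated article is available before driving the mapper with it.
    rr.initialize(split, context);
    assertTrue(rr.nextKeyValue());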
