// Copy the access key id stored under the s3n filesystem property into the
// property that JetS3tARCSource names via P_AWS_ACCESS_KEY_ID.
final String s3nAccessKeyProperty = "fs.s3n.awsAccessKeyId";
final String s3nAccessKeyValue = conf.get(s3nAccessKeyProperty);
conf.set(JetS3tARCSource.P_AWS_ACCESS_KEY_ID, s3nAccessKeyValue);

// Same for the secret access key.
final String s3nSecretKeyProperty = "fs.s3n.awsSecretAccessKey";
final String s3nSecretKeyValue = conf.get(s3nSecretKeyProperty);
conf.set(JetS3tARCSource.P_AWS_SECRET_ACCESS_KEY, s3nSecretKeyValue);

// The bucket name is fixed in code rather than read from the configuration.
final String crawlBucket = "commoncrawl-crawl-002";
conf.set(JetS3tARCSource.P_BUCKET_NAME, crawlBucket);

// Register JetS3tARCSource as the ARC source class, then run a fresh
// ARCInputFormat through configure(conf) before registering the format class.
// NOTE(review): the configured instance is not used again in this fragment;
// presumably configure() is called for its validation/side effects — confirm.
ARCInputFormat.setARCSourceClass(conf, JetS3tARCSource.class);
ARCInputFormat inputFormat = new ARCInputFormat();
inputFormat.configure(conf);
conf.setInputFormat(ARCInputFormat.class);

// Mapper applied to the input records.
conf.setMapperClass(FilterTextHtmlMapper.class);

// NOTE(review): 100 is passed as the maximum map-task failure percentage;
// presumably this keeps the job alive even if every map task fails — confirm
// that this tolerance is intended.
final int maxMapFailuresPercent = 100;
conf.setMaxMapTaskFailuresPercent(maxMapFailuresPercent);