/**
* Copyright 2010 Nube Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package co.nubetech.hiho.job;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.hsqldb.Server;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import co.nubetech.hiho.common.HIHOException;
import co.nubetech.hiho.common.HihoTestCase;
/**
 * Cluster-backed tests for {@link DBQueryInputJob}: imports data from an
 * in-process HSQLDB server into HDFS and verifies the delimited output,
 * query-based joins, AVRO output, and Pig script generation.
 *
 * <p>Fixture data (seeded once per class in {@link #initialize()}):
 * Pageview rows ("/a",1000) .. ("/j",10000) and PageComment rows
 * ("/a",10) .. ("/j",100); the Access table holds randomly generated
 * browsing records used only by the consistency probe {@link #verify()}.
 */
public class TestDBQueryInputJobWithCluster extends HihoTestCase {
	final static Logger logger = Logger
			.getLogger(co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.class);

	private static final String DB_URL = "jdbc:hsqldb:hsql://localhost/URLAccess";
	private static final String DRIVER_CLASS = "org.hsqldb.jdbcDriver";

	// In-process HSQLDB server and the connection used to seed the fixture
	// tables; created in initialize(), torn down in shutdown().
	private static Server server;
	private static Connection connection;

	/**
	 * Imports the whole Pageview table as DELIM-separated text and checks
	 * that each seeded row appears in the job output exactly once.
	 */
	@Test
	public void testBasicTableImport() throws Exception {
		DBQueryInputJob job = new DBQueryInputJob();
		String[] args = new String[] { "-jdbcDriver", DRIVER_CLASS,
				"-jdbcUrl", DB_URL,
				// "-jdbcUsername", "",
				// "-jdbcPassword", "",
				"-outputPath", "testBasicTableImport",
				"-outputStrategy", "delimited",
				"-delimiter", "DELIM",
				"-numberOfMappers", "2",
				"-inputTableName", "Pageview",
				"-inputOrderBy", "pageview" };
		int res = ToolRunner.run(createJobConf(), job, args);
		assertEquals(0, res);

		// Verify the output against the rows seeded by populateAccess().
		FileSystem outputFS = getFileSystem();
		Path outputPath = new Path(outputFS.getHomeDirectory(),
				"testBasicTableImport");
		assertTrue(outputFS.exists(outputPath));
		FileStatus[] status = outputFS.listStatus(outputPath,
				getOutputPathFilter());
		List<String> expectedOutput = new ArrayList<String>();
		expectedOutput.add("/aDELIM1000");
		expectedOutput.add("/bDELIM2000");
		expectedOutput.add("/cDELIM3000");
		expectedOutput.add("/dDELIM4000");
		expectedOutput.add("/eDELIM5000");
		expectedOutput.add("/fDELIM6000");
		expectedOutput.add("/gDELIM7000");
		expectedOutput.add("/hDELIM8000");
		expectedOutput.add("/iDELIM9000");
		expectedOutput.add("/jDELIM10000");
		assertEquals(10, countMatchedLines(outputFS, status, expectedOutput));
	}

	/**
	 * Runs the query-based import with the AVRO output strategy. Only the
	 * exit code and the existence of the output directory are asserted;
	 * decoding the Avro records is still TODO.
	 */
	@Test
	public void testBasicAvroTableImport() throws Exception {
		DBQueryInputJob job = new DBQueryInputJob();
		String[] args = new String[] { "-jdbcDriver", DRIVER_CLASS,
				"-jdbcUrl", DB_URL,
				// Dedicated directory: the original args wrote to
				// testQueryBasedImport, colliding with that test's output.
				"-outputPath", "testBasicAvroTableImport",
				"-inputQuery", "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
				"-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
				"-outputStrategy", "AVRO",
				"-delimiter", "DELIM",
				"-numberOfMappers", "2",
				"-inputOrderBy", "Pageview.pageview" };
		int res = ToolRunner.run(createJobConf(), job, args);
		assertEquals(0, res);

		FileSystem outputFS = getFileSystem();
		// Check the directory the job actually wrote to (the original test
		// inspected testBasicTableImport's directory by mistake).
		Path outputPath = new Path(outputFS.getHomeDirectory(),
				"testBasicAvroTableImport");
		assertTrue(outputFS.exists(outputPath));
		// TODO: read the Avro files back and verify the record contents.
	}

	/**
	 * Imports the Pageview/PageComment join via -inputQuery and
	 * -inputBoundingQuery and checks the delimited join output.
	 */
	@Test
	public void testQueryBasedImport() throws Exception {
		DBQueryInputJob job = new DBQueryInputJob();
		String[] args = new String[] { "-jdbcDriver", DRIVER_CLASS,
				"-jdbcUrl", DB_URL,
				"-outputPath", "testQueryBasedImport",
				"-inputQuery", "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
				"-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
				"-outputStrategy", "delimited",
				"-delimiter", "DELIM",
				"-numberOfMappers", "2",
				"-inputOrderBy", "Pageview.pageview" };
		int res = ToolRunner.run(createJobConf(), job, args);
		assertEquals(0, res);

		FileSystem outputFS = getFileSystem();
		Path outputPath = new Path(outputFS.getHomeDirectory(),
				"testQueryBasedImport");
		assertTrue(outputFS.exists(outputPath));
		FileStatus[] status = outputFS.listStatus(outputPath,
				getOutputPathFilter());
		// commentCount is seeded as 10*j in populateAccess(); the original
		// seeder never advanced j (it incremented i), so every row carried
		// DELIM10. Both the seeder and these expectations are now aligned
		// with the intended 10,20,...,100 progression.
		List<String> expectedOutput = new ArrayList<String>();
		expectedOutput.add("/aDELIM1000DELIM10");
		expectedOutput.add("/bDELIM2000DELIM20");
		expectedOutput.add("/cDELIM3000DELIM30");
		expectedOutput.add("/dDELIM4000DELIM40");
		expectedOutput.add("/eDELIM5000DELIM50");
		expectedOutput.add("/fDELIM6000DELIM60");
		expectedOutput.add("/gDELIM7000DELIM70");
		expectedOutput.add("/hDELIM8000DELIM80");
		expectedOutput.add("/iDELIM9000DELIM90");
		expectedOutput.add("/jDELIM10000DELIM100");
		assertEquals(10, countMatchedLines(outputFS, status, expectedOutput));
	}

	/**
	 * Reads every output file, asserts each line is one of the expected
	 * records (removing it so duplicates fail), and returns the total line
	 * count. The expectedOutput list is consumed.
	 */
	private int countMatchedLines(FileSystem outputFS, FileStatus[] status,
			List<String> expectedOutput) throws IOException {
		int count = 0;
		for (FileStatus fileStat : status) {
			logger.debug("File status is " + fileStat.getPath());
			FSDataInputStream in = outputFS.open(fileStat.getPath());
			try {
				String line = null;
				while ((line = in.readLine()) != null) {
					logger.debug("Output is " + line);
					assertTrue("Matched output " + line,
							expectedOutput.contains(line));
					expectedOutput.remove(line);
					count++;
				}
			} finally {
				// Close even when an assertion fails mid-file.
				in.close();
			}
		}
		return count;
	}

	/**
	 * Runs a table import with -inputLoadTo pig and checks that the Pig
	 * load script was generated at the expected location.
	 */
	@Test
	public void testGeneratePigScript() throws Exception, HIHOException {
		DBQueryInputJob job = new DBQueryInputJob();
		String[] args = new String[] {
				"-jdbcDriver", DRIVER_CLASS,
				"-jdbcUrl", DB_URL,
				"-inputLoadTo", "pig",
				"-inputLoadToPath", "/tmp",
				"-outputPath", "output",
				"-outputStrategy", "delimited",
				"-delimiter", ",",
				"-numberOfMappers", "2",
				"-inputTableName", "Pageview",
				"-inputOrderBy", "pageview" };
		int res = ToolRunner.run(createJobConf(), job, args);
		assertEquals(0, res);

		// The job is expected to write its Pig load script here.
		File pigScript = new File("/tmp/pigScript.txt");
		if (!(pigScript.exists())) {
			throw new HIHOException("Unable to generate Pig script");
		}
		logger.debug("Pig script output is " + pigScript.exists());
	}

	/**
	 * Registers the generated Pig script to check it parses. Verifying the
	 * loaded schema/contents is still TODO.
	 */
	@Test
	public void testContentOfPigScript() throws ExecException, IOException {
		PigServer pigServer = new PigServer("LOCAL");
		pigServer.registerScript("/tmp/pigScript.txt");
		// TODO: dump the schema of the loaded relation and assert it matches
		// the imported Pageview columns.
	}

	/** Starts the in-process HSQLDB server backing the URLAccess database. */
	private static void startHsqldbServer() {
		server = new Server();
		server.setDatabasePath(0, System.getProperty("test.build.data", "/tmp")
				+ "/URLAccess");
		server.setDatabaseName(0, "URLAccess");
		server.start();
	}

	/** Opens the shared seeding connection with auto-commit disabled. */
	private static void createConnection(String driverClassName, String url)
			throws Exception {
		Class.forName(driverClassName);
		connection = DriverManager.getConnection(url);
		connection.setAutoCommit(false);
	}

	/** Commits and closes the connection, then shuts the HSQLDB server down. */
	@AfterClass
	public static void shutdown() {
		try {
			connection.commit();
			connection.close();
		} catch (Throwable ex) {
			logger.warn("Exception occurred while closing connection :" + ex);
		} finally {
			try {
				if (server != null) {
					server.shutdown();
				}
			} catch (Throwable ex) {
				logger.warn("Exception occurred while shutting down HSQLDB :"
						+ ex);
			}
		}
	}

	/** Boots HSQLDB and (re)creates and seeds the fixture tables. */
	@BeforeClass
	public static void initialize() throws Exception {
		startHsqldbServer();
		createConnection(DRIVER_CLASS, DB_URL);
		dropTables();
		createTables();
		populateAccess();
		// verify() compares COUNT(Access) with SUM(pageview); Pageview is
		// seeded with fixed values rather than derived from Access, so a
		// mismatch is expected and only logged, never fatal.
		if (!verify()) {
			logger.warn("Access/Pageview consistency probe did not match; "
					+ "informational only for this fixture");
		}
	}

	/**
	 * Drops the fixture tables. Failures are swallowed deliberately: on the
	 * first run the tables do not exist yet.
	 */
	private static void dropTables() {
		String dropAccess = "DROP TABLE Access";
		String dropPageview = "DROP TABLE Pageview";
		String dropPageComment = "DROP Table PageComment";
		Statement st = null;
		try {
			st = connection.createStatement();
			st.executeUpdate(dropAccess);
			st.executeUpdate(dropPageview);
			st.executeUpdate(dropPageComment);
			connection.commit();
			st.close();
		} catch (SQLException ex) {
			// Expected when the tables are absent; nothing to clean up.
			try {
				if (st != null) {
					st.close();
				}
			} catch (Exception e) {
			}
		}
	}

	/** Creates the Access, Pageview and PageComment fixture tables. */
	private static void createTables() throws SQLException {
		String createAccess = "CREATE TABLE "
				+ "Access(url VARCHAR(100) NOT NULL,"
				+ " referrer VARCHAR(100)," + " time BIGINT NOT NULL, "
				+ " PRIMARY KEY (url, time))";
		String createPageview = "CREATE TABLE "
				+ "Pageview(url VARCHAR(100) NOT NULL,"
				+ " pageview BIGINT NOT NULL, " + " PRIMARY KEY (url))";
		String createPageComment = "CREATE TABLE "
				+ "PageComment(url VARCHAR(100) NOT NULL,"
				+ " commentCount BIGINT NOT NULL, " + " PRIMARY KEY (url))";
		Statement st = connection.createStatement();
		try {
			st.executeUpdate(createAccess);
			st.executeUpdate(createPageview);
			st.executeUpdate(createPageComment);
			connection.commit();
		} finally {
			st.close();
		}
	}

	/**
	 * Populates Access with randomly generated browsing records, Pageview
	 * with ("/a",1000)..("/j",10000) and PageComment with ("/a",10)..
	 * ("/j",100). Rolls back on any SQL failure.
	 */
	private static void populateAccess() throws SQLException {
		PreparedStatement statement = null;
		PreparedStatement statement1 = null;
		PreparedStatement statement2 = null;
		try {
			statement = connection
					.prepareStatement("INSERT INTO Access(url, referrer, time)"
							+ " VALUES (?, ?, ?)");
			Random random = new Random();
			int time = random.nextInt(50) + 50;
			final int PROBABILITY_PRECISION = 100; // 1 / 100
			final int NEW_PAGE_PROBABILITY = 15; // 15 / 100
			// Pages in the site :
			String[] pages = { "/a", "/b", "/c", "/d", "/e", "/f", "/g", "/h",
					"/i", "/j" };
			// linkMatrix[i] is the array of pages(indexes) that page_i links
			// to.
			int[][] linkMatrix = { { 1, 5, 7 }, { 0, 7, 4, 6, },
					{ 0, 1, 7, 8 }, { 0, 2, 4, 6, 7, 9 }, { 0, 1 },
					{ 0, 3, 5, 9 }, { 0 }, { 0, 1, 3 }, { 0, 2, 6 },
					{ 0, 2, 6 } };
			// a mini model of user browsing a la pagerank
			int currentPage = random.nextInt(pages.length);
			String referrer = null;
			for (int i = 0; i < time; i++) {
				statement.setString(1, pages[currentPage]);
				statement.setString(2, referrer);
				statement.setLong(3, i);
				statement.execute();
				int action = random.nextInt(PROBABILITY_PRECISION);
				// go to a new page with probability
				// NEW_PAGE_PROBABILITY / PROBABILITY_PRECISION
				if (action < NEW_PAGE_PROBABILITY) {
					currentPage = random.nextInt(pages.length); // a random page
					referrer = null;
				} else {
					referrer = pages[currentPage];
					action = random.nextInt(linkMatrix[currentPage].length);
					currentPage = linkMatrix[currentPage][action];
				}
			}
			statement1 = connection
					.prepareStatement("INSERT INTO Pageview(url, pageview)"
							+ " VALUES (?, ?)");
			int i = 1;
			for (String page : pages) {
				statement1.setString(1, page);
				statement1.setInt(2, 1000 * i);
				i++;
				statement1.execute();
			}
			statement2 = connection
					.prepareStatement("INSERT INTO PageComment(url, commentCount)"
							+ " VALUES (?, ?)");
			int j = 1;
			for (String page : pages) {
				statement2.setString(1, page);
				statement2.setInt(2, 10 * j);
				// Was "i++" — a bug that left j at 1, making every
				// commentCount 10. testQueryBasedImport's expectations
				// match the corrected 10,20,...,100 sequence.
				j++;
				statement2.execute();
			}
			connection.commit();
		} catch (SQLException ex) {
			connection.rollback();
			throw ex;
		} finally {
			if (statement != null) {
				statement.close();
			}
			if (statement1 != null) {
				statement1.close();
			}
			if (statement2 != null) {
				statement2.close();
			}
		}
	}

	/**
	 * Consistency probe: compares the Access row count with the pageview
	 * sum. With the fixed seeding above these generally differ, so callers
	 * treat the result as informational (see initialize()).
	 */
	private static boolean verify() throws SQLException {
		// check total num pageview
		String countAccessQuery = "SELECT COUNT(*) FROM Access";
		String sumPageviewQuery = "SELECT SUM(pageview) FROM Pageview";
		Statement st = null;
		ResultSet rs = null;
		try {
			st = connection.createStatement();
			rs = st.executeQuery(countAccessQuery);
			rs.next();
			long totalPageview = rs.getLong(1);
			rs = st.executeQuery(sumPageviewQuery);
			rs.next();
			long sumPageview = rs.getLong(1);
			logger.info("totalPageview=" + totalPageview);
			logger.info("sumPageview=" + sumPageview);
			return totalPageview == sumPageview && totalPageview != 0;
		} finally {
			// Close the ResultSet before its parent Statement.
			if (rs != null)
				rs.close();
			if (st != null)
				st.close();
		}
	}
}