}
i++;
}
// Execute the query
IDynamicResultSet result;
String queryText = sb.toString();
long startTime = System.currentTimeMillis();
// Get a dynamic resultset. Contract for dynamic resultset is that if
// one is returned, it MUST be closed, or a connection will leak.
try
{
result = connection.executeUncachedQuery(queryText,paramList,-1);
}
catch (ManifoldCFException e)
{
// If failure, record the failure.
activities.recordActivity(new Long(startTime), ACTIVITY_EXTERNAL_QUERY, null,
createQueryString(queryText,paramList), "ERROR", e.getMessage(), null);
throw e;
}
try
{
// If success, record that too.
activities.recordActivity(new Long(startTime), ACTIVITY_EXTERNAL_QUERY, null,
createQueryString(queryText,paramList), "OK", null, null);
while (true)
{
IDynamicResultRow row = result.getNextRow();
if (row == null)
break;
try
{
Object o = row.getValue(JDBCConstants.idReturnColumnName);
if (o == null)
throw new ManifoldCFException("Bad document query; doesn't return $(IDCOLUMN) column. Try using quotes around $(IDCOLUMN) variable, e.g. \"$(IDCOLUMN)\".");
String id = JDBCConnection.readAsString(o);
String version = (String)map.get(id);
if (version != null)
{
// This document was marked as "not scan only", so we expect to find it.
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("JDBC: Document data result found for '"+id+"'");
o = row.getValue(JDBCConstants.urlReturnColumnName);
if (o != null)
{
// This is not right - url can apparently be a BinaryInput
String url = JDBCConnection.readAsString(o);
boolean validURL;
try
{
// Check to be sure url is valid
new java.net.URI(url);
validURL = true;
}
catch (java.net.URISyntaxException e)
{
validURL = false;
}
if (validURL)
{
// Process the document itself
Object contents = row.getValue(JDBCConstants.dataReturnColumnName);
// Null data is allowed; we just ignore these
if (contents != null)
{
// We will ingest something, so remove this id from the map in order that we know what we still
// need to delete when all done.
map.remove(id);
String contentType;
o = row.getValue(JDBCConstants.contentTypeReturnColumnName);
if (o != null)
contentType = JDBCConnection.readAsString(o);
else
contentType = null;
if (contentType == null || activities.checkMimeTypeIndexable(contentType))
{
if (contents instanceof BinaryInput)
{
// An ingestion will take place for this document.
RepositoryDocument rd = new RepositoryDocument();
// Default content type is application/octet-stream for binary data
if (contentType == null)
rd.setMimeType("application/octet-stream");
else
rd.setMimeType(contentType);
applyAccessTokens(rd,version,spec);
applyMetadata(rd,row);
BinaryInput bi = (BinaryInput)contents;
try
{
// Read the stream
InputStream is = bi.getStream();
try
{
rd.setBinary(is,bi.getLength());
activities.ingestDocument(id, version, url, rd);
}
finally
{
is.close();
}
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("Socket timeout reading database data: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
}
}
else if (contents instanceof CharacterInput)
{
// An ingestion will take place for this document.
RepositoryDocument rd = new RepositoryDocument();
// Default content type is text/plain; charset=utf-8 for character data
if (contentType == null)
rd.setMimeType("text/plain; charset=utf-8");
else
rd.setMimeType(contentType);
applyAccessTokens(rd,version,spec);
applyMetadata(rd,row);
CharacterInput ci = (CharacterInput)contents;
try
{
// Read the stream
InputStream is = ci.getUtf8Stream();
try
{
rd.setBinary(is,ci.getUtf8StreamLength());
activities.ingestDocument(id, version, url, rd);
}
finally
{
is.close();
}
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("Socket timeout reading database data: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
}
}
else
{
// Turn it into a string, and then into a stream
String value = contents.toString();
try
{
byte[] bytes = value.getBytes("utf-8");
RepositoryDocument rd = new RepositoryDocument();
// Default content type is text/plain for character data
if (contentType == null)
rd.setMimeType("text/plain");
else
rd.setMimeType(contentType);
applyAccessTokens(rd,version,spec);
applyMetadata(rd,row);
InputStream is = new ByteArrayInputStream(bytes);
try
{
rd.setBinary(is,bytes.length);
activities.ingestDocument(id, version, url, rd);
}
finally
{
is.close();
}
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("Error reading database data: "+e.getMessage(),e);
}
}
}
else
Logging.connectors.warn("JDBC: Document '"+id+"' excluded because of mime type - skipping");
}
else
Logging.connectors.warn("JDBC: Document '"+id+"' seems to have null data - skipping");
}
else
Logging.connectors.warn("JDBC: Document '"+id+"' has an illegal url: '"+url+"' - skipping");
}
else
Logging.connectors.warn("JDBC: Document '"+id+"' has a null url - skipping");
}
}
finally
{
row.close();
}
}
// Now, go through the original id's, and see which ones are still in the map. These
// did not appear in the result and are presumed to be gone from the database, and thus must be deleted.
i = 0;
while (i < documentIdentifiers.length)
{
if (!scanOnly[i])
{
String documentIdentifier = documentIdentifiers[i];
if (map.get(documentIdentifier) != null)
{
// This means we did not see it (or data for it) in the result set. Delete it!
activities.deleteDocument(documentIdentifier,versions[i]);
}
}
i++;
}
}
finally
{
result.close();
}
}