// Build the downloader for the initial request.
// A search URL may embed a POST description after an "azmethod=" marker, e.g.
//   http://xxxx/index.php?main=search&azmethod=post_basic:SearchString1=%s&SearchString=&search=Search
// Everything before the marker is the target URL; everything after "post_basic:" is
// sent as the request body. Without the marker a plain request is created.
ResourceDownloaderFactory rdf = StaticUtilities.getResourceDownloaderFactory();
URL initial_url;
ResourceDownloader initial_url_rd;
int post_pos = searchURL.indexOf( "azmethod=" );
if ( post_pos > 0 ){
    // 9 == "azmethod=".length()
    String post_params = searchURL.substring( post_pos+9 );
    // drop the marker together with the single separator character preceding it
    searchURL = searchURL.substring( 0, post_pos-1 );
    debugLog( "search_url: " + searchURL + ", post=" + post_params );
    initial_url = new URL(searchURL);
    int sep = post_params.indexOf( ':' );
    if ( sep == -1 ){
        // no "<type>:<params>" structure at all - report it as a search failure rather
        // than letting substring() fail with an index exception
        throw( new SearchException( "Malformed azmethod value '" + post_params + "': expected <type>:<params>" ));
    }
    String type = post_params.substring( 0, sep );
    if ( !type.equals( "post_basic" )){
        throw( new SearchException( "Only basic type supported" ));
    }
    post_params = post_params.substring( sep+1 );
    // already URL encoded
    initial_url_rd = rdf.create( initial_url, post_params );
    initial_url_rd.setProperty( "URL_Content-Type", "application/x-www-form-urlencoded" );
}else{
    debugLog( "search_url: " + searchURL );
    initial_url = new URL(searchURL);
    initial_url_rd = rdf.create( initial_url );
}
// Decorate the request: configured headers first, then the optional cookie and
// the conditional-request headers.
setHeaders( initial_url_rd, headers );
boolean send_cookies = needsAuth && local_cookies != null;
if ( send_cookies ){
    initial_url_rd.setProperty( "URL_Cookie", local_cookies );
}
if ( only_if_modified ){
    // both stored values are read before either header is set
    String stored_last_modified = getLocalString( LD_LAST_MODIFIED );
    String stored_etag = getLocalString( LD_ETAG );
    if ( null != stored_last_modified ){
        initial_url_rd.setProperty( "URL_If-Modified-Since", stored_last_modified );
    }
    if ( null != stored_etag ){
        initial_url_rd.setProperty( "URL_If-None-Match", stored_etag );
    }
}
InputStream is;
String content_charset = "UTF-8";
ResourceDownloader mr_rd = null;
if ( initial_url.getProtocol().equalsIgnoreCase( "file" )){
// handle file://c:/ - map to file:/c:/
// (only done when the URL reports an authority component)
String file_url = initial_url.toExternalForm();
boolean has_authority = initial_url.getAuthority() != null;
if ( has_authority ){
    file_url = file_url.replaceFirst( "://", ":/" );
}
// anything from the first '?' onwards is discarded before the file is opened
int query_pos = file_url.indexOf( '?' );
if ( query_pos >= 0 ){
    file_url = file_url.substring( 0, query_pos );
}
File local_file = new File( new URL( file_url ).toURI());
is = new FileInputStream( local_file );
}else{
mr_rd = rdf.getMetaRefreshDownloader( initial_url_rd );
try{
is = mr_rd.download();
}catch( ResourceDownloaderException e ){
Long response = (Long)mr_rd.getProperty( "URL_HTTP_Response" );
if ( response != null && response.longValue() == 304 ){
// not modified
return( new pageDetails( initial_url, initial_url, "" ));
}else{
throw( e );
}
}
if ( needsAuth ){
    // Split every Set-Cookie header on ';' and keep the parts that contain '='.
    List set_cookie_headers = (List)mr_rd.getProperty( "URL_Set-Cookie" );
    List cookies_set = new ArrayList();
    if ( set_cookie_headers != null ){
        for ( int h=0; h<set_cookie_headers.size(); h++ ){
            String header = (String)set_cookie_headers.get(h);
            String[] parts = header.split(";");
            for ( int p=0; p<parts.length; p++ ){
                String part = parts[p].trim();
                if ( part.indexOf('=') == -1 ){
                    continue;
                }
                cookies_set.add( part );
            }
        }
    }
    // The collected cookies are deliberately not acted upon: in general the ones
    // set are the ones missing/expired, not the existing ones. That is, we can't
    // deduce anything from the fact that a required cookie is not 'set' here.
    // The most we could do is catch a server that explicitly deleted invalid
    // cookies by expiring them, but that is unlikely to be common practice.
    // Also note the complexity of cookie syntax:
    //   Set-Cookie: old standard using expires=, new using MaxAge
    //   Set-Cookie2:
    // Maybe use http://jcookie.sourceforge.net/ if needed
}
if ( only_if_modified ){
    // Store the Last-Modified / ETag values reported for this response; each is
    // only written when a value could be extracted.
    String received_last_modified = extractProperty( mr_rd.getProperty( "URL_Last-Modified" ));
    String received_etag = extractProperty( mr_rd.getProperty( "URL_ETag" ));
    if ( null != received_last_modified ){
        setLocalString( LD_LAST_MODIFIED, received_last_modified );
    }
    if ( null != received_etag ){
        setLocalString( LD_ETAG, received_etag );
    }
}
// Take the page charset from the first Content-Type response header, provided it names
// a charset this JVM supports; otherwise content_charset keeps its current value.
List cts = (List)mr_rd.getProperty( "URL_Content-Type" );
if ( cts != null && cts.size() > 0 ){
    String content_type = (String)cts.get(0);
    int pos = content_type.toLowerCase( java.util.Locale.ROOT ).indexOf( "charset" );
    if ( pos != -1 ){
        // move past the start of "charset", then isolate whatever follows the next '='
        content_type = content_type.substring( pos+1 );
        pos = content_type.indexOf('=');
        if ( pos != -1 ){
            content_type = content_type.substring( pos+1 ).trim();
            pos = content_type.indexOf(';');
            if ( pos != -1 ){
                content_type = content_type.substring(0,pos).trim();
            }
            // parameter values may legally be quoted, e.g. charset="utf-8"; the quotes are
            // not part of the charset name and would make it an illegal name
            if ( content_type.length() >= 2 ){
                char first = content_type.charAt( 0 );
                char last = content_type.charAt( content_type.length()-1 );
                if (( first == '"' || first == '\'' ) && first == last ){
                    content_type = content_type.substring( 1, content_type.length()-1 ).trim();
                }
            }
            try{
                if ( Charset.isSupported( content_type )){
                    debugLog( "charset: " + content_type );
                    content_charset = content_type;
                }
            }catch( Throwable e ){
                try{
                    // handle lowercase 'utf-8' for example
                    // (locale independent so that e.g. 'i' always maps to 'I')
                    content_type = content_type.toUpperCase( java.util.Locale.ROOT );
                    if ( Charset.isSupported( content_type )){
                        debugLog( "charset: " + content_type );
                        content_charset = content_type;
                    }
                }catch( Throwable f ){
                    log( "Content type '" + content_type + "' not supported", f );
                }
            }
        }
    }
}
}
// Drain the response/file stream completely into memory.
ByteArrayOutputStream baos = new ByteArrayOutputStream(8192);
byte[] buffer = new byte[8192];
try{
    while( true ){
        int len = is.read( buffer );
        if ( len <= 0 ){
            break;
        }
        baos.write( buffer, 0, len );
    }
}finally{
    // The stream has been read to its end (or has failed), so release it here; this
    // matters most for the FileInputStream opened for file: URLs.
    // NOTE(review): 'is' is not used again in the code following this loop as far as
    // visible - confirm nothing further down relies on it still being open.
    try{
        is.close();
    }catch( java.io.IOException ignored ){
        // best effort: a failure to close must not mask a read error or discard the data
    }
}
byte[] data = baos.toByteArray();
if ( vuze_file ){
    // The downloaded bytes are a Vuze file: load it and pass it to the handler.
    // Any failure is reported via Debug.out and otherwise ignored.
    try{
        VuzeFileHandler handler = VuzeFileHandler.getSingleton();
        VuzeFile loaded = handler.loadVuzeFile( data );
        VuzeFile[] to_handle = { loaded };
        handler.handleFiles( to_handle, VuzeFileComponent.COMP_TYPE_NONE );
    }catch( Throwable e ){
        Debug.out( e );
    }
    // there is no page text in this case
    return( new pageDetails( initial_url, initial_url, null ));
}
String page = null;
// Decode only the first 2048 bytes (at most) for charset sniffing. lc_content is the
// lower-cased copy used for case-insensitive searching; offsets found in it are applied
// to content. Lower-casing is locale independent so that e.g. "ENCODING" always matches.
String content = new String( data, 0, Math.min( data.length, 2048 ), content_charset );
String lc_content = content.toLowerCase( java.util.Locale.ROOT );
{
    // first look for xml charset
    // e.g. <?xml version="1.0" encoding="windows-1251" ?>
    int pos1 = lc_content.indexOf( "<?xml" );
    if ( pos1 != -1 ){
        // end of the declaration - searched from the declaration start, not from offset 0
        int pos2 = lc_content.indexOf( "?>", pos1 );
        if ( pos2 != -1 ){
            int pos3 = lc_content.indexOf( "encoding", pos1 );
            // the value may be delimited by double or single quotes; use whichever comes first
            char quote = '"';
            if ( pos3 != -1 ){
                int dq = lc_content.indexOf( '"', pos3 );
                int sq = lc_content.indexOf( '\'', pos3 );
                if ( sq != -1 && ( dq == -1 || sq < dq )){
                    quote = '\'';
                    pos3 = sq;
                }else{
                    pos3 = dq;
                }
            }
            if ( pos3 > pos1 && pos3 < pos2 ){
                pos3++;
                int pos4 = lc_content.indexOf( quote, pos3 );
                if ( pos4 > pos3 && pos4 < pos2 ){
                    String encoding = content.substring( pos3, pos4 ).trim();
                    try{
                        if ( Charset.isSupported( encoding )){
                            debugLog( "charset from xml tag: " + encoding );
                            content_charset = encoding;
                            // pos2 is a character offset into the decoded text, while data is indexed
                            // in bytes. Some feeds have crap at the start which makes the two disagree,
                            // so scan forward (at most 64 bytes, never past the end of data) for the
                            // '?' of the closing "?>".
                            int data_start = pos2;
                            int max_skip = 64;
                            while( data_start < data.length && data[data_start] != '?' && max_skip-- > 0 ){
                                data_start++;
                            }
                            if ( data_start < data.length ){
                                // keep the declaration but advertise utf-8, then append the rest of the
                                // document decoded with the declared charset
                                page = content.substring( 0, pos3 ) + "utf-8" + content.substring( pos4, pos2 ) + new String( data, data_start, data.length - data_start, content_charset );
                            }else{
                                // ran off the end of data: leave page unset (content_charset is already updated)
                                debugLog( "xml declaration end not located in raw data" );
                            }
                        }
                    }catch( Throwable e ){
                        log( "Content type '" + encoding + "' not supported", e );
                    }
                }
            }
        }
    }
}
if ( page == null ){
    // next look for http-equiv charset
    // e.g. <meta http-equiv="Content-Type" content="text/html; charset=windows-1251" />
    // Each "http-equiv" occurrence is examined in turn until one yields a charset value
    // or no further occurrence/closing '>' exists.
    int pos = 0;
    while( true ){
        int pos1 = lc_content.indexOf( "http-equiv", pos );
        if ( pos1 == -1 ){
            break;
        }
        // pos2: the '>' that closes the tag containing this http-equiv
        int pos2 = lc_content.indexOf( ">", pos1 );
        if ( pos2 == -1 ){
            break;
        }
        int pos3 = lc_content.indexOf( "charset", pos1 );
        if ( pos3 != -1 && pos3 < pos2 ){
            pos3 = lc_content.indexOf( "=", pos3 );
            if ( pos3 != -1 && pos3 < pos2 ){
                pos3++;
                // the value runs up to the closing quote (double or single) of the enclosing
                // attribute, or up to a ';' if that comes first
                int pos4 = lc_content.indexOf( "\"", pos3 );
                int pos_sq = lc_content.indexOf( "'", pos3 );
                if ( pos_sq != -1 && ( pos4 == -1 || pos_sq < pos4 )){
                    pos4 = pos_sq;
                }
                // the terminator must lie inside the tag, otherwise this is not a usable value
                if ( pos4 != -1 && pos4 < pos2 ){
                    int pos5 = lc_content.indexOf( ";", pos3 );
                    if ( pos5 != -1 && pos5 < pos4 ){
                        pos4 = pos5;
                    }
                    String encoding = content.substring( pos3, pos4 ).trim();
                    try{
                        if ( Charset.isSupported( encoding )){
                            debugLog( "charset from http-equiv : " + encoding );
                            content_charset = encoding;
                            // pos2 is a character offset into the decoded text, while data is indexed
                            // in bytes. Some pages have crap at the start which makes the two disagree,
                            // so scan forward (at most 64 bytes, never past the end of data) for the
                            // tag's closing '>'. This tag ends in '>', not in the '?' of an xml declaration.
                            int data_start = pos2;
                            int max_skip = 64;
                            while( data_start < data.length && data[data_start] != '>' && max_skip-- > 0 ){
                                data_start++;
                            }
                            if ( data_start < data.length ){
                                // keep the tag but advertise utf-8, then append the rest of the document
                                // (from the closing '>' onwards) decoded with the declared charset
                                page = content.substring( 0, pos3 ) + "utf-8" + content.substring( pos4, pos2 ) + new String( data, data_start, data.length - data_start, content_charset );
                            }else{
                                // ran off the end of data: leave page unset (content_charset is already updated)
                                debugLog( "http-equiv tag end not located in raw data" );
                            }
                        }
                    }catch( Throwable e ){
                        log( "Content type '" + encoding + "' not supported", e );
                    }
                    break;
                }
            }
        }
        // nothing usable in this tag - continue searching after it
        pos = pos2;
    }
}
if ( page == null ){
page = new String( data, content_charset );
}
debugLog( "page:" );
debugLog( page );
// List cookie = (List)url_rd.getProperty( "URL_Set-Cookie" );
try {
Matcher m = baseTagPattern.matcher(page);
if(m.find()) {
basePage = m.group(1);
debugLog( "base_page: " + basePage );
}
} catch(Exception e) {
//No BASE tag in the page
}
URL final_url = initial_url;
if ( mr_rd != null ){
URL x = (URL)mr_rd.getProperty( "URL_URL" );
if ( x != null ){
final_url = x;
}