package org.xmlcml.ckan;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.message.BasicNameValuePair;
import org.apache.log4j.Logger;
import org.junit.Ignore;
import org.junit.Test;
public class UploadDataTest {
/** Relative path of the plain-text fixture; not referenced by any test in this class. */
private static final String TEST_TXT = "src/test/resources/org/xmlcml/ckan/test.txt";
/** Relative path of the XML fixture used by the URL/key and POST-request tests. */
private static final String TEST_XML = "src/test/resources/org/xmlcml/ckan/test.xml";
/** Relative path of the JSON fixture used by {@code testExecFileUpload}. */
private static final String TEST_JSON = "src/test/resources/org/xmlcml/ckan/test.json";
/** Class logger; the multipart tests write the HTTP responses to it at debug level. */
private static final Logger LOG = Logger.getLogger(UploadDataTest.class);
/**
 * Smoke test for {@code UploadData.createUrlAndKey(File)}.
 * Nothing is asserted, so the test only fails if the call throws.
 */
@Test
public void testCreateUrlKey() {
    // The XML fixture is an arbitrary choice; any file would serve as input.
    new UploadData().createUrlAndKey(new File(TEST_XML));
}
/**
 * Runs {@code execPostRequestForFileUpload} for the XML fixture against the DataHub client
 * and reads back the response body. Disabled via {@code @Ignore}; nothing is asserted.
 *
 * @throws Exception if the request or reading the response fails
 */
@Ignore
@Test
public void testExecPostRequestForFileUpload() throws Exception {
    final UploadData uploader = new UploadData();
    uploader.execPostRequestForFileUpload(Client.getDataHubClient(), new File(TEST_XML));
    // Sample of the body that came back from one run:
    String responseBody = uploader.getResponseBody();
    /*
    {"action": "http://ckannet-storage.commondatastorage.googleapis.com/",
    "fields": [
    {"name": "x-goog-meta-uploaded-by", "value": "was APIKEY"},
    {"name": "acl", "value": "public-read"},
    {"name": "success_action_redirect", "value": "http://datahub.io/storage/upload/success_empty?label=2013-12-22T13%3A24%3A12.853Z%2Ftest.xml"},
    {"name": "policy", "value": "eyJleHBpcmF0aW9uIjogIjIwMTMtMTItMjNUMDk6MjQ6MThaIiwKImNvbmRpdGlvbnMiOiBbeyJ4LWdvb2ctbWV0YS11cGxvYWRlZC1ieSI6ICIzYmFlNDAxMy1kMDNlLTQyN2ItYWMyZS05ZTVkMDg0ZDBlYTkifSx7ImJ1Y2tldCI6ICJja2FubmV0LXN0b3JhZ2UifSx7ImtleSI6ICIyMDEzLTEyLTIyVDEzOjI0OjEyLjg1M1ovdGVzdC54bWwifSx7ImFjbCI6ICJwdWJsaWMtcmVhZCJ9LHsic3VjY2Vzc19hY3Rpb25fcmVkaXJlY3QiOiAiaHR0cDovL2RhdGFodWIuaW8vc3RvcmFnZS91cGxvYWQvc3VjY2Vzc19lbXB0eT9sYWJlbD0yMDEzLTEyLTIyVDEzJTNBMjQlM0ExMi44NTNaJTJGdGVzdC54bWwifSxbImNvbnRlbnQtbGVuZ3RoLXJhbmdlIiwgMCwgMTAwMDAwMDAwMDBdXX0="},
    {"name": "GoogleAccessId", "value": "GOOGC6OU3AYPNY47B66M"},
    {"name": "signature", "value": "VFYngO/4ZQRYt4YXnRGXitiJyNE="},
    {"name": "key", "value": "2013-12-22T13:24:12.853Z/test.xml"}]}
    */
}
/**
 * Passes the JSON fixture to {@code UploadData.upload(File)}.
 * Disabled via {@code @Ignore}; nothing is asserted.
 *
 * @throws Exception if the upload call fails
 */
@Ignore
@Test
public void testExecFileUpload() throws Exception {
    new UploadData().upload(new File(TEST_JSON));
}
/**
 * Placeholder test with an empty body: it executes nothing and exists only to carry the
 * PHP upload example below as reference material. According to its own comments, the PHP
 * script (1) builds an upload key from an ISO date and the file name, (2) requests an auth
 * form from /api/storage/auth/form/{key}, (3) POSTs the file to /storage/upload_handle and
 * (4) reads back /api/storage/metadata/{key} to confirm the upload.
 */
@Test
public void PHPExample() {
/*
// make a unique url for this file as per
 * https://github.com/okfn/ckan/blob/118c2b62988956b5bc80d42a8a3e12964b3f9543/ckan/public/scripts/application.js#L790
$upload_key = date('c') // use ISO date format
. "/"
. str_replace(" ", "-", basename($file_path)); // replace spaces with dashes in from file name
// check unique url is actually unique before uploading this file using filestore auth api
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$ckan_url."/api/storage/auth/form/".$upload_key);
curl_setopt($ch, CURLOPT_HTTPHEADER, array('Authorization:'.$api_key
,'X-CKAN-API-Key:'.$api_key));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$auth_ticket = json_decode(curl_exec ($ch),true);
curl_close ($ch);
print_r($auth_ticket);
// should get back {"action": "/storage/upload_handle", "fields": [{"name": "key", "value": "2013-11-05T05:22:49.268Z/datastoretest.php"}]}
// if file url already exists, you get an error 409
// upload file via POST request
$post = array('key' => $auth_ticket['fields'][0]['value'], // use echoed back url to ensure we fail if the url was not unique
'file'=> curl_file_create($file_path,'',basename($file_path)));
print_r($post);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL,$ckan_url."/storage/upload_handle"); // note, no trailing slash, /api/ or upload filename in this URL!
curl_setopt($ch, CURLOPT_POST,1);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, array('Expect:' // Avoid PHP CURL file upload causing 417 - expectation failed errors http://www.php.net/manual/en/function.curl-setopt.php#82418
,'Authorization:'.$api_key
,'X-CKAN-API-Key:'.$api_key));
curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
if( ! $result = curl_exec($ch))
{
trigger_error(curl_error($ch));
}
curl_close($ch);
echo print_r(preg_grep('/storage\/f/',explode('<p>',$result))); // upload result is a whole HTML page, grep out the returned URL as a sanity test
print_r(json_decode(file_get_contents($ckan_url.'/api/storage/metadata/'.$upload_key))); // json metadata checks that the file uploaded and gives final URL to download file!
*/
}
// ================================================
/**
 * Unimplemented stub for a Java counterpart of the Python method reproduced below.
 * Both arguments are ignored and {@code null} is always returned.
 * <pre>
 * def storage_auth_get(self, label, headers):
 *     url = self._storage_auth_url(label)
 *     payload = self._dumpstr(headers)
 *     self.open_url(url, payload, method="POST")
 *     return self.last_message
 * </pre>
 *
 * @param label   currently unused
 * @param headers currently unused
 * @return always {@code null}
 */
private String storageAuthGet(String label, List<String> headers) {
    return null;
}
/**
*
I believe I am competent enough in HTTP to implement this if I have accurate information. At present I have 2-3 incompatible incomplete documents which require significant detective work to even start.
## Architecture
CKAN
- Database for metadata
- File storage (blob) => 2 options
- locally on disk
- cloud storage
DataHub uses cloud storage (viz google storage - similar to s3)
- [Optionally] Store data in database structure as well (structured data)
1. Get upload credentials
/api/storage/auth/form/{label}
=> credentials to do an upload
Action: http://ckannet-storage.commondatastorage.googleapis.com/
- x-goog-meta-uploaded-by => <KEY>
- acl => public-read
- success_action_redirect => http://datahub.io/storage/upload/success_empty?label=2013-12-23T112228%2Ftest.xml
- policy => eyJleHBpcmF0aW9uIjogIj
...
MDAwMDAwMDBdXX0=
- GoogleAccessId => GOOGC6OU3AYPNY47B66M
- signature => iEI/LmjmVce2sfh7xd7T3xKo0lI=
- key => 2013-12-23T112228/test.xml
2. Do the upload to storage (commondatastorage ...)
POST request (form style) to:
http://ckannet-storage.commondatastorage.googleapis.com/
- x-goog-meta-uploaded-by => <KEY>
- acl => public-read
- policy => eyJleHBpcmF0aW9uIjogIj ....
- GoogleAccessId => GOOGC6OU3AYPNY47B66M
- signature => iEI/LmjmVce2sfh7xd7T3xKo0lI=
- key => 2013-12-23T112228/test.xml
POST in "form" style multipart/form-data
3. Update your "resource" with the url to your uploaded data file
*
*/
/**
 * POSTs the XML fixture to the Google storage bucket as {@code multipart/form-data},
 * together with a set of sample upload-credential fields, and logs the response at
 * debug level. Nothing is asserted.
 * <p>
 * See https://developers.google.com/storage/docs/reference-methods#postobject
 *
 * @throws Exception if building or executing the request, or reading the response, fails
 */
@Test
public void testMultipartGoogle() throws Exception {
    HttpClient httpClient = Connection.createHttpClientOrProxy();
    HttpPost httpPost = new HttpPost("http://ckannet-storage.commondatastorage.googleapis.com/");
    httpPost.setHeader("Connection", "keep-alive");

    // Sample credential fields of the kind returned by /api/storage/auth/form/{label}.
    List<NameValuePair> nameValuePairList = new ArrayList<NameValuePair>();
    nameValuePairList.add(new BasicNameValuePair("x-goog-meta-uploaded-by", "was APIKEY"));
    nameValuePairList.add(new BasicNameValuePair("acl", "public-read"));
    nameValuePairList.add(new BasicNameValuePair("success_action_redirect", "http://datahub.io/storage/upload/success_empty?label=2013-12-22T13%3A24%3A12.853Z%2Ftest.xml"));
    nameValuePairList.add(new BasicNameValuePair("policy", "eyJleHBpcmF0aW9uIjogIjIwMTMtMTItMjNUMDk6MjQ6MThaIiwKImNvbmRpdGlvbnMiOiBbeyJ4LWdvb2ctbWV0YS11cGxvYWRlZC1ieSI6ICIzYmFlNDAxMy1kMDNlLTQyN2ItYWMyZS05ZTVkMDg0ZDBlYTkifSx7ImJ1Y2tldCI6ICJja2FubmV0LXN0b3JhZ2UifSx7ImtleSI6ICIyMDEzLTEyLTIyVDEzOjI0OjEyLjg1M1ovdGVzdC54bWwifSx7ImFjbCI6ICJwdWJsaWMtcmVhZCJ9LHsic3VjY2Vzc19hY3Rpb25fcmVkaXJlY3QiOiAiaHR0cDovL2RhdGFodWIuaW8vc3RvcmFnZS91cGxvYWQvc3VjY2Vzc19lbXB0eT9sYWJlbD0yMDEzLTEyLTIyVDEzJTNBMjQlM0ExMi44NTNaJTJGdGVzdC54bWwifSxbImNvbnRlbnQtbGVuZ3RoLXJhbmdlIiwgMCwgMTAwMDAwMDAwMDBdXX0="));
    nameValuePairList.add(new BasicNameValuePair("GoogleAccessId", "GOOGC6OU3AYPNY47B66M"));
    nameValuePairList.add(new BasicNameValuePair("signature", "VFYngO/4ZQRYt4YXnRGXitiJyNE="));
    nameValuePairList.add(new BasicNameValuePair("key", "2013-12-22T13:24:12.853Z/test97.xml"));

    // The credential fields must travel inside the multipart body: a request carries a
    // single entity, so a separate URL-encoded form entity would simply be replaced.
    MultipartEntityBuilder multipartEntityBuilder = MultipartEntityBuilder.create();
    for (NameValuePair nvp : nameValuePairList) {
        multipartEntityBuilder.addTextBody(nvp.getName(), nvp.getValue());
    }
    // File part goes last, after all credential fields.
    // NOTE(review): the POST Object reference appears to require the upload field to be
    // named "file" and to be the final field; this test names it "test" - confirm.
    multipartEntityBuilder.addBinaryBody("test", new File("src/test/resources/org/xmlcml/ckan/test.xml"));
    httpPost.setEntity(multipartEntityBuilder.build());

    HttpResponse httpResponse = httpClient.execute(httpPost);
    LOG.debug("HTTP response: "+httpResponse);
    LOG.debug("Msg>> "+IOUtils.toString(httpResponse.getEntity().getContent()));
}
/**
 * PUT variant of the multipart upload experiment: sends the XML fixture plus a set of
 * sample upload-credential fields to the Google storage bucket as one multipart entity
 * and logs the response at debug level. Nothing is asserted.
 *
 * @throws Exception if building or executing the request, or reading the response, fails
 */
@Test
public void testMultipartGooglePut() throws Exception {
    HttpClient httpClient = Connection.createHttpClientOrProxy();
    HttpPut httpPut = new HttpPut("http://ckannet-storage.commondatastorage.googleapis.com/");
    httpPut.setHeader("Connection", "keep-alive");

    // Sample credential fields of the kind returned by /api/storage/auth/form/{label}.
    List<NameValuePair> nameValuePairList = new ArrayList<NameValuePair>();
    nameValuePairList.add(new BasicNameValuePair("x-goog-meta-uploaded-by", "was APIKEY"));
    nameValuePairList.add(new BasicNameValuePair("acl", "public-read"));
    nameValuePairList.add(new BasicNameValuePair("success_action_redirect", "http://datahub.io/storage/upload/success_empty?label=2013-12-22T13%3A24%3A12.853Z%2Ftest.xml"));
    nameValuePairList.add(new BasicNameValuePair("policy", "eyJleHBpcmF0aW9uIjogIjIwMTMtMTItMjNUMDk6MjQ6MThaIiwKImNvbmRpdGlvbnMiOiBbeyJ4LWdvb2ctbWV0YS11cGxvYWRlZC1ieSI6ICIzYmFlNDAxMy1kMDNlLTQyN2ItYWMyZS05ZTVkMDg0ZDBlYTkifSx7ImJ1Y2tldCI6ICJja2FubmV0LXN0b3JhZ2UifSx7ImtleSI6ICIyMDEzLTEyLTIyVDEzOjI0OjEyLjg1M1ovdGVzdC54bWwifSx7ImFjbCI6ICJwdWJsaWMtcmVhZCJ9LHsic3VjY2Vzc19hY3Rpb25fcmVkaXJlY3QiOiAiaHR0cDovL2RhdGFodWIuaW8vc3RvcmFnZS91cGxvYWQvc3VjY2Vzc19lbXB0eT9sYWJlbD0yMDEzLTEyLTIyVDEzJTNBMjQlM0ExMi44NTNaJTJGdGVzdC54bWwifSxbImNvbnRlbnQtbGVuZ3RoLXJhbmdlIiwgMCwgMTAwMDAwMDAwMDBdXX0="));
    nameValuePairList.add(new BasicNameValuePair("GoogleAccessId", "GOOGC6OU3AYPNY47B66M"));
    nameValuePairList.add(new BasicNameValuePair("signature", "VFYngO/4ZQRYt4YXnRGXitiJyNE="));
    nameValuePairList.add(new BasicNameValuePair("key", "2013-12-22T13:24:12.853Z/test97.xml"));

    // One multipart body: the file part first, then every credential field as a text part.
    MultipartEntityBuilder multipartEntityBuilder = MultipartEntityBuilder.create();
    multipartEntityBuilder.addBinaryBody("test", new File("src/test/resources/org/xmlcml/ckan/test.xml"));
    for (NameValuePair nvp : nameValuePairList) {
        multipartEntityBuilder.addTextBody(nvp.getName(), nvp.getValue());
    }
    HttpEntity entity = multipartEntityBuilder.build();
    LOG.debug("entity "+entity.getClass()+entity.getContentLength());
    // The multipart entity is the only entity set; a request holds exactly one.
    httpPut.setEntity(entity);

    HttpResponse httpResponse = httpClient.execute(httpPut);
    LOG.debug("HTTP response: "+httpResponse);
    LOG.debug("Msg>> "+IOUtils.toString(httpResponse.getEntity().getContent()));
}
/**
 * Generic multipart POST experiment against a placeholder URL: sends two text parts and
 * the XML fixture as an {@code application/octet-stream} part, then logs the response
 * body at debug level. Disabled via {@code @Ignore}; nothing is asserted.
 *
 * @throws Exception if building or executing the request, or reading the response, fails
 */
@Test
@Ignore
public void testJavaMultipart() throws Exception {
    HttpClient httpClient = Connection.createHttpClientOrProxy();
    HttpPost httpPost = new HttpPost("http://www.your.targer.url.com/page.html");
    // Browser-like request headers.
    httpPost.setHeader("Connection", "keep-alive");
    httpPost.setHeader("Accept", " text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    httpPost.setHeader("Accept-Language", "en-us,en;q=0.5");

    MultipartEntityBuilder multipartEntityBuilder = MultipartEntityBuilder.create();
    // NOTE(review): the "foo" field is left duplicated; confirm that sending it twice is intended.
    multipartEntityBuilder.addTextBody("foo", "foo body");
    multipartEntityBuilder.addTextBody("foo", "foo body");

    ContentType contentType = ContentType.create("application/octet-stream");
    String filename = "src/test/resources/org/xmlcml/ckan/test.xml";
    File file = new File(filename);
    // Use the File overload rather than an InputStream: no stream is left open by the
    // test, and the resulting entity is repeatable.
    multipartEntityBuilder.addBinaryBody(filename, file, contentType, filename);
    httpPost.setEntity(multipartEntityBuilder.build());

    // Execute exactly once and consume the response.
    HttpResponse httpResponse = httpClient.execute(httpPost);
    LOG.debug("Msg>> "+IOUtils.toString(httpResponse.getEntity().getContent()));
}
}