package net.sf.jabref.imports;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Date;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import net.sf.jabref.BibtexEntry;
import net.sf.jabref.BibtexEntryType;
import net.sf.jabref.GUIGlobals;
import net.sf.jabref.Globals;
import net.sf.jabref.OutputPrinter;
import net.sf.jabref.Util;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
*
* This class can be used to access any archive offering an OAI2 interface. By
* default it will access ArXiv.org
*
* @author Ulrich Stärk
* @author Christian Kopf
*
* @version $Revision: 3203 $ ($Date: 2010-03-04 19:05:17 +0100 (Thu, 04 Mar 2010) $)
*
*/
public class OAI2Fetcher implements EntryFetcher {
public static final String OAI2_ARXIV_PREFIXIDENTIFIER = "oai%3AarXiv.org%3A";
public static final String OAI2_ARXIV_HOST = "export.arxiv.org";
public static final String OAI2_ARXIV_SCRIPT = "oai2";
public static final String OAI2_ARXIV_METADATAPREFIX = "arXiv";
public static final String OAI2_ARXIV_ARCHIVENAME = "ArXiv.org";
public static final String OAI2_IDENTIFIER_FIELD = "oai2identifier";
private SAXParserFactory parserFactory;
private SAXParser saxParser;
private String oai2Host;
private String oai2Script;
private String oai2MetaDataPrefix;
private String oai2PrefixIdentifier;
private String oai2ArchiveName;
private boolean shouldContinue = true;
private OutputPrinter status;
/**
* some archives - like arxive.org - might expect of you to wait some time
*/
private boolean shouldWait() {
return waitTime > 0;
}
private long waitTime = -1;
private Date lastCall;
/**
*
*
* @param oai2Host
* the host to query without leading http:// and without trailing /
* @param oai2Script
* the relative location of the oai2 interface without leading
* and trailing /
* @param oai2Metadataprefix
* the urlencoded metadataprefix
* @param oai2Prefixidentifier
* the urlencoded prefix identifier
* @param waitTimeMs
* Time to wait in milliseconds between query-requests.
*/
public OAI2Fetcher(String oai2Host, String oai2Script, String oai2Metadataprefix,
String oai2Prefixidentifier, String oai2ArchiveName, long waitTimeMs) {
this.oai2Host = oai2Host;
this.oai2Script = oai2Script;
this.oai2MetaDataPrefix = oai2Metadataprefix;
this.oai2PrefixIdentifier = oai2Prefixidentifier;
this.oai2ArchiveName = oai2ArchiveName;
this.waitTime = waitTimeMs;
try {
parserFactory = SAXParserFactory.newInstance();
saxParser = parserFactory.newSAXParser();
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
}
}
/**
* Default Constructor. The archive queried will be ArXiv.org
*
*/
public OAI2Fetcher() {
this(OAI2_ARXIV_HOST, OAI2_ARXIV_SCRIPT, OAI2_ARXIV_METADATAPREFIX,
OAI2_ARXIV_PREFIXIDENTIFIER, OAI2_ARXIV_ARCHIVENAME, 20000L);
}
/**
* Construct the query URL
*
* @param key
* The key of the OAI2 entry that the url should point to.
*
* @return a String denoting the query URL
*/
public String constructUrl(String key) {
String identifier = "";
try {
identifier = URLEncoder.encode(key, "UTF-8");
} catch (UnsupportedEncodingException e) {
return "";
}
StringBuffer sb = new StringBuffer("http://").append(oai2Host).append("/");
sb.append(oai2Script).append("?");
sb.append("verb=GetRecord");
sb.append("&identifier=");
sb.append(oai2PrefixIdentifier);
sb.append(identifier);
sb.append("&metadataPrefix=").append(oai2MetaDataPrefix);
return sb.toString();
}
/**
* Strip subcategories from ArXiv key.
*
* @param key The key to fix.
* @return Fixed key.
*/
public static String fixKey(String key){
if (key.toLowerCase().startsWith("arxiv:")){
key = key.substring(6);
}
int dot = key.indexOf('.');
int slash = key.indexOf('/');
if (dot > -1 && dot < slash)
key = key.substring(0, dot) + key.substring(slash, key.length());
return key;
}
public static String correctLineBreaks(String s){
s = s.replaceAll("\\n(?!\\s*\\n)", " ");
s = s.replaceAll("\\s*\\n\\s*", "\n");
return s.replaceAll(" {2,}", " ").replaceAll("(^\\s*|\\s+$)", "");
}
/**
* Import an entry from an OAI2 archive. The BibtexEntry provided has to
* have the field OAI2_IDENTIFIER_FIELD set to the search string.
*
* @param key
* The OAI2 key to fetch from ArXiv.
* @return The imnported BibtexEntry or null if none.
*/
public BibtexEntry importOai2Entry(String key) {
/**
* Fix for problem reported in mailing-list:
* https://sourceforge.net/forum/message.php?msg_id=4087158
*/
key = fixKey(key);
String url = constructUrl(key);
try {
URL oai2Url = new URL(url);
HttpURLConnection oai2Connection = (HttpURLConnection) oai2Url.openConnection();
oai2Connection.setRequestProperty("User-Agent", "Jabref");
InputStream inputStream = oai2Connection.getInputStream();
/* create an empty BibtexEntry and set the oai2identifier field */
BibtexEntry be = new BibtexEntry(Util.createNeutralId(), BibtexEntryType.ARTICLE);
be.setField(OAI2_IDENTIFIER_FIELD, key);
DefaultHandler handlerBase = new OAI2Handler(be);
/* parse the result */
saxParser.parse(inputStream, handlerBase);
/* Correct line breaks and spacing */
for (String name : be.getAllFields()){
be.setField(name, OAI2Fetcher.correctLineBreaks(be.getField(name).toString()));
}
if (key.matches("\\d\\d\\d\\d\\..*")){
be.setField("year", "20" + key.substring(0,2));
int month = Integer.parseInt(key.substring(2,4));
if (month >= 1 && month <= 12){
be.setField("month", "#" + Globals.MONTHS[month - 1] + "#");
}
}
return be;
} catch (IOException e) {
status.showMessage(Globals.lang(
"An Exception ocurred while accessing '%0'", url)
+ "\n\n" + e.toString(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
} catch (SAXException e) {
status.showMessage(Globals.lang(
"An SAXException ocurred while parsing '%0':", new String[]{url})
+ "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
} catch (RuntimeException e){
status.showMessage(Globals.lang(
"An Error occurred while fetching from OAI2 source (%0):", new String[]{url})
+ "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
}
return null;
}
public String getHelpPage() {
// there is no helppage
return null;
}
public URL getIcon() {
return GUIGlobals.getIconUrl("www");
}
public String getKeyName() {
return "Fetch " + oai2ArchiveName;
}
public JPanel getOptionsPanel() {
// we have no additional options
return null;
}
public String getTitle() {
return Globals.menuTitle(getKeyName());
}
public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) {
this.status = status;
try {
shouldContinue = true;
/* multiple keys can be delimited by ; or space */
query = query.replaceAll(" ", ";");
String[] keys = query.split(";");
for (int i = 0; i < keys.length; i++) {
String key = keys[i];
/*
* some archives - like arxive.org - might expect of you to wait
* some time
*/
if (shouldWait() && lastCall != null) {
long elapsed = new Date().getTime() - lastCall.getTime();
while (elapsed < waitTime) {
status.setStatus(Globals.lang("Waiting for ArXiv...") + ((waitTime - elapsed) / 1000) + " s");
Thread.sleep(1000);
elapsed = new Date().getTime() - lastCall.getTime();
}
}
status.setStatus(Globals.lang("Processing ") + key);
/* the cancel button has been hit */
if (!shouldContinue)
break;
/* query the archive and load the results into the BibtexEntry */
BibtexEntry be = importOai2Entry(key);
if (shouldWait())
lastCall = new Date();
/* add the entry to the inspection dialog */
if (be != null)
dialog.addEntry(be);
/* update the dialogs progress bar */
dialog.setProgress(i + 1, keys.length);
}
return true;
} catch (Exception e) {
status.setStatus(Globals.lang("Error while fetching from OIA2")+ ": " + e.getMessage());
e.printStackTrace();
}
return false;
}
public void stopFetching() {
shouldContinue = false;
}
}