Package net.sf.jabref.imports

Source Code of net.sf.jabref.imports.OAI2Fetcher

package net.sf.jabref.imports;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Date;
import java.util.Locale;

import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import net.sf.jabref.BibtexEntry;
import net.sf.jabref.BibtexEntryType;
import net.sf.jabref.GUIGlobals;
import net.sf.jabref.Globals;
import net.sf.jabref.OutputPrinter;
import net.sf.jabref.Util;

import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
*
* This class can be used to access any archive offering an OAI2 interface. By
* default it will access ArXiv.org
*
* @author Ulrich Stärk
* @author Christian Kopf
*
* @version $Revision: 3203 $ ($Date: 2010-03-04 19:05:17 +0100 (Thu, 04 Mar 2010) $)
*
*/
public class OAI2Fetcher implements EntryFetcher {

    public static final String OAI2_ARXIV_PREFIXIDENTIFIER = "oai%3AarXiv.org%3A";

    public static final String OAI2_ARXIV_HOST = "export.arxiv.org";

    public static final String OAI2_ARXIV_SCRIPT = "oai2";

    public static final String OAI2_ARXIV_METADATAPREFIX = "arXiv";

    public static final String OAI2_ARXIV_ARCHIVENAME = "ArXiv.org";

    public static final String OAI2_IDENTIFIER_FIELD = "oai2identifier";

    private SAXParserFactory parserFactory;

    private SAXParser saxParser;

    private String oai2Host;

    private String oai2Script;

    private String oai2MetaDataPrefix;

    private String oai2PrefixIdentifier;

    private String oai2ArchiveName;

    private boolean shouldContinue = true;

    private OutputPrinter status;

    /**
     * some archives - like arxive.org - might expect of you to wait some time
     */
    private boolean shouldWait() {
        return waitTime > 0;
    }

    private long waitTime = -1;

    private Date lastCall;

    /**
     *
     *
     * @param oai2Host
     *            the host to query without leading http:// and without trailing /
     * @param oai2Script
     *            the relative location of the oai2 interface without leading
     *            and trailing /
     * @param oai2Metadataprefix
     *            the urlencoded metadataprefix
     * @param oai2Prefixidentifier
     *            the urlencoded prefix identifier
     * @param waitTimeMs
     *            Time to wait in milliseconds between query-requests.
     */
    public OAI2Fetcher(String oai2Host, String oai2Script, String oai2Metadataprefix,
        String oai2Prefixidentifier, String oai2ArchiveName, long waitTimeMs) {
        this.oai2Host = oai2Host;
        this.oai2Script = oai2Script;
        this.oai2MetaDataPrefix = oai2Metadataprefix;
        this.oai2PrefixIdentifier = oai2Prefixidentifier;
        this.oai2ArchiveName = oai2ArchiveName;
        this.waitTime = waitTimeMs;
        try {
            parserFactory = SAXParserFactory.newInstance();
            saxParser = parserFactory.newSAXParser();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        }
    }

    /**
     * Default Constructor. The archive queried will be ArXiv.org
     *
     */
    public OAI2Fetcher() {
        this(OAI2_ARXIV_HOST, OAI2_ARXIV_SCRIPT, OAI2_ARXIV_METADATAPREFIX,
            OAI2_ARXIV_PREFIXIDENTIFIER, OAI2_ARXIV_ARCHIVENAME, 20000L);
    }

    /**
     * Construct the query URL
     *
     * @param key
     *            The key of the OAI2 entry that the url should point to.
     *           
     * @return a String denoting the query URL
     */
    public String constructUrl(String key) {
        String identifier = "";
        try {
            identifier = URLEncoder.encode(key, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            return "";
        }
        StringBuffer sb = new StringBuffer("http://").append(oai2Host).append("/");
        sb.append(oai2Script).append("?");
        sb.append("verb=GetRecord");
        sb.append("&identifier=");
        sb.append(oai2PrefixIdentifier);
        sb.append(identifier);
        sb.append("&metadataPrefix=").append(oai2MetaDataPrefix);
        return sb.toString();
    }
   
    /**
     * Strip subcategories from ArXiv key.
     *
     * @param key The key to fix.
     * @return Fixed key.
     */
    public static String fixKey(String key){
       
        if (key.toLowerCase().startsWith("arxiv:")){
            key = key.substring(6);
        }
       
        int dot = key.indexOf('.');
        int slash = key.indexOf('/');
       
        if (dot > -1 && dot < slash)
            key = key.substring(0, dot) + key.substring(slash, key.length());
   
        return key;
    }

    public static String correctLineBreaks(String s){
        s = s.replaceAll("\\n(?!\\s*\\n)", " ");
        s = s.replaceAll("\\s*\\n\\s*", "\n");
        return s.replaceAll(" {2,}", " ").replaceAll("(^\\s*|\\s+$)", "");
    }
   
    /**
     * Import an entry from an OAI2 archive. The BibtexEntry provided has to
     * have the field OAI2_IDENTIFIER_FIELD set to the search string.
     *
     * @param key
     *            The OAI2 key to fetch from ArXiv.
     * @return The imnported BibtexEntry or null if none.
     */
    public BibtexEntry importOai2Entry(String key) {
        /**
         * Fix for problem reported in mailing-list:
         *   https://sourceforge.net/forum/message.php?msg_id=4087158
         */
        key = fixKey(key);
       
        String url = constructUrl(key);
        try {
            URL oai2Url = new URL(url);
            HttpURLConnection oai2Connection = (HttpURLConnection) oai2Url.openConnection();
            oai2Connection.setRequestProperty("User-Agent", "Jabref");
            InputStream inputStream = oai2Connection.getInputStream();
   
            /* create an empty BibtexEntry and set the oai2identifier field */
            BibtexEntry be = new BibtexEntry(Util.createNeutralId(), BibtexEntryType.ARTICLE);
            be.setField(OAI2_IDENTIFIER_FIELD, key);
            DefaultHandler handlerBase = new OAI2Handler(be);
            /* parse the result */
            saxParser.parse(inputStream, handlerBase);
           
            /* Correct line breaks and spacing */
            for (String name : be.getAllFields()){
                be.setField(name, OAI2Fetcher.correctLineBreaks(be.getField(name).toString()));
            }
           
            if (key.matches("\\d\\d\\d\\d\\..*")){
                be.setField("year", "20" + key.substring(0,2));
               
                int month = Integer.parseInt(key.substring(2,4));
                if (month >= 1 && month <= 12){
                    be.setField("month", "#" + Globals.MONTHS[month - 1] + "#");
                }
            }
           
            return be;
        } catch (IOException e) {
            status.showMessage(Globals.lang(
                "An Exception ocurred while accessing '%0'", url)
                + "\n\n" + e.toString(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
        } catch (SAXException e) {
            status.showMessage(Globals.lang(
                "An SAXException ocurred while parsing '%0':", new String[]{url})
                + "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
        } catch (RuntimeException e){
            status.showMessage(Globals.lang(
                "An Error occurred while fetching from OAI2 source (%0):", new String[]{url})
                + "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
        }
        return null;
    }

    public String getHelpPage() {
        // there is no helppage
        return null;
    }

    public URL getIcon() {
        return GUIGlobals.getIconUrl("www");
    }

    public String getKeyName() {
        return "Fetch " + oai2ArchiveName;
    }

    public JPanel getOptionsPanel() {
        // we have no additional options
        return null;
    }

    public String getTitle() {
        return Globals.menuTitle(getKeyName());
    }

    public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) {

        this.status = status;

        try {
            shouldContinue = true;
           
            /* multiple keys can be delimited by ; or space */
            query = query.replaceAll(" ", ";");
            String[] keys = query.split(";");
            for (int i = 0; i < keys.length; i++) {
                String key = keys[i];
               
                /*
                 * some archives - like arxive.org - might expect of you to wait
                 * some time
                 */
                if (shouldWait() && lastCall != null) {

                    long elapsed = new Date().getTime() - lastCall.getTime();

                    while (elapsed < waitTime) {
                        status.setStatus(Globals.lang("Waiting for ArXiv...") + ((waitTime - elapsed) / 1000) + " s");
                        Thread.sleep(1000);
                        elapsed = new Date().getTime() - lastCall.getTime();
                    }
                }

                status.setStatus(Globals.lang("Processing ") + key);

                /* the cancel button has been hit */
                if (!shouldContinue)
                    break;
               
                /* query the archive and load the results into the BibtexEntry */
                BibtexEntry be = importOai2Entry(key);

                if (shouldWait())
                    lastCall = new Date();
               
                /* add the entry to the inspection dialog */
                if (be != null)
                    dialog.addEntry(be);

                /* update the dialogs progress bar */
                dialog.setProgress(i + 1, keys.length);
            }
           
            return true;
        } catch (Exception e) {
            status.setStatus(Globals.lang("Error while fetching from OIA2")+ ": " + e.getMessage());
            e.printStackTrace();
        }
        return false;
    }

    public void stopFetching() {
        shouldContinue = false;
    }
}
TOP

Related Classes of net.sf.jabref.imports.OAI2Fetcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.