package net.sf.jabref.imports;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.jabref.BibtexEntry;
import net.sf.jabref.BibtexFields;
import net.sf.jabref.Globals;
import net.sf.jabref.Util;
import net.sf.jabref.util.CaseChanger;
/**
* Importer for the ISI Web of Science, INSPEC and Medline formats.
*
* Documentation about ISI WOS format:
*
* <ul>
* <li>http://wos.isitrial.com/help/helpprn.html</li>
* </ul>
*
* <ul>
* <li>Check compatibility with other ISI2Bib tools like:
* http://www-lab.imr.tohoku.ac.jp/~t-nissie/computer/software/isi/ or
* http://www.tug.org/tex-archive/biblio/bibtex/utils/isi2bibtex/isi2bibtex or
* http://web.mit.edu/emilio/www/utils.html</li>
* <li>Deal with capitalization correctly</li>
* </ul>
*
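* <p>
* Illustrative usage sketch (the file name is hypothetical and not part of the
* importer API):
*
* <pre>
* IsiImporter importer = new IsiImporter();
* List&lt;BibtexEntry&gt; entries = importer.importEntries(new FileInputStream(&quot;records.isi&quot;));
* </pre>
*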
* @author $Author: mortenalver $
* @version $Revision: 3047 $ ($Date: 2009-08-21 18:32:56 +0200 (Fri, 21 Aug 2009) $)
*
*/
public class IsiImporter extends ImportFormat {
/**
* Return the name of this import format.
*/
public String getFormatName() {
return "ISI";
}
/*
* (non-Javadoc)
*
* @see net.sf.jabref.imports.ImportFormat#getCLIId()
*/
public String getCLIId() {
return "isi";
}
// 2006.09.05: Modified pattern to avoid false positives for other files due to an
// extra | at the end:
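// Lines this pattern is intended to match (illustrative): "FN ISI Export Format",
// "VR 1.0", "PY 2006".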
static final Pattern isiPattern = Pattern.compile("FN ISI Export Format|VR 1.|PY \\d{4}");
/**
* Check whether the source is in the correct format for this importer.
*/
public boolean isRecognizedFormat(InputStream stream) throws IOException {
BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
String str;
int i=0;
while (((str = in.readLine()) != null) && (i < 50)) {
/**
* The following line gives false positives for RIS files, so it
* should stay commented out. The hyphen is characteristic of the
* RIS format.
*
* str = str.replace(" - ", "")
*/
if (isiPattern.matcher(str).find())
return true;
i++;
}
return false;
}
static Pattern subsupPattern = Pattern.compile("/(sub|sup)\\s+(.*?)\\s*/");
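/**
* Convert ISI "/sub .../" and "/sup .../" markup in the title, abstract,
* review and notes fields to (La)TeX sub-/superscripts. Illustrative
* examples: a title containing "H/sub 2/O" becomes "H$_2$O", and
* "10/sup 14/" becomes "10$^{14}$".
*/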
static public void processSubSup(HashMap<String, String> map) {
String[] subsup = { "title", "abstract", "review", "notes" };
for (int i = 0; i < subsup.length; i++) {
if (map.containsKey(subsup[i])) {
Matcher m = subsupPattern.matcher(map.get(subsup[i]));
StringBuffer sb = new StringBuffer();
while (m.find()) {
String group2 = m.group(2);
group2 = group2.replaceAll("\\$", "\\\\\\\\\\\\\\$"); // Escaping insanity! :-)
if (group2.length() > 1) {
group2 = "{" + group2 + "}";
}
if (m.group(1).equals("sub")) {
m.appendReplacement(sb, "\\$_" + group2 + "\\$");
} else {
m.appendReplacement(sb, "\\$^" + group2 + "\\$");
}
}
m.appendTail(sb);
map.put(subsup[i], sb.toString());
}
}
}
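/**
* Normalize the capitalization of the title, journal and publisher fields.
* A value written entirely in upper case is re-cased with each word
* capitalized, so, illustratively, "JOURNAL OF APPLIED PHYSICS" would
* become "Journal Of Applied Physics" (the exact result depends on
* CaseChanger's UPPER_EACH_FIRST mode).
*/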
static public void processCapitalization(HashMap<String, String> map) {
String[] fields = { "title", "journal", "publisher" };
for (int i = 0; i < fields.length; i++) {
if (map.containsKey(fields[i])) {
String s = map.get(fields[i]);
if (s.toUpperCase().equals(s)) {
// Entirely upper-case value: re-case it with each word capitalized.
s = CaseChanger.changeCase(s, CaseChanger.UPPER_EACH_FIRST, true);
map.put(fields[i], s);
}
}
}
}
/**
* Parse the entries in the source, and return a List of BibtexEntry
* objects.
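*
* As a first pass the reader flattens each record: a line starting with
* "PT " begins a new record (records are joined with "::"), every further
* two-letter tag is prefixed with " ## ", and continuation lines are glued
* on with "EOLEOL" before the fields are parsed. Illustrative record sketch
* (field values made up):
*
* <pre>
* PT J
* AU Smith, JR
* TI Some title
* SO SOME JOURNAL
* PY 2006
* ER
* </pre>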
*/
public List<BibtexEntry> importEntries(InputStream stream) throws IOException {
if (stream == null) {
throw new IOException("No stream given.");
}
ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>();
StringBuffer sb = new StringBuffer();
BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream));
// Pattern fieldPattern = Pattern.compile("^AU |^TI |^SO |^DT |^C1 |^AB |^ID |^BP |^PY |^SE |^PY |^VL |^IS ");
String str;
while ((str = in.readLine()) != null) {
if (str.length() < 3)
continue;
// beginning of a new item
if (str.substring(0, 3).equals("PT "))
sb.append("::").append(str);
else {
String beg = str.substring(0, 3).trim();
// The fieldPattern regular expression above could have been used instead,
// but this quick and dirty check works just as well.
if (beg.length() == 2) {
sb.append(" ## "); // mark the begining of each field
sb.append(str);
} else {
sb.append("EOLEOL"); // mark the end of each line
sb.append(str.trim()); // remove the initial spaces
}
}
}
String[] entries = sb.toString().split("::");
HashMap<String, String> hm = new HashMap<String, String>();
// The first entry is either empty or contains only the document header;
// it yields no fields and is dropped by the empty-entry check below.
for (int i = 0; i < entries.length; i++) {
String[] fields = entries[i].split(" ## ");
if (fields.length == 0)
fields = entries[i].split("\n");
String Type = "";
String PT = "";
String pages = "";
hm.clear();
nextField: for (int j = 0; j < fields.length; j++) {
// empty field: nothing to do
if (fields[j].length() <= 2)
continue;
String beg = fields[j].substring(0, 2);
String value = fields[j].substring(3);
if (value.startsWith(" - ")) {
value = value.substring(3);
}
value = value.trim();
if (beg.equals("PT")) {
if (value.startsWith("J")) {
PT = "article";
} else {
PT = value;
}
Type = "article"; // make all of them PT?
} else if (beg.equals("TY")) {
if ("JOUR".equals(value))
Type = "article";
else if ("CONF".equals(value))
Type = "inproceedings";
} else if (beg.equals("JO"))
hm.put("booktitle", value);
else if (beg.equals("AU")) {
String author = isiAuthorsConvert(value.replaceAll("EOLEOL", " and "));
// if an author is already present, append the new one with " and "
if (hm.get("author") != null)
author = hm.get("author") + " and " + author;
hm.put("author", author);
} else if (beg.equals("TI"))
hm.put("title", value.replaceAll("EOLEOL", " "));
else if (beg.equals("SO") || beg.equals("JA"))
hm.put("journal", value.replaceAll("EOLEOL", " "));
else if (beg.equals("ID") || beg.equals("KW")) {
value = value.replaceAll("EOLEOL", " ");
String existingKeywords = hm.get("keywords");
if (existingKeywords != null && existingKeywords.indexOf(value) == -1) {
existingKeywords += ", " + value;
} else {
existingKeywords = value;
}
hm.put("keywords", existingKeywords);
} else if (beg.equals("AB"))
hm.put("abstract", value.replaceAll("EOLEOL", " "));
else if (beg.equals("BP") || beg.equals("BR") || beg.equals("SP"))
pages = value;
else if (beg.equals("EP")) {
int detpos = value.indexOf(' ');
// tweak for IEEE Xplore
if (detpos != -1 && value.substring(0, detpos).trim().length() > 0)
value = value.substring(0, detpos);
pages = pages + "--" + value;
} else if (beg.equals("PS")) {
pages = parsePages(value);
} else if (beg.equals("AR"))
pages = value;
else if (beg.equals("IS"))
hm.put("number", value);
else if (beg.equals("PY"))
hm.put("year", value);
else if (beg.equals("VL"))
hm.put("volume", value);
else if (beg.equals("PU"))
hm.put("publisher", value);
else if (beg.equals("DI"))
hm.put("doi", value);
else if (beg.equals("PD")) {
String month = parseMonth(value);
if (month != null) {
hm.put("month", month);
continue nextField;
}
} else if (beg.equals("DT")) {
Type = value;
if (Type.equals("Review")) {
Type = "article"; // set "Review" in Note/Comment?
} else if (Type.startsWith("Article") || Type.startsWith("Journal")
|| PT.equals("article")) {
Type = "article";
continue;
} else {
Type = "misc";
}
} else if (beg.equals("CR")) {
hm.put("CitedReferences", value.replaceAll("EOLEOL", " ; ").trim());
} else {
// Preserve all other fields, except the record/file markers below
if (beg.equals("ER") || beg.equals("EF") || beg.equals("VR")
|| beg.equals("FN"))
continue nextField;
hm.put(beg, value);
}
}
if (!"".equals(pages))
hm.put("pages", pages);
// Skip empty entries
if (hm.size() == 0)
continue;
BibtexEntry b = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals
.getEntryType(Type));
// id assumes an existing database so don't create one here
// Remove empty fields:
ArrayList<Object> toRemove = new ArrayList<Object>();
for (Iterator<String> it = hm.keySet().iterator(); it.hasNext();) {
Object key = it.next();
String content = hm.get(key);
if ((content == null) || (content.trim().length() == 0))
toRemove.add(key);
}
for (Iterator<Object> iterator = toRemove.iterator(); iterator.hasNext();) {
hm.remove(iterator.next());
}
// Polish entries
processSubSup(hm);
processCapitalization(hm);
b.setField(hm);
bibitems.add(b);
}
return bibitems;
}
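/**
* Convert a page range to BibTeX form by replacing the last "-" with "--",
* e.g. "1425-1430" becomes "1425--1430" (illustrative values).
*/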
public static String parsePages(String value) {
int lastDash = value.lastIndexOf("-");
// No dash: not a page range, so return the value unchanged.
if (lastDash == -1)
return value;
return value.substring(0, lastDash) + "--" + value.substring(lastDash + 1);
}
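/**
* Try to map an ISI "PD" value onto a BibTeX month string. Illustrative
* examples: "JUL" or "JUL-AUG 2006" yield "#jul#", and a numeric part in
* the range 1-12 yields the corresponding month; null is returned when
* nothing matches.
*/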
public static String parseMonth(String value) {
String[] parts = value.split("\\s|\\-");
for (int ii = 0; ii < parts.length; ii++) {
if (Globals.MONTH_STRINGS.containsKey(parts[ii].toLowerCase())) {
return "#" + parts[ii].toLowerCase() + "#";
}
}
// Try two digit month
for (int ii = 0; ii < parts.length; ii++) {
int number;
try {
number = Integer.parseInt(parts[ii]);
if (number >= 1 && number <= 12) {
return "#" + Globals.MONTHS[number - 1] + "#";
}
} catch (NumberFormatException e) {
// not a number; try the next part
}
}
return null;
}
/**
* Will expand ISI first names.
*
* Fixed bug from:
* http://sourceforge.net/tracker/index.php?func=detail&aid=1542552&group_id=92314&atid=600306
*
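* Illustrative example: "Smith, JR" is expanded to "Smith, J. R.", while an
* already expanded name such as "Smith, James" is left unchanged.
*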
*/
public static String isiAuthorConvert(String author) {
String[] s = author.split(",");
if (s.length != 2)
return author;
StringBuffer sb = new StringBuffer();
String last = s[0].trim();
sb.append(last).append(", ");
String first = s[1].trim();
String[] firstParts = first.split("\\s+");
for (int i = 0; i < firstParts.length; i++) {
first = firstParts[i];
// Do we have only uppercase chars?
if (first.toUpperCase().equals(first)) {
first = first.replaceAll("\\.", "");
for (int j = 0; j < first.length(); j++) {
sb.append(first.charAt(j)).append(".");
if (j < first.length() - 1)
sb.append(" ");
}
} else {
sb.append(first);
}
if (i < firstParts.length - 1) {
sb.append(" ");
}
}
return sb.toString();
}
public static String[] isiAuthorsConvert(String[] authors) {
String[] result = new String[authors.length];
for (int i = 0; i < result.length; i++) {
result[i] = isiAuthorConvert(authors[i]);
}
return result;
}
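/**
* Convert a whole ISI author string, with authors separated by " and " or
* ";". Illustrative example: "Smith, JR; Jones, AB" becomes
* "Smith, J. R. and Jones, A. B.".
*/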
public static String isiAuthorsConvert(String authors) {
String[] s = isiAuthorsConvert(authors.split(" and |;"));
return Util.join(s, " and ", 0, s.length);
}
}