/*
* Created on April 01, 2007
* Updated on May 03, 2007
* */
package net.sf.jabref.msbib;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import net.sf.jabref.BibtexEntry;
import net.sf.jabref.BibtexEntryType;
import net.sf.jabref.BibtexFields;
import net.sf.jabref.export.layout.LayoutFormatter;
import net.sf.jabref.export.layout.format.XMLChars;
import net.sf.jabref.mods.PageNumbers;
import net.sf.jabref.mods.PersonName;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* @author S M Mahbub Murshed
* @email udvranto@yahoo.com
*
* @version 2.0.0
* @see http://mahbub.wordpress.com/2007/03/24/details-of-microsoft-office-2007-bibliographic-format-compared-to-bibtex/
* @see http://mahbub.wordpress.com/2007/03/22/deciphering-microsoft-office-2007-bibliography-format/
*
* Date: May 15, 2007; May 03, 2007
*
* History
* May 03, 2007 - Added export functionality
* May 15, 2007 - Added import functionality
* May 16, 2007 - Changed all integer entries to strings,
* except LCID which must be an integer.
* To avoid exception during integer parsing
* the exception is caught and LCID is set to zero.
*/
public class MSBibEntry {
// MSBib source type name; stays "Misc" unless populateFromXml or populateFromBibtex replaces it.
protected String sourceType = "Misc";
// Original BibTeX type name, recorded by getMSBibSourceType when several BibTeX
// types collapse into one MSBib source type; exported as the BIBTEX_Entry element.
protected String bibTexEntry = null;
// Citation key: the BibTeX "bibtexkey" field, or the Tag element on import.
protected String tag = null;
// Exported as the GUID element when non-null; not assigned anywhere in the visible part of this class.
protected String GUID = null;
// Locale identifier; -1 means "not set", and only values >= 0 are exported.
protected int LCID = -1;
// Contributor lists, one per role below the Author element. All are filled by
// getAuthors(Element, String) on import; authors and editors are also filled from BibTeX.
protected List<PersonName> authors = null;
protected List<PersonName> bookAuthors = null;
protected List<PersonName> editors = null;
protected List<PersonName> translators = null;
protected List<PersonName> producerNames = null;
protected List<PersonName> composers = null;
protected List<PersonName> conductors = null;
protected List<PersonName> performers = null;
protected List<PersonName> writers = null;
protected List<PersonName> directors = null;
protected List<PersonName> compilers = null;
protected List<PersonName> interviewers = null;
protected List<PersonName> interviewees = null;
protected List<PersonName> inventors = null;
protected List<PersonName> counsels = null;
// Plain MSBib fields. Each one is null when absent, and null fields are skipped on export.
protected String title = null;
protected String year = null;
protected String month = null;
protected String day = null;
protected String shortTitle = null;
protected String comments = null;
protected PageNumbers pages = null;
protected String volume = null;
protected String numberOfVolumes = null;
protected String edition = null;
// On the BibTeX side this is a concatenation of ":ISBN:", ":ISSN:", ":LCCN:" and ":MRN:" tagged values.
protected String standardNumber = null;
protected String publisher = null;
// Single string; composed from City, StateProvince and CountryRegion on import.
protected String address = null;
protected String bookTitle = null;
protected String chapterNumber = null;
protected String journalName = null;
protected String issue = null;
protected String periodicalTitle = null;
protected String conferenceName = null;
protected String department = null;
protected String institution = null;
protected String thesisType = null;
protected String internetSiteTitle = null;
// Single string; composed from MonthAccessed, DayAccessed and YearAccessed on import.
protected String dateAccessed = null;
protected String url = null;
protected String productionCompany = null;
protected String publicationTitle = null;
protected String medium = null;
protected String albumTitle = null;
protected String recordingNumber = null;
protected String theater = null;
protected String distributor = null;
protected String broadcastTitle = null;
protected String broadcaster = null;
protected String station = null;
protected String type = null;
protected String patentNumber = null;
protected String court = null;
protected String reporter = null;
protected String caseNumber = null;
protected String abbreviatedCaseNumber = null;
// BibTeX fields that are carried through XML under element names prefixed with BIBTEX.
protected String bibTex_Series = null;
protected String bibTex_Abstract = null;
protected String bibTex_KeyWords = null;
protected String bibTex_CrossRef = null;
protected String bibTex_HowPublished = null;
protected String bibTex_Affiliation = null;
protected String bibTex_Contents = null;
protected String bibTex_Copyright = null;
protected String bibTex_Price = null;
protected String bibTex_Size = null;
// Prefix of the XML element names used for the bibTex_* fields and for bibTexEntry.
private final String BIBTEX = "BIBTEX_";
// Prefix of the BibTeX field names that hold MSBib-only values (e.g. "msbib-day").
private final String MSBIB = "msbib-";
// Element name prefix used for every element this class creates on export.
private final String bcol = "b:";
// When true, populateFromBibtex passes the title and the abstract through format().
private final boolean FORMATXML = false;
/**
 * Creates an empty entry: the source type is "Misc", LCID is -1 and every
 * other field is null.
 */
public MSBibEntry() {
    // All state comes from the field initializers.
}
/**
 * Creates an entry whose fields are taken from a BibTeX entry.
 *
 * @param bibtex the BibTeX entry to convert
 */
public MSBibEntry(BibtexEntry bibtex) {
    this();
    this.populateFromBibtex(bibtex);
}
/**
 * Creates an entry whose fields are read from an XML source element.
 *
 * @param entry the XML element describing one source
 * @param _bcol prefix that is prepended to every element name looked up in {@code entry}
 */
public MSBibEntry(Element entry, String _bcol) {
    this();
    this.populateFromXml(entry, _bcol);
}
/**
 * Returns the text of the first descendant of {@code entry} that has the given tag name.
 *
 * @param name  tag name to look for, including any prefix
 * @param entry element whose descendants are searched
 * @return the text content of the first matching element, or null when there is none
 */
protected String getFromXml(String name, Element entry) {
    NodeList matches = entry.getElementsByTagName(name);
    if (matches.getLength() < 1) {
        return null;
    }
    return matches.item(0).getTextContent();
}
/**
 * Fills this entry from one XML source element.
 *
 * Every simple field is read with {@link #getFromXml(String, Element)} and is
 * therefore null when the corresponding element is absent. Exceptions:
 * <ul>
 * <li>the source type falls back to "Misc" when no SourceType element exists;</li>
 * <li>LCID is only touched when an LCID element exists, and becomes -1 when its
 *     text is not an integer;</li>
 * <li>the address is composed as "City, StateProvince CountryRegion" and the
 *     access date as "Month Day, Year"; missing parts are left out, a dangling
 *     trailing comma is removed, and the result is null when nothing remains;</li>
 * <li>the contributor lists are read from the first Author element, if any.</li>
 * </ul>
 *
 * @param entry the XML element describing one source
 * @param _bcol prefix that is prepended to every element name
 */
protected void populateFromXml(Element entry, String _bcol) {
    String temp = getFromXml(_bcol+"SourceType", entry);
    // Never store null here: code that dispatches on the source type expects a value.
    sourceType = (temp != null) ? temp : "Misc";
    tag = getFromXml(_bcol+"Tag", entry);

    temp = getFromXml(_bcol+"LCID", entry);
    if (temp != null) {
        try {
            LCID = Integer.parseInt(temp.trim());
        } catch (NumberFormatException e) {
            // Not a number: treat the locale as "not set".
            LCID = -1;
        }
    }

    title = getFromXml(_bcol+"Title", entry);
    year = getFromXml(_bcol+"Year", entry);
    month = getFromXml(_bcol+"Month", entry);
    day = getFromXml(_bcol+"Day", entry);
    shortTitle = getFromXml(_bcol+"ShortTitle", entry);
    comments = getFromXml(_bcol+"Comments", entry);

    temp = getFromXml(_bcol+"Pages", entry);
    if (temp != null)
        pages = new PageNumbers(temp);

    volume = getFromXml(_bcol+"Volume", entry);
    numberOfVolumes = getFromXml(_bcol+"NumberVolumes", entry);
    edition = getFromXml(_bcol+"Edition", entry);
    standardNumber = getFromXml(_bcol+"StandardNumber", entry);
    publisher = getFromXml(_bcol+"Publisher", entry);

    String city = getFromXml(_bcol+"City", entry);
    String state = getFromXml(_bcol+"StateProvince", entry);
    String country = getFromXml(_bcol+"CountryRegion", entry);
    StringBuilder place = new StringBuilder();
    if (city != null)
        place.append(city).append(", ");
    if (state != null)
        place.append(state).append(' ');
    if (country != null)
        place.append(country);
    address = tidyComposite(place.toString());

    bookTitle = getFromXml(_bcol+"BookTitle", entry);
    chapterNumber = getFromXml(_bcol+"ChapterNumber", entry);
    journalName = getFromXml(_bcol+"JournalName", entry);
    issue = getFromXml(_bcol+"Issue", entry);
    periodicalTitle = getFromXml(_bcol+"PeriodicalTitle", entry);
    conferenceName = getFromXml(_bcol+"ConferenceName", entry);
    department = getFromXml(_bcol+"Department", entry);
    institution = getFromXml(_bcol+"Institution", entry);
    thesisType = getFromXml(_bcol+"ThesisType", entry);
    internetSiteTitle = getFromXml(_bcol+"InternetSiteTitle", entry);

    // Distinct local names so the publication date fields are not shadowed.
    String accessedMonth = getFromXml(_bcol+"MonthAccessed", entry);
    String accessedDay = getFromXml(_bcol+"DayAccessed", entry);
    String accessedYear = getFromXml(_bcol+"YearAccessed", entry);
    StringBuilder accessed = new StringBuilder();
    if (accessedMonth != null)
        accessed.append(accessedMonth).append(' ');
    if (accessedDay != null)
        accessed.append(accessedDay).append(", ");
    if (accessedYear != null)
        accessed.append(accessedYear);
    dateAccessed = tidyComposite(accessed.toString());

    url = getFromXml(_bcol+"URL", entry);
    productionCompany = getFromXml(_bcol+"ProductionCompany", entry);
    publicationTitle = getFromXml(_bcol+"PublicationTitle", entry);
    medium = getFromXml(_bcol+"Medium", entry);
    albumTitle = getFromXml(_bcol+"AlbumTitle", entry);
    recordingNumber = getFromXml(_bcol+"RecordingNumber", entry);
    theater = getFromXml(_bcol+"Theater", entry);
    distributor = getFromXml(_bcol+"Distributor", entry);
    broadcastTitle = getFromXml(_bcol+"BroadcastTitle", entry);
    broadcaster = getFromXml(_bcol+"Broadcaster", entry);
    station = getFromXml(_bcol+"Station", entry);
    type = getFromXml(_bcol+"Type", entry);
    patentNumber = getFromXml(_bcol+"PatentNumber", entry);
    court = getFromXml(_bcol+"Court", entry);
    reporter = getFromXml(_bcol+"Reporter", entry);
    caseNumber = getFromXml(_bcol+"CaseNumber", entry);
    abbreviatedCaseNumber = getFromXml(_bcol+"AbbreviatedCaseNumber", entry);

    bibTex_Series = getFromXml(_bcol+BIBTEX+"Series", entry);
    bibTex_Abstract = getFromXml(_bcol+BIBTEX+"Abstract", entry);
    bibTex_KeyWords = getFromXml(_bcol+BIBTEX+"KeyWords", entry);
    bibTex_CrossRef = getFromXml(_bcol+BIBTEX+"CrossRef", entry);
    bibTex_HowPublished = getFromXml(_bcol+BIBTEX+"HowPublished", entry);
    bibTex_Affiliation = getFromXml(_bcol+BIBTEX+"Affiliation", entry);
    bibTex_Contents = getFromXml(_bcol+BIBTEX+"Contents", entry);
    bibTex_Copyright = getFromXml(_bcol+BIBTEX+"Copyright", entry);
    bibTex_Price = getFromXml(_bcol+BIBTEX+"Price", entry);
    bibTex_Size = getFromXml(_bcol+BIBTEX+"Size", entry);

    // The first Author element in document order is the container of all roles.
    NodeList nodeLst = entry.getElementsByTagName(_bcol+"Author");
    if (nodeLst.getLength() > 0)
        getAuthors((Element)(nodeLst.item(0)), _bcol);
}

/**
 * Trims a composed value, drops trailing commas left behind by missing parts,
 * and maps an empty result to null.
 */
private static String tidyComposite(String text) {
    String tidy = text.trim();
    while (tidy.endsWith(",")) {
        tidy = tidy.substring(0, tidy.length() - 1).trim();
    }
    return (tidy.length() == 0) ? null : tidy;
}
/**
 * Fills this entry from a BibTeX entry.
 *
 * A field of this object is only overwritten when the BibTeX entry has a value
 * for the corresponding BibTeX field; otherwise its current value is kept. The
 * one exception is {@code standardNumber}, which is always rebuilt from the
 * ISBN, ISSN, LCCN and mrnumber fields and is null when none of them is set.
 * The BibTeX title is additionally copied into the title-like field that
 * belongs to the computed source type.
 *
 * NOTE(review): the keys "ISBN", "ISSN", "LCCN" and "URL" are upper-case while
 * every other key is lower-case; confirm that BibtexEntry.getField finds them.
 *
 * @param bibtex the BibTeX entry to read
 */
protected void populateFromBibtex(BibtexEntry bibtex) {
    // Also records the original BibTeX type in bibTexEntry where needed.
    sourceType = getMSBibSourceType(bibtex);

    tag = textOf(bibtex, "bibtexkey", tag);
    Object language = bibtex.getField("language");
    if (language != null) {
        LCID = getLCID(language.toString());
    }
    title = textOf(bibtex, "title", title);
    year = textOf(bibtex, "year", year);
    month = textOf(bibtex, "month", month);
    day = textOf(bibtex, MSBIB+"day", day);
    shortTitle = textOf(bibtex, MSBIB+"shorttitle", shortTitle);
    comments = textOf(bibtex, "note", comments);
    Object pageRange = bibtex.getField("pages");
    if (pageRange != null) {
        pages = new PageNumbers(pageRange.toString());
    }
    volume = textOf(bibtex, "volume", volume);
    numberOfVolumes = textOf(bibtex, MSBIB+"numberofvolume", numberOfVolumes);
    edition = textOf(bibtex, "edition", edition);

    // Tagged concatenation of all standard numbers that are present.
    StringBuilder numbers = new StringBuilder();
    Object isbn = bibtex.getField("ISBN");
    if (isbn != null) {
        numbers.append(":ISBN:").append(isbn.toString());
    }
    Object issn = bibtex.getField("ISSN");
    if (issn != null) {
        numbers.append(":ISSN:").append(issn.toString());
    }
    Object lccn = bibtex.getField("LCCN");
    if (lccn != null) {
        numbers.append(":LCCN:").append(lccn.toString());
    }
    Object mrNumber = bibtex.getField("mrnumber");
    if (mrNumber != null) {
        numbers.append(":MRN:").append(mrNumber.toString());
    }
    standardNumber = (numbers.length() == 0) ? null : numbers.toString();

    publisher = textOf(bibtex, "publisher", publisher);
    address = textOf(bibtex, "address", address);
    bookTitle = textOf(bibtex, "booktitle", bookTitle);
    chapterNumber = textOf(bibtex, "chapter", chapterNumber);
    journalName = textOf(bibtex, "journal", journalName);
    issue = textOf(bibtex, "number", issue);
    periodicalTitle = textOf(bibtex, MSBIB+"periodical", periodicalTitle);
    conferenceName = textOf(bibtex, "organization", conferenceName);
    department = textOf(bibtex, "school", department);
    institution = textOf(bibtex, "institution", institution);
    thesisType = textOf(bibtex, "type", thesisType);
    if (sourceType.equals("InternetSite") || sourceType.equals("DocumentFromInternetSite")) {
        internetSiteTitle = textOf(bibtex, "title", internetSiteTitle);
    }
    dateAccessed = textOf(bibtex, MSBIB+"accessed", dateAccessed);
    url = textOf(bibtex, "URL", url);
    productionCompany = textOf(bibtex, MSBIB+"productioncompany", productionCompany);
    if (sourceType.equals("ElectronicSource")
            || sourceType.equals("Art")
            || sourceType.equals("Misc")) {
        publicationTitle = textOf(bibtex, "title", publicationTitle);
    }
    medium = textOf(bibtex, MSBIB+"medium", medium);
    if (sourceType.equals("SoundRecording")) {
        albumTitle = textOf(bibtex, "title", albumTitle);
    }
    recordingNumber = textOf(bibtex, MSBIB+"recordingnumber", recordingNumber);
    theater = textOf(bibtex, MSBIB+"theater", theater);
    distributor = textOf(bibtex, MSBIB+"distributor", distributor);
    if (sourceType.equals("Interview")) {
        broadcastTitle = textOf(bibtex, "title", broadcastTitle);
    }
    broadcaster = textOf(bibtex, MSBIB+"broadcaster", broadcaster);
    station = textOf(bibtex, MSBIB+"station", station);
    type = textOf(bibtex, MSBIB+"type", type);
    patentNumber = textOf(bibtex, MSBIB+"patentnumber", patentNumber);
    court = textOf(bibtex, MSBIB+"court", court);
    reporter = textOf(bibtex, MSBIB+"reporter", reporter);
    caseNumber = textOf(bibtex, MSBIB+"casenumber", caseNumber);
    abbreviatedCaseNumber = textOf(bibtex, MSBIB+"abbreviatedcasenumber", abbreviatedCaseNumber);

    bibTex_Series = textOf(bibtex, "series", bibTex_Series);
    bibTex_Abstract = textOf(bibtex, "abstract", bibTex_Abstract);
    bibTex_KeyWords = textOf(bibtex, "keywords", bibTex_KeyWords);
    bibTex_CrossRef = textOf(bibtex, "crossref", bibTex_CrossRef);
    bibTex_HowPublished = textOf(bibtex, "howpublished", bibTex_HowPublished);
    bibTex_Affiliation = textOf(bibtex, "affiliation", bibTex_Affiliation);
    bibTex_Contents = textOf(bibtex, "contents", bibTex_Contents);
    bibTex_Copyright = textOf(bibtex, "copyright", bibTex_Copyright);
    bibTex_Price = textOf(bibtex, "price", bibTex_Price);
    bibTex_Size = textOf(bibtex, "size", bibTex_Size);

    Object authorField = bibtex.getField("author");
    if (authorField != null) {
        authors = getAuthors(authorField.toString());
    }
    Object editorField = bibtex.getField("editor");
    if (editorField != null) {
        editors = getAuthors(editorField.toString());
    }

    // Disabled by the FORMATXML constant; only title and abstract are formatted.
    if (FORMATXML) {
        title = format(title);
        bibTex_Abstract = format(bibTex_Abstract);
    }
}

/**
 * Returns the named BibTeX field as a string, or {@code fallback} when the
 * entry has no value for it.
 */
private static String textOf(BibtexEntry bibtex, String name, String fallback) {
    Object raw = bibtex.getField(name);
    return (raw == null) ? fallback : raw.toString();
}
/**
 * Runs a value through the {@link XMLChars} layout formatter.
 *
 * @param value text to format, may be null
 * @return the formatter's output, or null when {@code value} is null
 */
private String format(String value) {
    if (value != null) {
        LayoutFormatter xmlChars = new XMLChars();
        return xmlChars.format(value);
    }
    return null;
}
/**
 * Maps a language name to a Microsoft locale identifier.
 * The mapping is not implemented yet, so every language yields 0.
 * LCID reference: http://www.microsoft.com/globaldev/reference/lcid-all.mspx
 *
 * @param language language name (currently ignored)
 * @return always 0
 */
protected int getLCID(String language) {
    // TODO: add language to LCID mapping
    return 0;
}
/**
 * Maps a Microsoft locale identifier to a language name.
 * The mapping is not implemented yet, so every identifier yields "english".
 * LCID reference: http://www.microsoft.com/globaldev/reference/lcid-all.mspx
 *
 * @param LCID locale identifier (currently ignored)
 * @return always "english"
 */
protected String getLanguage(int LCID) {
    // TODO: add language to LCID mapping
    return "english";
}
/**
 * Reads the persons listed for one contributor role.
 *
 * The method follows the first element named after the role, then its first
 * NameList element, and converts every Person element found below it.
 *
 * @param type    role element name without prefix, e.g. "Editor"
 * @param authors element that contains the role elements
 * @param _bcol   prefix that is prepended to every element name
 * @return the persons in document order, or null when the role, its name list
 *         or any person is missing
 */
protected List<PersonName> getSpecificAuthors(String type, Element authors, String _bcol) {
    NodeList roleNodes = authors.getElementsByTagName(_bcol+type);
    if (roleNodes.getLength() < 1) {
        return null;
    }
    Element role = (Element) roleNodes.item(0);
    NodeList nameLists = role.getElementsByTagName(_bcol+"NameList");
    if (nameLists.getLength() < 1) {
        return null;
    }
    Element nameList = (Element) nameLists.item(0);
    NodeList people = nameList.getElementsByTagName(_bcol+"Person");
    if (people.getLength() < 1) {
        return null;
    }

    List<PersonName> names = new LinkedList<PersonName>();
    for (int idx = 0; idx < people.getLength(); idx++) {
        Element personElem = (Element) people.item(idx);
        NodeList first = personElem.getElementsByTagName(_bcol+"First");
        NodeList last = personElem.getElementsByTagName(_bcol+"Last");
        NodeList middle = personElem.getElementsByTagName(_bcol+"Middle");

        PersonName parsed = new PersonName();
        if (first.getLength() > 0) {
            parsed.setFirstname(first.item(0).getTextContent());
        }
        if (middle.getLength() > 0) {
            parsed.setMiddlename(middle.item(0).getTextContent());
        }
        if (last.getLength() > 0) {
            parsed.setSurname(last.item(0).getTextContent());
        }
        names.add(parsed);
    }
    return names;
}
/**
 * Fills every contributor list of this entry from the container element that
 * holds the per-role sub-elements. A list becomes null when
 * {@link #getSpecificAuthors(String, Element, String)} finds nothing for its role.
 *
 * @param authorsElem element containing one sub-element per contributor role
 * @param _bcol       prefix that is prepended to every element name
 */
protected void getAuthors(Element authorsElem, String _bcol) {
    // Written works.
    authors       = getSpecificAuthors("Author", authorsElem, _bcol);
    bookAuthors   = getSpecificAuthors("BookAuthor", authorsElem, _bcol);
    editors       = getSpecificAuthors("Editor", authorsElem, _bcol);
    translators   = getSpecificAuthors("Translator", authorsElem, _bcol);
    // Recordings, performances and films.
    producerNames = getSpecificAuthors("ProducerName", authorsElem, _bcol);
    composers     = getSpecificAuthors("Composer", authorsElem, _bcol);
    conductors    = getSpecificAuthors("Conductor", authorsElem, _bcol);
    performers    = getSpecificAuthors("Performer", authorsElem, _bcol);
    writers       = getSpecificAuthors("Writer", authorsElem, _bcol);
    directors     = getSpecificAuthors("Director", authorsElem, _bcol);
    compilers     = getSpecificAuthors("Compiler", authorsElem, _bcol);
    // Interviews, patents and cases.
    interviewers  = getSpecificAuthors("Interviewer", authorsElem, _bcol);
    interviewees  = getSpecificAuthors("Interviewee", authorsElem, _bcol);
    inventors     = getSpecificAuthors("Inventor", authorsElem, _bcol);
    counsels      = getSpecificAuthors("Counsel", authorsElem, _bcol);
}
/**
 * Splits a BibTeX name list on the literal separator " and " and wraps each
 * part in a {@link PersonName}.
 *
 * @param authors BibTeX author or editor string, must not be null
 * @return one PersonName per part, in the original order
 */
protected List<PersonName> getAuthors(String authors) {
    List<PersonName> people = new LinkedList<PersonName>();
    // split() returns the whole string as a single part when the separator
    // does not occur, so no separate single-author case is needed.
    for (String part : authors.split(" and ")) {
        people.add(new PersonName(part));
    }
    return people;
}
/**
 * Builds a "year-month" string from the BibTeX year and month fields.
 * A missing field is left out, so the result may be "", "year", "-month" or
 * "year-month".
 *
 * @param bibtex the BibTeX entry to read
 * @return the composed date string, never null
 */
protected String getDate(BibtexEntry bibtex) {
    StringBuilder date = new StringBuilder();
    Object yearValue = bibtex.getField("year");
    if (yearValue != null) {
        date.append(yearValue.toString());
    }
    Object monthValue = bibtex.getField("month");
    if (monthValue != null) {
        date.append("-").append(monthValue.toString());
    }
    return date.toString();
}
/**
 * Maps the BibTeX type of an entry to an MSBib source type name.
 *
 * Several BibTeX types share one MSBib type; for those the BibTeX type name is
 * stored in {@code bibTexEntry} as a side effect. For all other types
 * {@code bibTexEntry} is left untouched. Type names are compared ignoring case.
 *
 * @param bibtex the BibTeX entry whose type is mapped
 * @return the MSBib source type, "Misc" for unknown types
 */
protected String getMSBibSourceType(BibtexEntry bibtex) {
    // Columns: BibTeX type, MSBib source type, value to store in bibTexEntry (null = none).
    final String[][] mapping = {
        {"book", "Book", null},
        {"inbook", "BookSection", null},
        {"booklet", "BookSection", "booklet"},
        {"incollection", "BookSection", "incollection"},
        {"article", "JournalArticle", null},
        {"inproceedings", "ConferenceProceedings", null},
        {"conference", "ConferenceProceedings", "conference"},
        {"proceedings", "ConferenceProceedings", "proceedings"},
        {"collection", "ConferenceProceedings", "collection"},
        {"techreport", "Report", null},
        {"manual", "Report", "manual"},
        {"mastersthesis", "Report", "mastersthesis"},
        {"phdthesis", "Report", "phdthesis"},
        {"unpublished", "Report", "unpublished"},
        {"patent", "Patent", null},
        {"misc", "Misc", null},
    };

    String typeName = bibtex.getType().getName();
    for (String[] row : mapping) {
        if (typeName.equalsIgnoreCase(row[0])) {
            if (row[2] != null) {
                bibTexEntry = row[2];
            }
            return row[1];
        }
    }
    return "Misc";
}
/**
 * Builds the XML representation of this entry inside a freshly created document.
 *
 * @return the element produced by {@link #getDOMrepresentation(Document)}
 * @throws Error wrapping any exception raised while creating the document
 *         or building the element
 */
public Node getDOMrepresentation() {
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document freshDocument = builder.newDocument();
        return getDOMrepresentation(freshDocument);
    } catch (Exception e) {
        throw new Error(e);
    }
}
/**
 * Appends a child element holding a text value to {@code parent}.
 * Nothing is added when {@code value} is null.
 *
 * @param d      document used to create the nodes
 * @param parent element that receives the new child
 * @param name   element name without prefix; the class prefix is prepended
 * @param value  text for the element, passed through
 *               stripNonValidXMLCharacters before it is stored
 */
public void addField(Document d,Element parent, String name, String value) {
    if (value != null) {
        Element child = d.createElement(bcol+name);
        String cleanedText = stripNonValidXMLCharacters(value);
        child.appendChild(d.createTextNode(cleanedText));
        parent.appendChild(child);
    }
}
/**
 * Appends one contributor role with its name list to {@code allAuthors}.
 * Nothing is added when {@code authorsLst} is null; an empty list still
 * produces the role element with an empty NameList.
 *
 * @param d          document used to create the nodes
 * @param allAuthors element that receives the role element
 * @param entryName  role element name without prefix, e.g. "Editor"
 * @param authorsLst persons to write, may be null
 */
public void addAuthor(Document d, Element allAuthors, String entryName, List<PersonName> authorsLst) {
    if (authorsLst != null) {
        Element role = d.createElement(bcol+entryName);
        Element names = d.createElement(bcol+"NameList");
        for (PersonName who : authorsLst) {
            Element personElem = d.createElement(bcol+"Person");
            addField(d, personElem, "Last", who.getSurname());
            addField(d, personElem, "Middle", who.getMiddlename());
            addField(d, personElem, "First", who.getFirstname());
            names.appendChild(personElem);
        }
        role.appendChild(names);
        allAuthors.appendChild(role);
    }
}
/**
 * Three single words separated by a comma and/or whitespace: city, state, country.
 * A separator is mandatory so that one long word is never split into three.
 */
private static final Pattern CITY_STATE_COUNTRY =
    Pattern.compile("\\s*(\\w+)(?:\\s*,\\s*|\\s+)(\\w+)(?:\\s*,\\s*|\\s+)(\\w+)\\s*");

/**
 * Writes an address as City, StateProvince and CountryRegion elements.
 *
 * Only the reduced form "CITY, STATE, COUNTRY" (one word each, commas optional)
 * is split into its parts. Any other non-blank address is written unchanged
 * as the City element so that it is not lost on export.
 *
 * The previous implementation required {@code groupCount() > 3}, which can
 * never hold for a pattern with three groups, so no address was ever written.
 *
 * @param d       document used to create the nodes
 * @param parent  element that receives the address elements
 * @param address address text, may be null
 */
public void addAdrress(Document d,Element parent, String address) {
    if (address == null || address.trim().length() == 0)
        return;
    Matcher m = CITY_STATE_COUNTRY.matcher(address);
    if (m.matches()) {
        addField(d, parent, "City", m.group(1));
        addField(d, parent, "StateProvince", m.group(2));
        addField(d, parent, "CountryRegion", m.group(3));
    } else {
        // Multi-word or otherwise unparseable address: keep it whole.
        addField(d, parent, "City", address.trim());
    }
}
/**
 * Numeric date "month day year": 1-2 digits, 1-2 digits, 2-4 digits, separated
 * by one of . , / - (optionally surrounded by whitespace) or by whitespace alone.
 */
private static final Pattern NUMERIC_DATE =
    Pattern.compile("\\s*(\\d{1,2})(?:\\s*[.,/-]\\s*|\\s+)(\\d{1,2})(?:\\s*[.,/-]\\s*|\\s+)(\\d{2,4})\\s*");

/**
 * Writes a numeric date as Month, Day and Year elements, each element name
 * followed by {@code extra}. Accepts forms such as "20.3-2007", "5/3/2007"
 * and "5 3, 2007"; a date that does not match is not written.
 *
 * The previous implementation required {@code groupCount() > 3}, which can
 * never hold for a pattern with three groups, so no date was ever written.
 *
 * @param d      document used to create the nodes
 * @param parent element that receives the date elements
 * @param date   date text, may be null
 * @param extra  suffix for the element names, e.g. "Accessed"
 */
public void addDate(Document d,Element parent, String date, String extra) {
    if (date == null)
        return;
    Matcher m = NUMERIC_DATE.matcher(date);
    if (m.matches()) {
        addField(d, parent, "Month"+extra, m.group(1));
        addField(d, parent, "Day"+extra, m.group(2));
        addField(d, parent, "Year"+extra, m.group(3));
    }
}
/**
 * Builds the Source element for this entry using the given document.
 *
 * Null fields are skipped. LCID is only written when it is non-negative and
 * Pages only when a page range is set. The Author container is always
 * appended, even when no contributor list is set. Month and Day are written
 * next to Year so that the publication date read on import is not dropped
 * on export.
 *
 * @param d document used to create all nodes; the result is not attached to it
 * @return the new Source element
 * @throws Error wrapping any exception raised while building the element
 */
public Element getDOMrepresentation(Document d) {
    try {
        Element msbibEntry = d.createElement(bcol+"Source");

        addField(d,msbibEntry,"SourceType",sourceType);
        addField(d,msbibEntry,BIBTEX+"Entry",bibTexEntry);
        addField(d,msbibEntry,"Tag",tag);
        addField(d,msbibEntry,"GUID",GUID);
        if(LCID >= 0)
            addField(d,msbibEntry,"LCID",Integer.toString(LCID));
        addField(d,msbibEntry,"Title",title);
        addField(d,msbibEntry,"Year",year);
        addField(d,msbibEntry,"Month",month);
        addField(d,msbibEntry,"Day",day);
        addField(d,msbibEntry,"ShortTitle",shortTitle);
        addField(d,msbibEntry,"Comments",comments);

        // One container element holding a sub-element per contributor role.
        Element allAuthors = d.createElement(bcol+"Author");
        addAuthor(d,allAuthors,"Author",authors);
        addAuthor(d,allAuthors,"BookAuthor",bookAuthors);
        addAuthor(d,allAuthors,"Editor",editors);
        addAuthor(d,allAuthors,"Translator",translators);
        addAuthor(d,allAuthors,"ProducerName",producerNames);
        addAuthor(d,allAuthors,"Composer",composers);
        addAuthor(d,allAuthors,"Conductor",conductors);
        addAuthor(d,allAuthors,"Performer",performers);
        addAuthor(d,allAuthors,"Writer",writers);
        addAuthor(d,allAuthors,"Director",directors);
        addAuthor(d,allAuthors,"Compiler",compilers);
        addAuthor(d,allAuthors,"Interviewer",interviewers);
        addAuthor(d,allAuthors,"Interviewee",interviewees);
        addAuthor(d,allAuthors,"Inventor",inventors);
        addAuthor(d,allAuthors,"Counsel",counsels);
        msbibEntry.appendChild(allAuthors);

        if(pages != null)
            addField(d,msbibEntry,"Pages",pages.toString("-"));
        addField(d,msbibEntry,"Volume",volume);
        addField(d,msbibEntry,"NumberVolumes",numberOfVolumes);
        addField(d,msbibEntry,"Edition",edition);
        addField(d,msbibEntry,"StandardNumber",standardNumber);
        addField(d,msbibEntry,"Publisher",publisher);
        addAdrress(d,msbibEntry,address);
        addField(d,msbibEntry,"BookTitle",bookTitle);
        addField(d,msbibEntry,"ChapterNumber",chapterNumber);
        addField(d,msbibEntry,"JournalName",journalName);
        addField(d,msbibEntry,"Issue",issue);
        addField(d,msbibEntry,"PeriodicalTitle",periodicalTitle);
        addField(d,msbibEntry,"ConferenceName",conferenceName);
        addField(d,msbibEntry,"Department",department);
        addField(d,msbibEntry,"Institution",institution);
        addField(d,msbibEntry,"ThesisType",thesisType);
        addField(d,msbibEntry,"InternetSiteTitle",internetSiteTitle);
        addDate(d,msbibEntry, dateAccessed, "Accessed");
        addField(d,msbibEntry,"URL",url);
        addField(d,msbibEntry,"ProductionCompany",productionCompany);
        addField(d,msbibEntry,"PublicationTitle",publicationTitle);
        addField(d,msbibEntry,"Medium",medium);
        addField(d,msbibEntry,"AlbumTitle",albumTitle);
        addField(d,msbibEntry,"RecordingNumber",recordingNumber);
        addField(d,msbibEntry,"Theater",theater);
        addField(d,msbibEntry,"Distributor",distributor);
        addField(d,msbibEntry,"BroadcastTitle",broadcastTitle);
        addField(d,msbibEntry,"Broadcaster",broadcaster);
        addField(d,msbibEntry,"Station",station);
        addField(d,msbibEntry,"Type",type);
        addField(d,msbibEntry,"PatentNumber",patentNumber);
        addField(d,msbibEntry,"Court",court);
        addField(d,msbibEntry,"Reporter",reporter);
        addField(d,msbibEntry,"CaseNumber",caseNumber);
        addField(d,msbibEntry,"AbbreviatedCaseNumber",abbreviatedCaseNumber);

        // BibTeX-only fields, stored under prefixed element names.
        addField(d,msbibEntry,BIBTEX+"Series",bibTex_Series);
        addField(d,msbibEntry,BIBTEX+"Abstract",bibTex_Abstract);
        addField(d,msbibEntry,BIBTEX+"KeyWords",bibTex_KeyWords);
        addField(d,msbibEntry,BIBTEX+"CrossRef",bibTex_CrossRef);
        addField(d,msbibEntry,BIBTEX+"HowPublished",bibTex_HowPublished);
        addField(d,msbibEntry,BIBTEX+"Affiliation",bibTex_Affiliation);
        addField(d,msbibEntry,BIBTEX+"Contents",bibTex_Contents);
        addField(d,msbibEntry,BIBTEX+"Copyright",bibTex_Copyright);
        addField(d,msbibEntry,BIBTEX+"Price",bibTex_Price);
        addField(d,msbibEntry,BIBTEX+"Size",bibTex_Size);
        return msbibEntry;
    }
    catch (Exception e)
    {
        // Kept as in the original: report on stdout/stderr, then rethrow as Error.
        System.out.println("Exception caught..." + e);
        e.printStackTrace();
        throw new Error(e);
    }
}
/**
 * Extracts one tagged number from a string such as ":ISBN:123:ISSN:456" and
 * stores it in {@code hm}.
 *
 * The tag is searched anywhere in the string, so it is found even when other
 * tagged numbers precede or follow it. The value runs up to the next colon or
 * the end of the string. Nothing is stored when the tag is absent.
 *
 * @param type        tag between the colons, e.g. "ISBN"; matched literally
 * @param bibtype     key under which the value is stored in {@code hm}
 * @param standardNum tagged string to search, must not be null
 * @param hm          map receiving the extracted value
 */
protected void parseSingleStandardNumber(String type,String bibtype, String standardNum, HashMap<String, String> hm) {
    Pattern tagged = Pattern.compile(":" + Pattern.quote(type) + ":([^:]+)");
    Matcher m = tagged.matcher(standardNum);
    // find(), not matches(): the tag is usually only one part of the string.
    if (m.find())
        hm.put(bibtype, m.group(1));
}
/**
 * Splits a tagged standard-number string into its ISBN, ISSN, LCCN and MRN
 * parts and stores them in {@code hm} under the keys "ISBN", "ISSN", "LCCN"
 * and "mrnumber".
 *
 * The null check is made on the argument itself; the previous version tested
 * the {@code standardNumber} field instead and could pass a null argument on
 * to the matcher.
 *
 * @param standardNum tagged string to parse; nothing happens when it is null
 * @param hm          map receiving the extracted values
 */
protected void parseStandardNumber(String standardNum, HashMap<String, String> hm) {
    if (standardNum == null)
        return;
    parseSingleStandardNumber("ISBN","ISBN",standardNum,hm);
    parseSingleStandardNumber("ISSN","ISSN",standardNum,hm);
    parseSingleStandardNumber("LCCN","LCCN",standardNum,hm);
    parseSingleStandardNumber("MRN","mrnumber",standardNum,hm);
}
/**
 * Stores a list of persons in {@code hm} as one string, with the full names
 * joined by " and ". Nothing is stored when {@code authorsLst} is null; an
 * empty list stores the empty string.
 *
 * @param hm         map receiving the joined names
 * @param type       key under which the names are stored
 * @param authorsLst persons to join, may be null
 */
public void addAuthor(HashMap<String, String> hm, String type, List<PersonName> authorsLst) {
    if (authorsLst == null) {
        return;
    }
    StringBuilder joined = new StringBuilder();
    Iterator<PersonName> people = authorsLst.iterator();
    while (people.hasNext()) {
        joined.append(people.next().getFullname());
        if (people.hasNext()) {
            joined.append(" and ");
        }
    }
    hm.put(type, joined.toString());
}
// public String mapMSBibToBibtexTypeString(String msbib) {
// String bibtex = "other";
// if(msbib.equals("Book"))
// bibtex = "book";
// else if(msbib.equals("BookSection"))
// bibtex = "inbook";
// else if(msbib.equals("JournalArticle"))
// bibtex = "article";
// else if(msbib.equals("ArticleInAPeriodical"))
// bibtex = "article";
// else if(msbib.equals("ConferenceProceedings"))
// bibtex = "conference";
// else if(msbib.equals("Report"))
// bibtex = "techreport";
// else if(msbib.equals("InternetSite"))
// bibtex = "other";
// else if(msbib.equals("DocumentFromInternetSite"))
// bibtex = "other";
// else if(msbib.equals("DocumentFromInternetSite"))
// bibtex = "other";
// else if(msbib.equals("ElectronicSource"))
// bibtex = "other";
// else if(msbib.equals("Art"))
// bibtex = "other";
// else if(msbib.equals("SoundRecording"))
// bibtex = "other";
// else if(msbib.equals("Performance"))
// bibtex = "other";
// else if(msbib.equals("Film"))
// bibtex = "other";
// else if(msbib.equals("Interview"))
// bibtex = "other";
// else if(msbib.equals("Patent"))
// bibtex = "other";
// else if(msbib.equals("Case"))
// bibtex = "other";
// else if(msbib.equals("Misc"))
// bibtex = "misc";
// else
// bibtex = "misc";
//
// return bibtex;
// }
/**
 * Maps an MSBib source type name to a BibTeX entry type.
 *
 * Source types without a BibTeX counterpart map to
 * {@link BibtexEntryType#OTHER}. "Misc", any unknown name and null map to
 * {@link BibtexEntryType#MISC}. Null is accepted because the source type is
 * null when the imported XML has no SourceType element.
 *
 * @param msbib MSBib source type name, may be null
 * @return the matching BibTeX entry type, never null
 */
public BibtexEntryType mapMSBibToBibtexType(String msbib)
{
    // Literal-first equals() keeps every comparison null-safe.
    if ("Book".equals(msbib))
        return BibtexEntryType.BOOK;
    if ("BookSection".equals(msbib))
        return BibtexEntryType.INBOOK;
    if ("JournalArticle".equals(msbib) || "ArticleInAPeriodical".equals(msbib))
        return BibtexEntryType.ARTICLE;
    if ("ConferenceProceedings".equals(msbib))
        return BibtexEntryType.CONFERENCE;
    if ("Report".equals(msbib))
        return BibtexEntryType.TECHREPORT;

    // Source types that have no dedicated BibTeX entry type.
    final String[] withoutCounterpart = {
        "InternetSite", "DocumentFromInternetSite", "ElectronicSource", "Art",
        "SoundRecording", "Performance", "Film", "Interview", "Patent", "Case"
    };
    for (String name : withoutCounterpart) {
        if (name.equals(msbib))
            return BibtexEntryType.OTHER;
    }

    // "Misc", unknown names and null.
    return BibtexEntryType.MISC;
}
/**
 * Builds a BibTeX entry from the values held by this MSBib entry.
 *
 * The entry type is derived from <code>sourceType</code> via
 * {@link #mapMSBibToBibtexType(String)}. Every non-null value is copied into
 * a field map which is then handed to the entry in a single
 * <code>setField</code> call. Values without a standard BibTeX field are
 * stored under field names prefixed with <code>MSBIB</code>.
 *
 * @return a newly created entry populated from this object
 */
public BibtexEntry getBibtexRepresentation() {
    // When a tag is present it serves both as the entry id (here) and as the
    // "bibtexkey" field (below); otherwise the default id is used.
    // Todo: add check for BibTexEntry types
    BibtexEntry entry;
    if(tag == null) {
        entry = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID,
                mapMSBibToBibtexType(sourceType));
    } else {
        entry = new BibtexEntry(tag, mapMSBibToBibtexType(sourceType));
    }

    HashMap<String, String> hm = new HashMap<String, String>();

    putFieldIfSet(hm, "bibtexkey", tag);
    // GUID is currently not exported to the BibTeX entry.
    if(LCID >= 0)
        hm.put("language", getLanguage(LCID));
    putFieldIfSet(hm, "title", title);
    putFieldIfSet(hm, "year", year);
    putFieldIfSet(hm, MSBIB+"shorttitle", shortTitle);
    putFieldIfSet(hm, "note", comments);

    // Contributors, one field per role.
    addAuthor(hm,"author",authors);
    addAuthor(hm,MSBIB+"bookauthor",bookAuthors);
    addAuthor(hm,"editor",editors);
    addAuthor(hm,MSBIB+"translator",translators);
    addAuthor(hm,MSBIB+"producername",producerNames);
    addAuthor(hm,MSBIB+"composer",composers);
    addAuthor(hm,MSBIB+"conductor",conductors);
    addAuthor(hm,MSBIB+"performer",performers);
    addAuthor(hm,MSBIB+"writer",writers);
    addAuthor(hm,MSBIB+"director",directors);
    addAuthor(hm,MSBIB+"compiler",compilers);
    addAuthor(hm,MSBIB+"interviewer",interviewers);
    addAuthor(hm,MSBIB+"interviewee",interviewees);
    addAuthor(hm,MSBIB+"inventor",inventors);
    addAuthor(hm,MSBIB+"counsel",counsels);

    if(pages != null)
        hm.put("pages", pages.toString("--"));
    putFieldIfSet(hm, "volume", volume);
    putFieldIfSet(hm, MSBIB+"numberofvolume", numberOfVolumes);
    putFieldIfSet(hm, "edition", edition);

    parseStandardNumber(standardNumber,hm);

    putFieldIfSet(hm, "publisher", publisher);
    putFieldIfSet(hm, "address", address);
    putFieldIfSet(hm, "booktitle", bookTitle);
    putFieldIfSet(hm, "chapter", chapterNumber);
    putFieldIfSet(hm, "journal", journalName);
    putFieldIfSet(hm, "number", issue);
    // NOTE(review): periodicalTitle and conferenceName share the
    // "organization" field; when both are set conferenceName overwrites
    // periodicalTitle. Confirm whether that is intended.
    putFieldIfSet(hm, "organization", periodicalTitle);
    putFieldIfSet(hm, "organization", conferenceName);
    putFieldIfSet(hm, "school", department);
    putFieldIfSet(hm, "institution", institution);
    // thesisType, internetSiteTitle, publicationTitle, albumTitle and
    // broadcastTitle are currently not exported to the BibTeX entry.
    putFieldIfSet(hm, MSBIB+"accessed", dateAccessed);
    putFieldIfSet(hm, "url", url);
    putFieldIfSet(hm, MSBIB+"productioncompany", productionCompany);
    putFieldIfSet(hm, MSBIB+"medium", medium);
    putFieldIfSet(hm, MSBIB+"recordingnumber", recordingNumber);
    putFieldIfSet(hm, MSBIB+"theater", theater);
    putFieldIfSet(hm, MSBIB+"distributor", distributor);
    putFieldIfSet(hm, MSBIB+"broadcaster", broadcaster);
    putFieldIfSet(hm, MSBIB+"station", station);
    putFieldIfSet(hm, MSBIB+"type", type);
    putFieldIfSet(hm, MSBIB+"patentnumber", patentNumber);
    putFieldIfSet(hm, MSBIB+"court", court);
    putFieldIfSet(hm, MSBIB+"reporter", reporter);
    putFieldIfSet(hm, MSBIB+"casenumber", caseNumber);
    putFieldIfSet(hm, MSBIB+"abbreviatedcasenumber", abbreviatedCaseNumber);

    // Values that already carry plain BibTeX field names.
    putFieldIfSet(hm, "series", bibTex_Series);
    putFieldIfSet(hm, "abstract", bibTex_Abstract);
    putFieldIfSet(hm, "keywords", bibTex_KeyWords);
    putFieldIfSet(hm, "crossref", bibTex_CrossRef);
    putFieldIfSet(hm, "howpublished", bibTex_HowPublished);
    putFieldIfSet(hm, "affiliation", bibTex_Affiliation);
    putFieldIfSet(hm, "contents", bibTex_Contents);
    putFieldIfSet(hm, "copyright", bibTex_Copyright);
    putFieldIfSet(hm, "price", bibTex_Price);
    putFieldIfSet(hm, "size", bibTex_Size);

    entry.setField(hm);
    return entry;
}

/**
 * Stores <code>value</code> under <code>key</code> unless the value is
 * <code>null</code>, in which case the map is left untouched.
 */
private static void putFieldIfSet(HashMap<String, String> fields, String key, String value) {
    if(value != null)
        fields.put(key, value);
}
/**
 * Removes every character that is not a valid XML 1.0 character from the
 * given string. For reference, please see
 * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
 * standard</a>.
 *
 * The input is examined by Unicode code point rather than by UTF-16 code
 * unit, so supplementary characters (U+10000 to U+10FFFF, stored as a
 * surrogate pair) are kept, while unpaired surrogates are removed.
 *
 * URL: http://cse-mjmcl.cse.bris.ac.uk/blog/2007/02/14/1171465494443.html
 *
 * @param in The String whose non-valid characters we want to remove.
 * @return The in String, stripped of non-valid characters; an empty
 * String if the input is null or empty.
 */
public String stripNonValidXMLCharacters(String in) {
    if (in == null || in.length() == 0) {
        return ""; // vacancy test.
    }
    StringBuilder out = new StringBuilder(in.length());
    int i = 0;
    while (i < in.length()) {
        // codePointAt joins a valid surrogate pair into one code point; a
        // lone surrogate comes back as itself and is rejected below.
        int codePoint = in.codePointAt(i);
        i += Character.charCount(codePoint);
        if ((codePoint == 0x9) ||
            (codePoint == 0xA) ||
            (codePoint == 0xD) ||
            ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) ||
            ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) ||
            ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) {
            out.appendCodePoint(codePoint);
        }
    }
    return out.toString();
}
/**
 * Renders this entry as XML text.
 *
 * The DOM representation of the entry is serialized with a default
 * transformer that has indentation switched on. Any failure along the way
 * is rethrown wrapped in an {@link Error}.
 *
 * TODO This is untested.
 *
 * @return the serialized XML
 */
@Override
public String toString() {
    StringWriter xmlText = new StringWriter();
    try {
        DOMSource input = new DOMSource(getDOMrepresentation());
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.transform(input, new StreamResult(xmlText));
    }
    catch (Exception e) {
        throw new Error(e);
    }
    return xmlText.toString();
}
}