/* Copyright (C) Abu Rizal, 2009.
*
* This file is part of QurText.
*
* QurText is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* QurText is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with QurText. If not, see <http://www.gnu.org/licenses/>.
*/
package qurtext.factory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.jdo.PersistenceManager;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.json.JSONException;
import org.json.JSONObject;
import org.xml.sax.InputSource;
import qurtext.domain.Chapter;
import qurtext.domain.Section;
import qurtext.domain.Verse;
public class SectionFactory {
private static final String TRANSLATOR = "Free_Minds";
static final HashMap<Character,Character> buckwalter = new HashMap<Character, Character>();
static {
buckwalter.put('\u0621','\'');
buckwalter.put('\u0622','|');
buckwalter.put('\u0623','>');
buckwalter.put('\u0624','&');
buckwalter.put('\u0625','<');
buckwalter.put('\u0626','}');
buckwalter.put('\u0627','A');
buckwalter.put('\u0628','b');
buckwalter.put('\u0629','p');
buckwalter.put('\u062A','t');
buckwalter.put('\u062B','v');
buckwalter.put('\u062C','j');
buckwalter.put('\u062D','H');
buckwalter.put('\u062E','x');
buckwalter.put('\u062F','d');
buckwalter.put('\u0630','*');
buckwalter.put('\u0631','r');
buckwalter.put('\u0632','z');
buckwalter.put('\u0633','s');
buckwalter.put('\u0634','$');
buckwalter.put('\u0635','S');
buckwalter.put('\u0636','D');
buckwalter.put('\u0637','T');
buckwalter.put('\u0638','Z');
buckwalter.put('\u0639','E');
buckwalter.put('\u063A','g');
buckwalter.put('\u0640','_');
buckwalter.put('\u0641','f');
buckwalter.put('\u0642','q');
buckwalter.put('\u0643','k');
buckwalter.put('\u0644','l');
buckwalter.put('\u0645','m');
buckwalter.put('\u0646','n');
buckwalter.put('\u0647','h');
buckwalter.put('\u0648','w');
buckwalter.put('\u0649','Y');
buckwalter.put('\u064A','y');
buckwalter.put('\u064B','F');
buckwalter.put('\u064C','N');
buckwalter.put('\u064D','K');
buckwalter.put('\u064E','a');
buckwalter.put('\u064F','u');
buckwalter.put('\u0650','i');
buckwalter.put('\u0651','~');
buckwalter.put('\u0652','o');
buckwalter.put('\u0670','`');
buckwalter.put('\u0671','{');
buckwalter.put('\u067E','P');
buckwalter.put('\u0686','J');
buckwalter.put('\u06A4','V');
buckwalter.put('\u06AF','G');
};
static final String BAMA_URL_STRING = "http://www.xrce.xerox.com/cgi-bin/mltt/arabic/xarabic.pl?showglosses=YES&showscript=NO&showqpart=YES&showimpv=YES&show3pfplv=YES&show2pfsgv=YES&show1psgv=YES&show1psgposs=YES&showallcase=YES&showallmood=YES&itranslitname=Buckwalter&input=";
static final String GOOGLE_TRANSLATE_URL_STRING = "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&langpair=ar%7Cen&q=";
private static final int WORD_COUNT_PER_REQUEST = 10;
private SourceFactory sourceFactory;
private TranslationParser translationParser;
public SectionFactory() {
sourceFactory=new SourceFactory();
}
public void initSectionVerses(int chapterNo, int verseNo) {
translationParser= new TranslationParser("WEB-INF/quran.txt");
PersistenceManager pm = PMF.get().getPersistenceManager();
try {
Section section = getSection(chapterNo, verseNo, pm);
JSONObject verses = getYaqbVerse(section);
Verse previousVerse=null;
String previousTransliteration="";;
for (int i=section.getStartVerse();i<=section.getEndVerse();i++) {
JSONObject versions=verses.getJSONObject("" + section.getChapterNo() + ":" + i);
String arabic = getTanzilText(section.getChapterNo(), i);
String transliteration = versions.getString("Transliteration");
transliteration=transliteration.replaceAll("[A][A]", "'");
transliteration=transliteration.replaceAll("[<][b][>]", "<s>");
transliteration=transliteration.replaceAll("[ ][<][/][b][>]", "</s>");
transliteration=transliteration.replaceAll("[<][/][b][>]", "</s>");
transliteration=transliteration.replaceAll("[<][s][t][r][o][n][g][>]", "<s>");
transliteration=transliteration.replaceAll("[<][/][s][t][r][o][n][g][>]", "</s>");
String token="" + section.getChapterNo() + ":" + i;
String translation = versions.getString(TRANSLATOR);
if ("Free_Minds".equals(TRANSLATOR)) {
translation=translationParser.getTranslations().get(token).trim();
}
TreeSet<String> topicList = translationParser.getVerseTopics().get(token);
String topics = flattenStringCollection(topicList);
transliteration = repairShiftedTransliteration(arabic,
transliteration);
Verse verse=section.getVerse(chapterNo,i);
if (null==verse) {
verse=new Verse();
section.addVerse(verse);
}
verse.setChapterNo(chapterNo);
verse.setVerseNo(i);
verse.setText(arabic);
verse.setUthmani(getTanzilUthmani(section.getChapterNo(), i));
verse.setTransliteration(transliteration);
verse.setTranslation(translation);
verse.setTopics(topics);
pm.makePersistent(verse);
if (previousTransliteration.equals(transliteration)) {//suspicious;
repairTransliteration(verse);
repairTransliteration(previousVerse);
}
previousTransliteration = transliteration;
previousVerse=verse;
}
} catch (JSONException e) {
e.printStackTrace();
} finally {
pm.close();
}
}
private String flattenStringCollection(Collection<String> texts) {
StringBuffer result=new StringBuffer();
if (null!=texts)
for (String text:texts) {
result.append(text);
result.append(',');
}
if (result.length()>0) result.setLength(result.length()-1);
return result.toString();
}
private JSONObject getYaqbVerse(Section section) throws JSONException {
StringBuffer message = new StringBuffer();
for (int i=section.getStartVerse();i<=section.getEndVerse();i++) {
message.append(section.getChapterNo());
message.append(":");
message.append(i);
message.append(" ");
}
// message=URLEncoder.encode(message, "UTF-8");
String urlPath = "http://www.yaqb.org/lookupReferences";
String params = "lookup_input=" + message;
// http://www.yaqb.org/lookupReferences?lookup_input=1:5+1:1 (Post operation)
String result = sourceFactory.getSourceContent(urlPath, params, true, "yaqb="+message);
JSONObject jsRoot = new JSONObject(result);
JSONObject verses=jsRoot.getJSONObject("verses");
return verses;
}
private String getTanzilText(int chapterNo, int verseNo) {
String filename = "WEB-INF/quran-simple.xml";
return getTanzil(chapterNo, verseNo, filename);
}
private String getTanzilUthmani(int chapterNo, int verseNo) {
String filename = "WEB-INF/quran-simple-enhanced.xml";
return getTanzil(chapterNo, verseNo, filename);
}
/**
* @param chapterNo
* @param verseNo
* @param filename
* @return
*/
private String getTanzil(int chapterNo, int verseNo, String filename) {
File xmlDocument =
new File(filename);
XPathFactory factory = new org.apache.xpath.jaxp.XPathFactoryImpl();
XPath xPath=factory.newXPath();
try {
XPathExpression xPathExpression=
xPath.compile("/quran/sura[@index='"+chapterNo+"']/aya[@index='"+verseNo+"']/@text");
InputSource inputSource =
new InputSource(new
FileInputStream(xmlDocument));
return xPathExpression.evaluate(inputSource);
} catch (XPathExpressionException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
}
return null;
}
@SuppressWarnings("unchecked")
private Section getSection(int chapterNo, int verseNo, PersistenceManager pm) {
String query = "select from " + Section.class.getName() + " where chapterNo==" + chapterNo + " && startVerse==" + verseNo;
Section section1 = ((Collection<Section>) pm.newQuery(query)
.execute()).iterator().next();
return section1;
}
public Section getSection(int chapterNo, int verseNo) {
PersistenceManager pm = PMF.get().getPersistenceManager();
try {
return getSection(chapterNo,verseNo,pm);
} catch (NoSuchElementException e) {
return null;
} finally {
pm.close();
}
}
private String unicodeToBuckwalter(String arabic) {
StringBuffer result=new StringBuffer(arabic.length());
for (char c:arabic.toCharArray()) {
Character newC=buckwalter.get(c);
if (null==newC) newC=c;
result.append(newC);
}
return result.toString();
}
private void repairTransliteration(Verse verse) {
if (verse.getChapterNo()==2 && verse.getVerseNo()==16) {
verse.setTransliteration("Ol<u>a</u>-ika alla<u>th</u>eena ishtarawoo a<s>l</s><u>dd</u>al<u>a</u>lata bi<s>a</s>lhud<u>a</u> fam<u>a</u> rabi<u>h</u>at tij<u>a</u>ratuhum wam<u>a</u> k<u>a</u>noo muhtadeen<s>a</s>");
return;
} else if (verse.getChapterNo()==2 && verse.getVerseNo()==12) {
verse.setTransliteration("Al<u>a</u> innahum humu almufsidoona wal<u>a</u>kin l<u>a</u> yash'uroon<s>a</s>");
return;
} else if (verse.getChapterNo()==7 && verse.getVerseNo()==45) {
verse.setTransliteration("Alla<u>th</u>eena ya<u>s</u>uddoona 'an sabeeli All<u>a</u>hi wayabghoonah<u>a</u> 'iwajan wahum bi<s>a</s>l-<u>a</u>khirati k<u>a</u>firoon<s>a</s>");
return;
} else if (verse.getChapterNo()==12 && verse.getVerseNo()==12) {
verse.setTransliteration("Arsilhu ma'an<u>a</u> ghadan yarta' wayal'ab wa-inn<u>a</u> lahu la<u>ha</u>fi<i><u>th</u></i>oon<s>a</s>");
return;
}
verse.setTransliteration(markTransliteration(verse.getText(), verse.getTransliteration()));
}
private String repairShiftedTransliteration(String arabic,
String transliteration) {
int arabicLength = arabic.split("[ ]").length;
int translitLength = transliteration.split("[ ]").length;
if (translitLength==arabicLength) return transliteration;
if (translitLength>arabicLength && transliteration.startsWith("Awa ")) {
transliteration=transliteration.replaceFirst("[ ]", "");
} else if (translitLength>arabicLength && transliteration.indexOf(" awa ")>=0) {
transliteration=transliteration.replaceAll("[ ][a][w][a][ ]", " awa");
} else if (translitLength>arabicLength && transliteration.indexOf("ayna m<u>a</u>")>=0) { //33:61
transliteration=transliteration.replaceAll("[a][y][n][a][ ][m][<][u][>][a][<][/][u][>]", "aynam<u>a</u>");
} else if (translitLength>arabicLength && transliteration.indexOf(" ba'da m<u>a</u> ")>=0) {
transliteration=transliteration.replaceAll("[ ][b][a]['][d][a][ ][m][<][u][>][a][<][/][u][>][ ]", " ba'dam<u>a</u> ");
} else if (translitLength>arabicLength && transliteration.indexOf(" likay l<u>a</u> ")>=0) {
transliteration=transliteration.replaceAll("[ ][l][i][k][a][y][ ][l][<][u][>][a][<][/][u][>][ ]", " likayl<u>a</u> ");
} else if (translitLength<arabicLength && transliteration.indexOf("aynam<u>a</u>")>=0) {
transliteration=transliteration.replaceAll("[a][y][n][a][m][<][u][>][a][<][/][u][>]", "ayna m<u>a</u>");
} else if (translitLength<arabicLength && transliteration.startsWith("H<u>a</u>antum")) {
transliteration=transliteration.replaceAll("[H][<][u][>][a][<][/][u][>][a][n][t][u][m]", "H<u>a</u> antum");
} else if (translitLength<arabicLength && transliteration.indexOf("feem<u>a</u>")>=0) {
transliteration=transliteration.replaceAll("[f][e][e][m][<][u][>][a][<][/][u][>]", "fee m<u>a</u>");
} else if (translitLength<arabicLength && transliteration.startsWith("Arsilhu")) { //12:11
transliteration="Q<u>a</u>loo y<u>a</u> ab<u>a</u>n<u>a</u> m<u>a</u> laka l<u>a</u> ta/mann<u>a</u> 'al<u>a</u> yoosufa wa-inn<u>a</u> lahu lan<u>as</u>i<u>h</u>oon<s>a</s>";
} else if (translitLength<arabicLength && transliteration.indexOf("yabnaomma")>=0) { //20:94
transliteration=transliteration.replaceAll("[y][a][b][n][a][o][m][m][a]", "ya bna omma");
} else if (translitLength<arabicLength && transliteration.indexOf("mimm<u>a</u>")>=0) { //30:28
transliteration=transliteration.replaceAll("[m][i][m][m][<][u][>][a][<][/][u][>]", "min m<u>a</u>");
} else if (translitLength<arabicLength && transliteration.indexOf("m<u>a</u>lee")>=0) { //40:41
transliteration=transliteration.replaceAll("[m][<][u][>][a][<][/][u][>][l][e][e]", "m<u>a</u> lee");
} else if (translitLength<arabicLength && transliteration.indexOf("amman")>=0) { //41:40
transliteration=transliteration.replaceAll("[a][m][m][a][n]", "am man");
} else if (translitLength<arabicLength && transliteration.startsWith("Waallawi")) { //72:16
transliteration=transliteration.replaceAll("[W][a][a][l][l][a][w][i]", "Waa llawi");
}
return transliteration;
}
private String markTransliteration(String arabic, String transliteration) {
int arabicLength = arabic.split("[ ]").length;
if (arabicLength!=transliteration.split("[ ]").length) {
transliteration = "";
for (int j=0;j<arabicLength;j++) {
transliteration+="TODO ";
}
transliteration+="TODO";
}
return transliteration;
}
@SuppressWarnings("unchecked")
public List<Section> getAllSectionList() {
PersistenceManager pm = PMF.get().getPersistenceManager();
try {
String query = "select from " + Section.class.getName() + " order by sectionNo";
List<Section> resultList = new ArrayList<Section>();
resultList.addAll((Collection<Section>) pm.newQuery(query)
.execute());
return resultList;
} finally {
pm.close();
}
}
@SuppressWarnings("unchecked")
public String initSectionLiterals(String start) {
PersistenceManager pm = PMF.get().getPersistenceManager();
try {
String[] location=start.split("[:]");
int chapterNo=Integer.valueOf(location[0]);
int verseNo=Integer.valueOf(location[1]);
int wordNo=Integer.valueOf(location[2]);
Chapter chapter = ((Collection<Chapter>) pm.newQuery("select from " + Chapter.class.getName() + " where chapterNo==" + chapterNo)
.execute()).iterator().next();
for (Section section:chapter.getSections()) {
if (section.getStartVerse()>verseNo || section.getEndVerse()<verseNo) continue;
for (Verse verse:section.getVerses()) {
if (verse.getVerseNo()!=verseNo) continue;
String[] texts = verse.getText().replaceAll("[\u0640]", "").split("[ ]");
int maxWordPerRequest=wordNo+WORD_COUNT_PER_REQUEST;
while (wordNo<texts.length && wordNo<maxWordPerRequest) {
if (wordNo==0) verse.setLiteral(unicodeToBuckwalter(verse.getText().replaceAll("[\u0640]", "")));
String[] words=verse.getLiteral().trim().split("[ ]");
String bamaResult=updateLiteralBama(words[wordNo]);
if (null==bamaResult) {
String googleResult=updateLiteralGoogle(texts[wordNo]);
if (null!=googleResult) {
words[wordNo]=googleResult;
}
} else {
if ( ! "SKIP".equals(bamaResult))
words[wordNo]=bamaResult;
}
StringBuffer result=new StringBuffer();
for (String word:words) {
result.append(word);
result.append(" ");
}
verse.setLiteral(result.toString().trim());
wordNo++;
}
if (wordNo>=texts.length) {
wordNo=0;
verseNo++;
}
if (verseNo>section.getEndVerse()) return null;
return ""+chapterNo+":"+verseNo+":"+wordNo;
}
}
} finally {
pm.close();
}
return null;
}
private String updateLiteralGoogle(String text) {
String result=null;
JSONObject jsRoot;
try {
String[] urlParts = (GOOGLE_TRANSLATE_URL_STRING + URLEncoder.encode(text,"UTF-8")).split("[?]");
result=sourceFactory.getSourceContent(urlParts[0], urlParts[1],false, "google="+text);
jsRoot = new JSONObject(result);
JSONObject responseData=jsRoot.getJSONObject("responseData");
result=responseData.getString("translatedText");
if (result==null) return null;
if ("".equals(result)) return null;
result=result.replaceAll("[ ]", "-");
} catch (JSONException e) {
return null;
} catch (UnsupportedEncodingException e) {
return null;
}
return result;
}
// static private final Pattern p = Pattern.compile("[<][T][R][ ][V][A][L][I][G][N][=][\"][t][o][p][\"][ ][A][L][I][G][N][=][\"][l][e][f][t][\"][>][<][T][D][>].*[<][/][T][D][>][<][/][T][R][>]");
static private final Pattern p = Pattern.compile("[<][T][A][B][L][E].*[<][/][T][A][B][L][E][>]");
//Input Word:</FONT> AloHamodu<HR>
//<TR VALIGN="top" ALIGN="left"><TD>the</TD><TD>commendation<BR>praise</TD></TR>
//<FONT SIZE=4>Solution 2:</FONT> Aalr~aHom`n<PRE>{al-raHom`n Funcwa</PRE>
private String updateLiteralBama(String buckString) {
String[] urlParts = (BAMA_URL_STRING+buckString.replaceAll("&","%26")).split("[?]");
String result=sourceFactory.getSourceContent(urlParts[0], urlParts[1],false, "bama="+buckString);
Matcher m = p.matcher(result);
if (m.find()) {
result = m.group(0);
result=result.replaceAll("[<][/][T][D][>][<][T][D][>]", "-");
result=result.replaceAll("[<][B][R][>]", "/");
result=result.replaceAll("[<F][O][N][T][ ][S][I][Z][E][=][4][>][S][o][l][u][t][i][o][n][ ].*?[<][/][P][R][E][>]", "");
result=result.replaceAll("\\<.*?\\>", "");
result=result.replaceAll("[ ]", "-");
} else {
return null;
}
if (result.length()>300) return "SKIP";
return result;
}
}