package de.peacei.gae.foodsupplier.parser;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.peacei.gae.foodsupplier.data.Food;
import de.peacei.gae.foodsupplier.data.Mensa;
import de.peacei.gae.foodsupplier.data.Menu;
import de.peacei.gae.foodsupplier.data.Weekplan;
import de.peacei.gae.foodsupplier.util.CalendarUtil;
/**
 * Base class for parsers that turn the HTML of a canteen ("Mensa") menu page
 * into a {@link Weekplan}.
 *
 * <p>The parsing itself is string/regex based: the week number is read from
 * the page, each configured food row is located by its keyword, and the price
 * row that follows it is scanned for student and staff prices.
 */
public abstract class AbstractMensaParser implements MensaParser{
/** Canteen that is attached to every week plan produced by this parser. */
protected Mensa mensa;
/** Food rows to look for in the page (keyword to search, label to assign). */
protected FoodConfig[] configs;
/**
 * Stores the canteen and the food-row configuration used by the parse methods.
 *
 * @param mensa   canteen that parsed week plans are attributed to
 * @param configs food rows to extract from the page
 */
protected AbstractMensaParser(Mensa mensa, FoodConfig[] configs) {
    this.configs = configs;
    this.mensa = mensa;
}
/**
 * Returns the date of a weekday inside the week that contains {@code week}.
 *
 * @param week  any date; it is handed to {@code CalendarUtil.getCalendar}
 * @param index weekday offset counted from Monday (0 selects
 *              {@link Calendar#MONDAY}, 4 selects {@link Calendar#FRIDAY})
 * @return the calendar's time after its day-of-week field was moved
 */
protected Date getWeekdayForWeekAndIndex(Date week, int index) {
    final Calendar day = CalendarUtil.getCalendar(week);
    // Calendar.MONDAY is 2, so this equals the former "index + 2".
    day.set(Calendar.DAY_OF_WEEK, Calendar.MONDAY + index);
    return day.getTime();
}
/** One or two consecutive digits: the week number printed in the page. */
private static final Pattern WEEK_NUMBER_DIGITS = Pattern.compile("\\d{1,2}");

/**
 * Reads the calendar week printed in the page and checks it against the
 * expected week.
 *
 * <p>The number is taken from the text between the second {@code <tr>} and
 * the next {@code </td>}: that text must contain {@code <b>} and
 * {@code </b>}, and the first run of one or two digits after {@code <b>} is
 * the week number. When the number read (or {@code -1} for a {@code null}
 * page) differs from {@code weekNumber}, the decision is delegated to
 * {@link #parseWeekNumberAlt(String, int)}.
 *
 * @param html       page source, may be {@code null}
 * @param weekNumber week the caller expects the page to describe
 * @return {@code weekNumber} when the printed week equals it; {@code -1}
 *         when the expected markup is missing; otherwise whatever
 *         {@code parseWeekNumberAlt} returns
 */
protected int parseWeekNumber(String html, int weekNumber) {
    int kw = -1;
    if (html != null) {
        // Plain indexOf replaces the former per-call regex compilation for
        // literal tags; none of the steps below can throw, so no catch-all
        // is needed any more.
        int firstRow = html.indexOf("<tr>");
        if (firstRow < 0) {
            return -1;
        }
        int secondRow = html.indexOf("<tr>", firstRow + 4);
        if (secondRow < 0) {
            return -1;
        }
        int cellStart = secondRow + 4;
        int cellEnd = html.indexOf("</td>", cellStart);
        if (cellEnd < 0) {
            return -1;
        }
        String cell = html.substring(cellStart, cellEnd);
        int boldOpen = cell.indexOf("<b>");
        if (boldOpen < 0) {
            return -1;
        }
        if (cell.indexOf("</b>") < 0) {
            return -1;
        }
        Matcher digits = WEEK_NUMBER_DIGITS.matcher(cell);
        if (!digits.find(boldOpen + 3)) {
            return -1;
        }
        kw = Integer.parseInt(digits.group());
    }
    if (kw != weekNumber) {
        return parseWeekNumberAlt(html, weekNumber);
    }
    return kw;
}
/**
 * Fallback week check that derives the calendar week from a date printed in
 * the page instead of from a printed week number.
 *
 * <p>The date is expected as {@code day.month.year} inside {@code <b>…</b>}
 * in the second cell after the third {@code <tr>} (the text between the
 * first and the second {@code </td>} that follow it). The week is computed
 * for the time zone {@code Europe/Berlin} with explicit ISO-8601 week rules
 * (weeks start on Monday, the first week has at least four days), so the
 * result no longer depends on the JVM's default locale.
 *
 * @param html       page source, may be {@code null}
 * @param weekNumber week the caller expects the page to describe
 * @return {@code weekNumber} when the printed date lies in that week,
 *         {@code -1} otherwise (including any markup or number format
 *         problem)
 */
public int parseWeekNumberAlt(String html, int weekNumber) {
    if (html == null) {
        return -1;
    }

    // Skip to the end of the third <tr>.
    int cursor = 0;
    for (int row = 0; row < 3; row++) {
        int rowStart = html.indexOf("<tr>", cursor);
        if (rowStart < 0) {
            return -1;
        }
        cursor = rowStart + 4;
    }

    // The date cell is the text between the first and the second </td>.
    int firstCellEnd = html.indexOf("</td>", cursor);
    if (firstCellEnd < 0) {
        return -1;
    }
    int cellStart = firstCellEnd + 5;
    int cellEnd = html.indexOf("</td>", cellStart);
    if (cellEnd < 0) {
        return -1;
    }
    String cell = html.substring(cellStart, cellEnd);

    int boldOpen = cell.indexOf("<b>");
    if (boldOpen < 0) {
        return -1;
    }
    int textStart = boldOpen + 3;
    // Search the closing tag after the opening one so a stray earlier </b>
    // cannot produce an inverted range.
    int textEnd = cell.indexOf("</b>", textStart);
    if (textEnd < 0) {
        return -1;
    }

    String[] parts = cell.substring(textStart, textEnd).trim().split("\\.");
    if (parts.length < 3) {
        return -1;
    }
    int day;
    int month;
    int year;
    try {
        day = Integer.parseInt(parts[0].trim());
        month = Integer.parseInt(parts[1].trim());
        year = Integer.parseInt(parts[2].trim());
    } catch (NumberFormatException notADate) {
        return -1;
    }

    Calendar datum = new GregorianCalendar(TimeZone.getTimeZone("Europe/Berlin"));
    datum.clear();
    // Explicit ISO-8601 week rules: the default-locale rules (e.g. en_US)
    // number weeks differently in years such as 2016.
    datum.setFirstDayOfWeek(Calendar.MONDAY);
    datum.setMinimalDaysInFirstWeek(4);
    datum.set(year, month - 1, day);

    int kw = datum.get(Calendar.WEEK_OF_YEAR);
    return kw == weekNumber ? kw : -1;
}
/**
 * Builds the week plan for {@code weekNumber} from a menu page.
 *
 * @param html       page source
 * @param weekNumber week the page is expected to describe
 * @return a week plan carrying this parser's canteen, the week number and
 *         the parsed menus, or {@code null} when
 *         {@link #parseWeekNumber(String, int)} does not confirm the week
 */
//@Override
public Weekplan readWeekplanForWeek(String html, int weekNumber) {
    if (parseWeekNumber(html, weekNumber) != weekNumber) {
        return null;
    }
    final Weekplan plan = new Weekplan();
    plan.setMensa(mensa);
    plan.setWeek(weekNumber);
    // NOTE(review): the menu dates are derived from the current date, not
    // from weekNumber, so they look wrong whenever a week other than the
    // current one is requested. Confirm against the callers.
    final Date today = new Date();
    plan.setMenus(readMenus(html, today));
    return plan;
}
/** Category marker in parentheses, e.g. {@code (V)}. */
private static final Pattern FOOD_CATEGORY_LETTER = Pattern.compile("\\([VGSRFWL]\\)");
/** Category icon file names (plus the {@code =" "} remnant) used when no letter marker is present. */
private static final Pattern FOOD_CATEGORY_ICON = Pattern.compile(
    "(schwein\\.gif|Rind\\.gif|Vegetarisch\\.gif|Huhn\\.gif|" +
    "fisch\\.gif|SunFlower\\.gif|=\" \"|Wild\\.gif|wild\\.gif|Lamm\\.gif|lamm\\.gif)");
/** Text wrapped in {@code >>…<<}; only the wrapped text is kept. */
private static final Pattern FOOD_CHEVRON_WRAPPER = Pattern.compile(">>(.*)<<");
/** Simple HTML tags that are stripped from the description. */
private static final Pattern FOOD_SIMPLE_TAG = Pattern.compile("<[a-zA-Z0-9\\ \"\\-=#&?\\/\\.\\\\]*>");
/** Dessert marker words. */
private static final Pattern FOOD_DESSERT = Pattern.compile("(Dessert|Tagesdessert)");
/** A phrase of letters and whitespace enclosed in double quotes. */
private static final Pattern FOOD_QUOTED_PHRASE = Pattern.compile("\"[a-zA-Zäöüß\\s]+\"");
/** Two or more whitespace characters in a row. */
private static final Pattern FOOD_WHITESPACE_RUN = Pattern.compile("\\s{2,}");
/** An ampersand surrounded by whitespace. */
private static final Pattern FOOD_AMPERSAND = Pattern.compile("\\s&\\s");
/** Characters and entity remnants that must not remain in a description. */
private static final Pattern FOOD_INVALID_CHARS = Pattern.compile("([*<>\\.*+#&/]|nbsp;)+");

/**
 * Parses the table row that starts at {@code keyWord} into five foods and
 * fills in their prices from the row that follows.
 *
 * <p>The row text runs from the first occurrence of {@code keyWord} to just
 * before the next {@code </tr>}. Starting ten characters into that text,
 * five consecutive {@code <td…</td>} cells are read. For each cell the
 * category (letter marker or icon name) and the soup/dessert marker are
 * extracted, the remaining markup is removed and the text is normalised. An
 * empty cell yields the description {@code "Entfällt"}. The label is taken
 * from the first entry of {@code configs} that matches {@code keyWord}.
 *
 * @param html    page source
 * @param keyWord text that marks the start of the food row
 * @return an array of five foods (entries stay {@code null} when no config
 *         matches {@code keyWord}), or {@code null} when the row or one of
 *         its five cells cannot be found
 */
protected Food[] parse(String html, String keyWord) {
    if (html == null || keyWord == null) {
        return null;
    }
    Food[] food = new Food[5];
    int rowEnd = -1;
    try {
        int rowStart = html.indexOf(keyWord);
        rowEnd = html.indexOf("</tr>", rowStart) - 1;
        if (rowStart < 0 || rowEnd < rowStart) {
            return null;
        }
        String row = html.substring(rowStart, rowEnd);

        // The first ten characters of the row text are skipped before the
        // first cell is searched.
        int cursor = 10;
        for (int day = 0; day < 5; day++) {
            byte type = Food.NO_TYPE;
            byte extra = Food.NO_EXTRA;

            int cellOpen = row.indexOf("<td", cursor);
            if (cellOpen < 0) {
                return null;
            }
            cursor = cellOpen + 3;
            int cellClose = row.indexOf("</td>", cursor);
            if (cellClose < 0) {
                return null;
            }
            String description = row.substring(cellOpen, cellClose);

            // Replace line breaks with spaces.
            description = description.replace("<br>", " ");

            // Parse the category: a letter marker wins, icons are the fallback.
            Matcher matcher = FOOD_CATEGORY_LETTER.matcher(description);
            if (matcher.find()) {
                // The letter itself is stored as the type byte.
                type = (byte) description.charAt(matcher.start() + 1);
                description = matcher.replaceAll("");
            } else {
                matcher = FOOD_CATEGORY_ICON.matcher(description);
                if (matcher.find()) {
                    switch (description.charAt(matcher.start())) {
                        case 'R': type = Food.BEEF; break;
                        case 'V': type = Food.VEGETARIAN; break;
                        case 'H': type = Food.POULTRY; break;
                        case 's': type = Food.PORK; break;
                        case 'f': type = Food.FISH; break;
                        case 'S': type = Food.VEGAN; break;
                        case '=': case 'w': case 'W': type = Food.GAME; break;
                        case 'l': case 'L': type = Food.LAMB; break;
                        default: type = Food.NO_TYPE;
                    }
                    description = matcher.replaceAll("");
                }
            }

            // Remove the HTML tags.
            description = FOOD_CHEVRON_WRAPPER.matcher(description).replaceAll("$1");
            description = FOOD_SIMPLE_TAG.matcher(description).replaceAll("");
            description = description.replace("*", "");

            // Parse and remove the soup-of-the-day or dessert marker.
            if (description.contains("Tagessuppe")) {
                description = description.replace("Tagessuppe", "");
                extra = Food.SOUP;
            } else {
                matcher = FOOD_DESSERT.matcher(description);
                if (matcher.find()) {
                    description = matcher.replaceAll("");
                    extra = Food.DESSERT;
                }
            }

            // Tidy whitespace inside quotation marks (each phrase on its own).
            description = tidyQuotedPhrases(description);
            // Collapse repeated whitespace.
            description = FOOD_WHITESPACE_RUN.matcher(description).replaceAll(" ");
            // Replace the ampersand by the German word for "and".
            description = FOOD_AMPERSAND.matcher(description).replaceAll(" und ");
            // Drop a dangling "und" at the end.
            if (description.endsWith(" und ")) {
                description = description.substring(0, description.length() - 5);
            } else if (description.endsWith(" und")) {
                description = description.substring(0, description.length() - 4);
            }
            // Remove invalid characters, then surrounding whitespace.
            description = FOOD_INVALID_CHARS.matcher(description).replaceAll("");
            description = description.trim();

            // The emptiness check has to come before the first character is
            // touched; otherwise an empty cell discards the whole row.
            if (description.length() == 0) {
                description = "Entfällt";
            } else {
                description = description.substring(0, 1).toUpperCase(Locale.GERMANY)
                    + description.substring(1);
            }

            for (FoodConfig foodConfig : configs) {
                boolean match = foodConfig.matchOnlyFirstLetter
                    ? keyWord.charAt(0) == foodConfig.keyword.charAt(0)
                    : keyWord.equals(foodConfig.keyword);
                if (match) {
                    food[day] = new Food(foodConfig.label, description, type, extra);
                    break;
                }
            }
        }
    } catch (RuntimeException unparsable) {
        // Best effort, as before: a row that cannot be processed is skipped
        // by the caller instead of aborting the whole page.
        return null;
    }

    // The price row is the text from 17 characters after the end of the food
    // row up to the next </tr>. When it cannot be located, an empty source is
    // passed so that setPrices leaves its "---" placeholders in place.
    int priceFrom = rowEnd + 17;
    int priceTo = html.indexOf("</tr>", rowEnd + 4);
    String priceRow = priceTo >= priceFrom ? html.substring(priceFrom, priceTo) : "";
    setPrices(food, priceRow, keyWord);
    return food;
}

/** Trims the whitespace just inside every pair of double quotes. */
private static String tidyQuotedPhrases(String text) {
    Matcher quoted = FOOD_QUOTED_PHRASE.matcher(text);
    StringBuffer tidied = new StringBuffer();
    while (quoted.find()) {
        String phrase = quoted.group();
        String inner = phrase.substring(1, phrase.length() - 1).trim();
        quoted.appendReplacement(tidied, Matcher.quoteReplacement("\"" + inner + "\""));
    }
    quoted.appendTail(tidied);
    return tidied.toString();
}
/** A price such as {@code 2,50} or {@code 10,50}: euros, comma, two cent digits. */
private static final Pattern PRICE_AMOUNT = Pattern.compile("\\d+,\\d\\d");

/**
 * Sets the student and staff price of up to five foods from a price row.
 *
 * <p>Both prices of every non-null food start as {@code "---"}. The row is
 * then walked cell by cell ({@code "<td "} … {@code "</td>"}), four cells
 * per food: the second cell of each group supplies the student price and
 * the fourth the staff price, each taken from the first amount found in the
 * cell and suffixed with {@code €}.
 *
 * <p>A former special case for the Airport canteen and the keyword
 * {@code "ESSEN II"}, which skipped the first cell, was already disabled and
 * has been dropped.
 *
 * @param food         foods to update; {@code null} entries are skipped
 * @param priceSources HTML of the price row
 * @param keyWord      keyword of the food row (currently unused)
 */
protected void setPrices(Food[] food, String priceSources, String keyWord) {
    if (food == null) {
        return;
    }
    String source = priceSources == null ? "" : priceSources;

    int startIndex = source.indexOf("<td ", 0);
    int endIndex = source.indexOf("</td>", 0);

    for (int i = 0; i < 5 && i < food.length; i++) {
        Food item = food[i];
        if (item == null) {
            // As before, a missing food does not consume any cells.
            continue;
        }
        item.setStudentPrice("---");
        item.setStaffPrice("---");

        for (int column = 0; column < 4; column++) {
            if (column == 1 || column == 3) {
                if (startIndex < 0 || endIndex < startIndex) {
                    // No readable cell here: give up on this food without
                    // moving the cursor, exactly like the former swallowed
                    // exception did.
                    break;
                }
                Matcher amount = PRICE_AMOUNT.matcher(source.substring(startIndex, endIndex));
                if (amount.find()) {
                    String price = amount.group() + "€";
                    if (column == 1) {
                        item.setStudentPrice(price);
                    } else {
                        item.setStaffPrice(price);
                    }
                }
            }
            // indexOf treats a negative start as 0, so once the cells run
            // out the scan restarts at the beginning of the row. Rows with
            // fewer cells than foods may depend on this, so it is kept.
            startIndex = source.indexOf("<td ", endIndex);
            endIndex = source.indexOf("</td>", startIndex);
        }
    }
}
/**
 * Builds one menu per weekday index 0–4 from the configured food rows.
 *
 * <p>Every configured keyword is parsed with {@link #parse(String, String)};
 * rows for which that returns {@code null} are left out. The menu for index
 * {@code i} receives the {@code i}-th food of every remaining row, in
 * configuration order, and is dated via
 * {@link #getWeekdayForWeekAndIndex(Date, int)}.
 *
 * @param html page source; {@code null} yields an empty list
 * @param week date passed on to {@code getWeekdayForWeekAndIndex}
 * @return five menus, or an empty list when {@code html} is {@code null}
 */
protected List<Menu> readMenus(final String html, final Date week) {
    final List<Menu> result = new ArrayList<Menu>();
    if (html == null) {
        return result;
    }

    final List<Food[]> rows = new ArrayList<Food[]>();
    for (FoodConfig config : configs) {
        final Food[] row = parse(html, config.keyword);
        if (row != null) {
            rows.add(row);
        }
    }

    for (int day = 0; day < 5; day++) {
        final Menu menu = new Menu();
        menu.setDay(getWeekdayForWeekAndIndex(week, day));
        for (Food[] row : rows) {
            menu.getFoods().add(row[day]);
        }
        result.add(menu);
    }
    return result;
}
/**
 * Describes one food row of the page: the keyword that marks the row, the
 * label given to the foods parsed from it, and how keywords are compared.
 */
protected static class FoodConfig {
    /** Text that marks the start of the food row in the page. */
    private final String keyword;
    /** Label passed to every food created for this row. */
    private final String label;
    /** When set, only the first character of the keywords is compared. */
    private final boolean matchOnlyFirstLetter;

    /**
     * @param keyword              text that marks the food row
     * @param label                label for the foods of this row
     * @param matchOnlyFirstLetter compare only the first character of the
     *                             keywords instead of the whole text
     */
    public FoodConfig(String keyword, String label, boolean matchOnlyFirstLetter) {
        this.matchOnlyFirstLetter = matchOnlyFirstLetter;
        this.label = label;
        this.keyword = keyword;
    }
}
}