public Variable execute(Scraper scraper, ScraperContext context) {
ScriptEngine scriptEngine = scraper.getScriptEngine();
BaseElementDef patternDef = regexpDef.getRegexpPatternDef();
Variable patternVar = getBodyTextContent(patternDef, scraper, context, true);
debug(patternDef, scraper, patternVar);
BaseElementDef sourceDef = regexpDef.getRegexpSourceDef();
Variable source = new BodyProcessor(sourceDef).run(scraper, context);
debug(sourceDef, scraper, source);
String replace = BaseTemplater.execute( regexpDef.getReplace(), scriptEngine);
boolean isReplace = CommonUtil.isBooleanTrue(replace);
boolean flagCaseInsensitive = CommonUtil.getBooleanValue( BaseTemplater.execute(regexpDef.getFlagCaseInsensitive(), scriptEngine), false );
boolean flagMultiline = CommonUtil.getBooleanValue( BaseTemplater.execute(regexpDef.getFlagMultiline(), scriptEngine), false );
boolean flagDotall = CommonUtil.getBooleanValue( BaseTemplater.execute(regexpDef.getFlagDotall(), scriptEngine), true );
boolean flagUnicodecase = CommonUtil.getBooleanValue( BaseTemplater.execute(regexpDef.getFlagUnicodecase(), scriptEngine), true );
boolean flagCanoneq = CommonUtil.getBooleanValue( BaseTemplater.execute(regexpDef.getFlagCanoneq(), scriptEngine), false );
this.setProperty("Is replacing", String.valueOf(isReplace));
this.setProperty("Flag CaseInsensitive", String.valueOf(flagCaseInsensitive));
this.setProperty("Flag MultiLine", String.valueOf(flagMultiline));
this.setProperty("Flag DotAll", String.valueOf(flagDotall));
this.setProperty("Flag UnicodeCase", String.valueOf(flagUnicodecase));
this.setProperty("Flag CanonEq", String.valueOf(flagCanoneq));
String maxLoopsString = BaseTemplater.execute( regexpDef.getMax(), scriptEngine);
double maxLoops = Constants.DEFAULT_MAX_LOOPS;
if (maxLoopsString != null && !"".equals(maxLoopsString.trim())) {
maxLoops = Double.parseDouble(maxLoopsString);
}
this.setProperty("Max loops", String.valueOf(maxLoops));
int flags = 0;
if (flagCaseInsensitive) {
flags |= Pattern.CASE_INSENSITIVE;
}
if (flagMultiline) {
flags |= Pattern.MULTILINE;
}
if (flagDotall) {
flags |= Pattern.DOTALL;
}
if (flagUnicodecase) {
flags |= Pattern.UNICODE_CASE;
}
if (flagCanoneq) {
flags |= Pattern.CANON_EQ;
}
Pattern pattern = Pattern.compile(patternVar.toString(), flags);
List resultList = new ArrayList();
List bodyList = source.toList();
Iterator it = bodyList.iterator();
while (it.hasNext()) {
Variable currVar = (Variable) it.next();
String text = currVar.toString();
Matcher matcher = pattern.matcher(text);
int groupCount = matcher.groupCount();
StringBuffer buffer = new StringBuffer();
int index = 0;
while ( matcher.find() ) {
index++;
// if index exceeds the maximum number of matching sequences, exit the loop
if (maxLoops < index) {
break;
}
for (int i = 0; i <= groupCount; i++) {
context.put("_"+i, new NodeVariable(matcher.group(i)));
}
BaseElementDef resultDef = regexpDef.getRegexpResultDef();
Variable result = getBodyTextContent(resultDef, scraper, context, true);
debug(resultDef, scraper, result);
String currResult = (result == null) ? matcher.group(0) : result.toString();
if (isReplace) {
matcher.appendReplacement(buffer, currResult);
} else {
resultList.add(new NodeVariable(currResult));
}