package org.untmpl;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashSet;
import java.util.Set;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.PrettyXmlSerializer;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XmlSerializer;
/**
 * A template common to two or more html pages.
 * <p>
 * A template is held as a set of {@link Node}s. Instances are obtained either
 * by comparing documents with {@link #find(String[])} or by loading a
 * previously serialized template with {@link #unserialize(String)}.
 *
 * @author eskil.andreen
 *
 */
public class Template {

    /** The nodes that make up this template. */
    private final Set<Node> mapping;

    private Template(Set<Node> mapping) {
        this.mapping = mapping;
    }

    /**
     * Cleans this template from the supplied document.
     * <p>
     * The document is parsed, the resulting tree is traversed with a
     * {@link Pruner} built from this template's nodes, and the tree is then
     * written back out as pretty-printed XML.
     * <p>
     * NOTE(review): earlier documentation stated that a
     * TemplateNotFoundException is thrown if this template doesn't match the
     * given document. It is not declared here; confirm whether the pruner
     * raises it as an unchecked exception.
     *
     * @param document
     *            the html document to clean
     * @return the serialized document after pruning
     * @throws IOException
     *             if parsing or writing the document fails
     */
    public String clean(String document) throws IOException {
        TagNode root = parse(document);
        Traverser.traverse(root, new Pruner(mapping));
        return write(root);
    }

    /**
     * Serializes this template.
     * <p>
     * Each node is written in its own serialized form, with a single
     * {@code "\n"} between consecutive nodes and no trailing newline. Nothing
     * is written for a template without nodes.
     *
     * @param writer
     *            the destination of the serialized template
     * @throws IOException
     *             if writing to {@code writer} fails
     */
    public void serialize(Writer writer) throws IOException {
        boolean first = true;
        for (Node node : mapping) {
            // The separator goes between entries only, so an empty mapping
            // produces no output instead of indexing past the array bounds.
            if (!first) {
                writer.append("\n");
            }
            writer.append(node.serialize());
            first = false;
        }
    }

    /**
     * Loads a serialized template.
     * <p>
     * The input is split on {@code "\n"}; each non-blank line is expected to
     * hold at least three comma separated integers (hash, depth and height, in
     * that order). Blank lines are skipped, so the output of
     * {@link #serialize(Writer)} for an empty template can be loaded again.
     *
     * @param serializedTemplate
     *            the serialized form of a template
     * @return the template described by {@code serializedTemplate}
     * @throws IllegalArgumentException
     *             if a line has fewer than three fields, or (as
     *             {@link NumberFormatException}) if a field is not an integer
     */
    public static Template unserialize(String serializedTemplate) {
        String[] items = serializedTemplate.split("\n");
        Set<Node> mapping = new HashSet<Node>(items.length);
        for (String item : items) {
            // Trimming drops a trailing '\r' left behind by CRLF line endings.
            String serializedPath = item.trim();
            if (serializedPath.length() == 0) {
                // "".split("\n") yields one empty element; treat it as no entry.
                continue;
            }
            String[] cmps = serializedPath.split(",");
            if (cmps.length < 3) {
                throw new IllegalArgumentException(
                        "Malformed template entry: '" + serializedPath + "'");
            }
            int hash = Integer.parseInt(cmps[0]);
            int depth = Integer.parseInt(cmps[1]);
            int height = Integer.parseInt(cmps[2]);
            mapping.add(new Node(hash, depth, height));
        }
        return new Template(mapping);
    }

    /**
     * Creates a new cleaner with special entity translation, unicode character
     * recognition and empty element tags all switched off.
     */
    private static HtmlCleaner getCleaner() {
        HtmlCleaner cleaner = new HtmlCleaner();
        cleaner.getProperties().setTranslateSpecialEntities(false);
        cleaner.getProperties().setRecognizeUnicodeChars(false);
        cleaner.getProperties().setUseEmptyElementTags(false);
        return cleaner;
    }

    /** Parses the document into a tag tree using a freshly configured cleaner. */
    private static TagNode parse(String document) throws IOException {
        return getCleaner().clean(document);
    }

    /**
     * Writes the tree rooted at {@code node} to a string with a pretty-printing
     * XML serializer that uses the same properties as the parser.
     */
    private static String write(TagNode node) throws IOException {
        XmlSerializer serializer = new PrettyXmlSerializer(getCleaner()
                .getProperties());
        StringWriter writer = new StringWriter();
        node.serialize(serializer, writer);
        return writer.toString();
    }

    /**
     * Parses the document and returns the nodes gathered by a {@link Collector}
     * during traversal, as a new mutable set.
     */
    private static Set<Node> condense(String document) throws IOException {
        TagNode root = parse(document);
        Collector collector = Traverser.traverse(root, new Collector());
        return new HashSet<Node>(collector.getNodes());
    }

    /**
     * Creates the common template of the supplied documents.
     * <p>
     * The template consists of the nodes that are collected from every one of
     * the documents.
     *
     * @param documents
     *            at least two html documents
     * @return the template shared by all {@code documents}
     * @throws IllegalArgumentException
     *             if fewer than two documents are supplied
     * @throws IOException
     *             if a document cannot be parsed
     */
    public static Template find(String[] documents) throws IOException {
        // An explicit check: assertions are disabled by default, which would
        // otherwise let a too-short array fail with an index error below.
        if (documents.length < 2) {
            throw new IllegalArgumentException(
                    "At least two documents are required, got "
                            + documents.length);
        }
        Set<Node> common = condense(documents[0]);
        for (int i = 1; i < documents.length; i++) {
            common.retainAll(condense(documents[i]));
        }
        return new Template(common);
    }
}