return html.replace("<", "<").replace(">", ">").replace(""", "\"").replace("&", "&");
}
/**
 * Converts a fragment of field HTML into a {@link FieldContentType}.
 *
 * <p>The fragment is consumed left to right. Text between tags is passed through
 * {@code parseHTML} and appended to the result's content list. The following
 * markup is recognised:
 * <ul>
 *   <li>{@code <br />} immediately followed by another tag: skipped without output;</li>
 *   <li>any other {@code <br />}: appended as an unnamed subfield wrapping an empty type;</li>
 *   <li>{@code <i><b>name</b> </i>}: appended as a subfield named {@code name} with emph set to 1;</li>
 *   <li>{@code <i>name </i>}: appended as a subfield named {@code name};</li>
 *   <li>an anchor whose href starts with the World Factbook {@code ref_maps/} URL:
 *       appended as a refmap whose file is derived from the rest of the href and whose
 *       name is the anchor text.</li>
 * </ul>
 *
 * @param ref  value stored on the result via {@code setRef}
 * @param html HTML fragment to parse
 * @return the populated field content
 * @throws RuntimeException if an unrecognised tag is met, or a recognised tag is
 *         missing its expected closing delimiter
 */
public static FieldContentType parseFieldContentType(String ref, String html) {
    final String refMapPrefix = "<a href=\"https://www.cia.gov/library/publications/the-world-factbook/graphics/ref_maps/";
    ObjectFactory of = new ObjectFactory();
    FieldContentType result = new FieldContentType();
    result.setRef(ref);
    int pos = html.indexOf('<');
    while (pos != -1) {
        // Emit any plain text that precedes the next tag.
        if (pos != 0) {
            result.getContent().add(parseHTML(html.substring(0, pos)));
        }
        html = html.substring(pos);
        if (html.startsWith("<br /><")) {
            // A line break directly in front of another tag produces no element.
            html = html.substring(6);
        } else if (html.startsWith("<br />")) {
            result.getContent().add(of.createFieldContentTypeUnnamedsubfield(of.createEmptyType()));
            html = html.substring(6);
        } else if (html.startsWith("<i><b>")) {
            pos = requireDelimiter(html, "</b> </i>");
            FieldContentType.Subfield sf = of.createFieldContentTypeSubfield();
            sf.setEmph((short) 1);
            sf.setName(parseHTML(html.substring(6, pos)));
            result.getContent().add(of.createFieldContentTypeSubfield(sf));
            html = html.substring(pos + 9);
        } else if (html.startsWith("<i>")) {
            pos = requireDelimiter(html, " </i>");
            FieldContentType.Subfield sf = of.createFieldContentTypeSubfield();
            sf.setName(parseHTML(html.substring(3, pos)));
            result.getContent().add(of.createFieldContentTypeSubfield(sf));
            html = html.substring(pos + 5);
        } else if (html.startsWith(refMapPrefix)) {
            html = html.substring(refMapPrefix.length());
            FieldContentType.Refmap refmap = of.createFieldContentTypeRefmap();
            // What remains of the href up to the closing quote is the map file path.
            pos = requireDelimiter(html, "\">");
            String file = html.substring(0, pos);
            if (file.startsWith("pdf/")) {
                file = file.substring(4);
            } else {
                // Paths not directly under pdf/ get a ':' marker and lose their "/pdf/" segment.
                file = ":" + file.replace("/pdf/", "/");
            }
            refmap.setFile(file);
            html = html.substring(pos + 2);
            pos = requireDelimiter(html, "</a>");
            refmap.setName(parseHTML(html.substring(0, pos)));
            html = html.substring(pos + 4);
            result.getContent().add(of.createFieldContentTypeRefmap(refmap));
        } else {
            throw new RuntimeException("Unparsable: " + html);
        }
        pos = html.indexOf('<');
    }
    // Trailing text after the last tag.
    if (html.length() > 0) {
        result.getContent().add(parseHTML(html));
    }
    return result;
}

/**
 * Returns the index of {@code delimiter} in {@code html}, failing with the same
 * "Unparsable" error used for unknown tags when the delimiter is absent, instead
 * of letting a later {@code substring} call fail on a negative index.
 */
private static int requireDelimiter(String html, String delimiter) {
    int index = html.indexOf(delimiter);
    if (index == -1) {
        throw new RuntimeException("Unparsable: " + html);
    }
    return index;
}