}
private List<Field> extractFieldsFromTable(String html) {
log.debug("extracting fields from table: {}", html);
List<Field> extractedFields = new ArrayList<Field>();
Source source = new Source(html);
source.fullSequentialParse();
int cellCount = source.getAllElements(HTMLElementName.TD).size();
int rowCount = source.getAllElements(HTMLElementName.TR).size();
log.debug("found {} cells in {} rows", cellCount, rowCount);
if (cellCount == (rowCount * 2)) {
Field lastField = null;
log.debug("cells.size: {}", cellCount);
List<Element> cells = source.getAllElements(HTMLElementName.TD);
for (int i = 0; i < cellCount; i++) {
Element labelElement = cells.get(i);
Element valueElement = cells.get(++i);
String label = labelElement.getTextExtractor().toString().trim().replaceAll(":$", "");
String value = getValueFieldText(valueElement);
log.debug("found field: {}={}", label, value);
if (StringUtils.isEmpty(label) && lastField != null) {
lastField.addValue(value);
} else {
lastField = new ScrapedField(label, value);
extractedFields.add(lastField);
}
}
} else {
List<String> headers = new ArrayList<String>();
List<Element> rows = source.getAllElements(HTMLElementName.TR);
for (Element row : rows) {
List<Element> headerElements = row.getAllElements(HTMLElementName.TH);
if (headerElements.size() > 0) {
headers.clear();
}