org.w3c.dom.Document xml = taskFile == null ? dBuilder.parse(System.in) : dBuilder.parse(new FileInputStream(taskFile));
JAXBContext context = JAXBContext.newInstance(BM25.class);
Unmarshaller um = context.createUnmarshaller();
RetrievalModel model = null;
QuerySet querySet = null;
for(Element child: XMLUtils.elements(xml.getDocumentElement().getChildNodes())) {
if (XMLUtils.is(child, ADHOC_MODEL)) {
for(Element grandchild: XMLUtils.elements(child.getChildNodes())) {
model = (RetrievalModel) um.unmarshal(grandchild);
break;
}
}
if (XMLUtils.is(child, TOPICS)) {
final String type = child.getAttribute("type");
switch(type) {
case "trec":
try(BufferedReader reader = new BufferedReader(new FileReader(child.getAttribute("path")))) {
querySet = TRECTopic.readTopics(reader, false);
}
break;
default:
throw new RuntimeException(String.format("Cannot handle topics of type %s", type));
}
// Do something
}
}
if (model == null)
throw new IllegalArgumentException("No model was present in the XML description file");
if (querySet == null)
throw new IllegalArgumentException("No topics were present in the XML description file");
logger.info(String.format("Starting with model [%s] and %d topics", model, querySet.queries().size()));
// Discarded documents: judgments are loaded only when a discarded QREL
// file was supplied, otherwise the reference stays null
TRECJudgments discarded = discardedQRELFile != null
        ? new TRECJudgments(discardedQRELFile)
        : null;

// Queries: collect the identifiers of the topics to answer. A topic is
// kept when onlyTopics is empty or when onlyTopics contains its identifier.
Set<String> topicIds = GenericHelper.newHashSet();
Map<String, ? extends Topic> topics = querySet.queries();
for (String candidateId : topics.keySet()) {
    // NOTE(review): candidateId is taken from topics.keySet() itself, so this
    // membership test looks always true; kept because it is part of the log line.
    final boolean known = topics.keySet().contains(candidateId);
    final boolean unrestricted = onlyTopics.isEmpty();
    final boolean requested = onlyTopics.contains(candidateId);

    logger.debug(new LazyString("Considering topic %s (%b/%b/%b)", candidateId, known,
            unrestricted, requested));

    final boolean selected = known && (unrestricted || requested);
    if (selected) {
        topicIds.add(candidateId);
    }
}

// Nothing was selected: report it and exit with status 1
if (topicIds.isEmpty()) {
    logger.error("No topics to be answered");
    return 1;
}
// Iterate over the selected topics. The timer is started first, then a
// task sized to the number of selected topics is created on it.
timer.start();
TaskTimer.Task task = timer.new Task("Answering topics", "topics",
topicIds.size());
// Output stream, bound to standard output (not written to in the lines visible here)
PrintStream output = System.out;
// Hand the collection and the index to the model before any topic is processed
model.init(collection, index);
// Starts at zero; not updated in the visible part of the loop
int totalRetrieved = 0;
for (String topicId : topicIds) {
logger.info(String.format("Answering topic %s", topicId));
Topic topic = topics.get(topicId);
// Result list handed to model.process below (presumably filled by it, since its size is read afterwards)
ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index, SelectedInterval[]>>> results = new ObjectArrayList<>();
// Keys of the discarded judgments recorded for this topic (presumably document
// identifiers); stays null when no judgments were loaded or none exist for the topic
Set<String> discardedDocuments = null;
if (discarded != null) {
Map<String, Integer> map = discarded.get(topicId);
if (map != null)
discardedDocuments = map.keySet();
}
// Ask for results: the requested count is the capacity plus the number of
// discarded documents for this topic, so that some extra documents are
// available in case some are discarded afterwards
model.process(topic, results,
capacity
+ (discardedDocuments == null ? 0
: discardedDocuments.size()), timer);
// Number of results actually present after processing
final int retrieved = results.size();