package sagan.staticpage.support;
import sagan.Indexer;
import sagan.search.support.CrawledWebDocumentProcessor;
import sagan.search.support.CrawlerService;
import sagan.search.support.SearchService;
import sagan.support.StaticPagePathFinder;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
 * {@link Indexer} whose items are absolute URLs of static pages.
 *
 * <p>Each item is the configured base URL followed by a page's URL path, as reported by
 * {@link StaticPagePathFinder}. Indexing an item passes that URL to the
 * {@link CrawlerService} together with a {@link CrawledWebDocumentProcessor}.
 */
@Service
public class StaticPageIndexer implements Indexer<String> {

    /** URL paths that are never offered as indexable items. */
    private static final Set<String> IGNORED_URL_PATHS = ignoredUrlPaths();

    private final CrawlerService crawlerService;
    private final StaticPagePathFinder staticPagePathFinder;
    private final CrawledWebDocumentProcessor documentProcessor;

    /**
     * Prefix prepended to every page's URL path. Read from the
     * {@code search.indexer.base_url} property, falling back to
     * {@code http://localhost:8080} when the property is not defined.
     */
    @Value("${search.indexer.base_url:http://localhost:8080}")
    private String baseUrl;

    /**
     * Creates the indexer and builds the document processor it hands to the crawler.
     *
     * @param crawlerService service used to crawl each indexable URL
     * @param searchService search service passed to the {@link CrawledWebDocumentProcessor}
     * @param staticPagePathFinder source of the static page paths
     */
    @Autowired
    public StaticPageIndexer(CrawlerService crawlerService, SearchService searchService,
                             StaticPagePathFinder staticPagePathFinder) {
        LocalStaticPagesSearchEntryMapper entryMapper = new LocalStaticPagesSearchEntryMapper();
        this.documentProcessor = new CrawledWebDocumentProcessor(searchService, entryMapper);
        this.staticPagePathFinder = staticPagePathFinder;
        this.crawlerService = crawlerService;
    }

    /** Builds the set of URL paths excluded from indexing. */
    private static Set<String> ignoredUrlPaths() {
        Set<String> ignored = new HashSet<>();
        ignored.add("/error");
        ignored.add("/500");
        ignored.add("/404");
        return ignored;
    }

    /**
     * Lists the absolute URLs of all static pages except the ignored ones.
     *
     * @return base URL concatenated with each non-ignored page's URL path
     * @throws RuntimeException wrapping the {@link IOException} raised while finding paths
     */
    @Override
    public Iterable<String> indexableItems() {
        try {
            return staticPagePathFinder.findPaths().stream()
                    .map(page -> page.getUrlPath())
                    .filter(urlPath -> !IGNORED_URL_PATHS.contains(urlPath))
                    .map(urlPath -> baseUrl + urlPath)
                    .collect(Collectors.toList());
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
    }

    /**
     * Crawls the given URL, feeding what the crawler finds to the document processor.
     *
     * @param path absolute URL to crawl, as produced by {@link #indexableItems()}
     */
    @Override
    public void indexItem(String path) {
        // NOTE(review): the literal 0 is presumably the link depth to follow — confirm
        // against CrawlerService.crawl.
        crawlerService.crawl(path, 0, documentProcessor);
    }

    /**
     * @return the fixed counter name {@code "static"}
     */
    @Override
    public String counterName() {
        return "static";
    }

    /**
     * @param path an indexable item
     * @return the item itself; the URL doubles as its identifier
     */
    @Override
    public String getId(String path) {
        return path;
    }
}