/*
Copyright (C) 2011 maik.jablonski@jease.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package jease.cms.service;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jease.cms.domain.Content;
import jease.cms.domain.Linkcheck;
import jfix.db4o.Database;
import jfix.functor.Command;
import jfix.util.Regexps;
import jfix.util.Urls;
/**
 * Checks the URLs found in the fulltext of {@link Content} objects and stores
 * the result of each check as a {@link Linkcheck} object in the database.
 *
 * A full check can be run in a background thread via {@link #start()}. The
 * static {@code active} flag ensures that at most one full check runs at a
 * time; it is only read and written while holding the class lock.
 */
public class Linkchecker implements Runnable {

    /**
     * Splits an internal URL into the path (group 1) and the trailing part
     * introduced by the first ';', '#' or '?'.
     */
    private static final Pattern INTERNAL_URL_PATH = Pattern.compile(
            "^(.*?)(;|#|\\?)(.*)$", Pattern.DOTALL | Pattern.MULTILINE);

    /**
     * Timeout handed to {@link Urls#getStatus}. NOTE(review): presumably
     * seconds - confirm against jfix.util.Urls.
     */
    private static final int TIMEOUT = 60;

    /** True while a full check is running. Guarded by the class lock. */
    private static boolean active = false;

    /**
     * Returns true if a full link check is currently running.
     */
    public static synchronized boolean isActive() {
        return active;
    }

    /**
     * Starts a full link check in a new thread. If a check is already
     * running, the new thread returns without doing anything.
     */
    public static synchronized void start() {
        new Thread(new Linkchecker()).start();
    }

    /**
     * Clears all stored results and performs a full check, unless another
     * full check is already running.
     */
    public void run() {
        // The flag is static, so it must be claimed under the class lock; an
        // instance lock would not exclude other Linkchecker instances.
        if (!tryActivate()) {
            return;
        }
        try {
            clear();
            check();
        } finally {
            deactivate();
        }
    }

    /**
     * Atomically marks the checker as active.
     *
     * @return true if the caller acquired the active state, false if a check
     *         was already running
     */
    private static synchronized boolean tryActivate() {
        if (active) {
            return false;
        }
        active = true;
        return true;
    }

    /**
     * Marks the checker as inactive again.
     */
    private static synchronized void deactivate() {
        active = false;
    }

    /**
     * Deletes all linkcheck objects from database.
     */
    public static void clear() {
        Database.write(new Command() {
            public void run() {
                for (Linkcheck linkcheck : Database.query(Linkcheck.class)) {
                    Database.delete(linkcheck);
                }
            }
        });
    }

    /**
     * Deletes all linkcheck objects from database with given path.
     *
     * NOTE(review): unlike {@link #clear()} this is not wrapped in
     * Database.write; presumably the caller provides the write context -
     * confirm.
     */
    public static void clear(String path) {
        for (Linkcheck linkcheck : Database.query(Linkcheck.class)) {
            if (path.equals(linkcheck.getPath())) {
                Database.delete(linkcheck);
            }
        }
    }

    /**
     * Performs full link check and saves status to database.
     *
     * The status of an external (http/https) URL is determined only once per
     * run and reused for every content referencing it. Internal URLs are
     * resolved relative to the referencing content, so they are evaluated for
     * each content separately.
     */
    public static void check() {
        Map<String, Integer> externalStates = new HashMap<String, Integer>();
        for (Content content : Database.query(Content.class)) {
            String fulltext = content.getFulltext().toString();
            for (String url : extractUrls(fulltext)) {
                int status;
                if (isExternal(url)) {
                    Integer cached = externalStates.get(url);
                    if (cached == null) {
                        cached = getStatus(content, url);
                        externalStates.put(url, cached);
                    }
                    status = cached;
                } else {
                    // Result depends on the content, so it must not be
                    // shared between different content objects.
                    status = getStatus(content, url);
                }
                Database.save(new Linkcheck(content.getPath(), url, status));
            }
        }
    }

    /**
     * For every URL in given content object perform a linkcheck and save status
     * to the database.
     */
    public static void check(Content content) {
        String fulltext = content.getFulltext().toString();
        for (String url : extractUrls(fulltext)) {
            Database.save(new Linkcheck(content.getPath(), url, getStatus(
                    content, url)));
        }
    }

    /**
     * Returns the distinct URLs which Regexps extracts from the given text.
     */
    private static Set<String> extractUrls(String fulltext) {
        return new HashSet<String>(Regexps.extractUrlsFromHtml(fulltext));
    }

    /**
     * Returns true if the given URL uses the http or https protocol.
     */
    private static boolean isExternal(String url) {
        return url.startsWith("http:") || url.startsWith("https:");
    }

    /**
     * Determines the status for a single URL.
     *
     * @param content
     *            content object in which the URL was found; internal URLs are
     *            looked up as children of this object
     * @param url
     *            URL as extracted from the fulltext
     * @return result of Urls.getStatus for http/https URLs, -1 for other
     *         protocols or scripting elements, otherwise 200 if the content
     *         has a matching child and 404 if not
     */
    private static int getStatus(Content content, String url) {
        if (isExternal(url)) {
            return Urls.getStatus(url, TIMEOUT);
        }
        // Unknown protocol (e.g mailto: or file:) or scripting element
        if (url.contains(":") || url.startsWith("<") || url.startsWith("$")) {
            return -1;
        }
        // Strip everything from the first ';', '#' or '?' onwards.
        Matcher matcher = INTERNAL_URL_PATH.matcher(url);
        if (matcher.matches()) {
            url = matcher.group(1);
        }
        if (url.startsWith("./~")) {
            url = url.substring(3);
        }
        return content.getChild(url) != null ? 200 : 404;
    }
}