package cz.mallat.uasparser;
import cz.mallat.uasparser.fileparser.Entry;
import cz.mallat.uasparser.fileparser.PHPFileParser;
import cz.mallat.uasparser.fileparser.Section;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * User agent parser.
 * <p>
 * Holds a set of lookup tables built from a definition file (robots, operating systems, browsers, browser
 * types and the regular expressions that map a user agent string to them) and uses them to fill a
 * {@link UserAgentInfo} for a given user agent string.
 * <p>
 * Every lookup step and every rebuild of the tables runs while holding the same internal lock. All regular
 * expressions are compiled once, when the definition is loaded, and reused for every call to
 * {@link #parse(String)}.
 *
 * @author oli
 *
 */
public class UASparser {

    static final String INFO_URL = "http://user-agent-string.info";

    /** Flags applied to every regular expression taken from the definition file. */
    private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

    /** Guards reads and replacement of the lookup tables below. */
    private final ReentrantLock lock = new ReentrantLock();

    // The tables start out empty (instead of null) so that parse() on a parser without loaded data, or with a
    // definition file that lacks a section, yields an empty result instead of a NullPointerException.
    private Map<String, RobotEntry> robotsMap = Collections.emptyMap();
    private Map<Long, OsEntry> osMap = Collections.emptyMap();
    private Map<Long, BrowserEntry> browserMap = Collections.emptyMap();
    private Map<Long, String> browserTypeMap = Collections.emptyMap();
    // compiled browser regex -> browser id, in definition file order
    private Map<Pattern, Long> browserRegMap = Collections.emptyMap();
    private Map<Long, Long> browserOsMap = Collections.emptyMap();
    // compiled os regex -> os id, in definition file order
    private Map<Pattern, Long> osRegMap = Collections.emptyMap();

    /**
     * Use the given filename to load the definition file from the local filesystem
     *
     * @param localDefinitionFilename
     *            path of the definition file
     * @throws IOException
     *             passed on from {@link #loadDataFromFile(File)}
     */
    public UASparser(String localDefinitionFilename) throws IOException {
        loadDataFromFile(new File(localDefinitionFilename));
    }

    /**
     * Use the given input stream to load the definition file
     *
     * @param inputStreamToDefinitionFile
     *            stream the definition is read from
     * @throws IOException
     *             passed on from {@link #loadDataFromFile(InputStream)}
     */
    public UASparser(InputStream inputStreamToDefinitionFile) throws IOException {
        loadDataFromFile(inputStreamToDefinitionFile);
    }

    /**
     * Constructor for inherited classes. No definition data is loaded; until a subclass loads some, the lookup
     * tables are empty.
     */
    public UASparser() {
        // empty
    }

    /**
     * Hook that is called at the start of every {@link #parse(String)} call, before any lookup is done. When a
     * class inherits from this class, it probably has to override this method.
     *
     * @throws IOException
     *             never thrown by this base implementation
     */
    protected void checkDataMaps() throws IOException {
        // empty for this base class
    }

    /**
     * Parse the given user agent string and returns a UserAgentInfo object with the related data
     *
     * @param useragent
     *            the user agent string, may be <code>null</code>
     * @throws IOException
     *             may happen when the retrieval of the data file fails
     * @return a new UserAgentInfo; it is returned untouched when <code>useragent</code> is <code>null</code>
     */
    public UserAgentInfo parse(String useragent) throws IOException {
        UserAgentInfo retObj = new UserAgentInfo();
        if (useragent == null) {
            return retObj;
        }
        useragent = useragent.trim();

        // check that the data maps are up-to-date
        checkDataMaps();

        // first check if it's a robot
        if (!processRobot(useragent, retObj)) {
            // search for a browser on the browser regex patterns
            boolean osFound = processBrowserRegex(useragent, retObj);
            if (!osFound) {
                // search the OS regex patterns for the used OS
                processOsRegex(useragent, retObj);
            }
        }
        return retObj;
    }

    /**
     * Searches in the os regex table. If a pattern matches, the OS entry mapped to it (if any) is copied into
     * the result object. Only the first matching pattern is considered.
     *
     * @param useragent
     *            the trimmed user agent string
     * @param retObj
     *            the result object to fill
     */
    private void processOsRegex(String useragent, UserAgentInfo retObj) {
        // acquire before the try block, so that unlock() is only ever called on a lock that is really held
        lock.lock();
        try {
            for (Map.Entry<Pattern, Long> entry : osRegMap.entrySet()) {
                if (entry.getKey().matcher(useragent).find()) {
                    // simply copy the OS data into the result object
                    OsEntry os = osMap.get(entry.getValue());
                    if (os != null) {
                        os.copyTo(retObj);
                    }
                    break;
                }
            }
        } finally {
            lock.unlock();
        }
    }

    /**
     * Searches in the browser regex table. If a pattern matches, the browser type and browser data are copied
     * into the result object and, if the browser is mapped to an OS, the OS data too. Only the first matching
     * pattern is considered.
     *
     * @param useragent
     *            the trimmed user agent string
     * @param retObj
     *            the result object to fill
     * @return true if the matched browser has an OS id mapped in the browser/OS table, else false
     */
    private boolean processBrowserRegex(String useragent, UserAgentInfo retObj) {
        lock.lock();
        try {
            for (Map.Entry<Pattern, Long> entry : browserRegMap.entrySet()) {
                Matcher matcher = entry.getKey().matcher(useragent);
                if (!matcher.find()) {
                    continue;
                }

                // a browser was found...
                Long idBrowser = entry.getValue();

                // ... put the browser type from the browser type map into the result
                copyType(retObj, idBrowser);

                // get all the browser data from the browser map
                BrowserEntry be = browserMap.get(idBrowser);
                if (be != null) {
                    // the browser version is taken from the first subgroup of the regex, if there is one
                    String browserVersionInfo = null;
                    if (matcher.groupCount() > 0) {
                        browserVersionInfo = matcher.group(1);
                    }
                    // copy the browser data into the result
                    be.copyTo(retObj, browserVersionInfo);
                }

                // check if this browser has an OS mapped
                Long idOs = browserOsMap.get(idBrowser);
                if (idOs == null) {
                    return false;
                }
                OsEntry os = osMap.get(idOs);
                if (os != null) {
                    os.copyTo(retObj);
                }
                // reported as found even when the OS id has no entry in the OS table
                return true;
            }
            return false;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Sets the source type, if possible
     *
     * @param retObj
     *            the result object to fill
     * @param idBrowser
     *            id of the browser whose type is looked up
     */
    private void copyType(UserAgentInfo retObj, Long idBrowser) {
        // the lock is reentrant, so this is fine when the caller already holds it
        lock.lock();
        try {
            BrowserEntry be = browserMap.get(idBrowser);
            if (be == null) {
                return;
            }
            Long type = be.getType();
            if (type == null) {
                return;
            }
            String typeString = browserTypeMap.get(type);
            if (typeString != null) {
                retObj.setTyp(typeString);
            }
        } finally {
            lock.unlock();
        }
    }

    /**
     * Checks if the useragent comes from a robot. If yes copies all the data to the result object. The robot
     * table is keyed by the complete user agent string, so only an exact match counts.
     *
     * @param useragent
     *            the trimmed user agent string
     * @param retObj
     *            the result object to fill
     * @return true if the useragent belongs to a robot, else false
     */
    private boolean processRobot(String useragent, UserAgentInfo retObj) {
        lock.lock();
        try {
            RobotEntry robotEntry = robotsMap.get(useragent);
            if (robotEntry == null) {
                return false;
            }
            retObj.setTyp("Robot");
            robotEntry.copyTo(retObj);
            if (robotEntry.getOsId() != null) {
                OsEntry os = osMap.get(robotEntry.getOsId());
                if (os != null) {
                    os.copyTo(retObj);
                }
            }
            return true;
        } finally {
            lock.unlock();
        }
    }

    /**
     * loads the data file and creates all internal data structs
     *
     * @param definitionFile
     *            the definition file
     * @throws IOException
     *             declared for failures while reading the definition
     */
    protected void loadDataFromFile(File definitionFile) throws IOException {
        PHPFileParser fp = new PHPFileParser(definitionFile);
        createInternalDataStructre(fp.getSections());
    }

    /**
     * loads the data file and creates all internal data structs
     *
     * @param is
     *            stream the definition is read from
     * @throws IOException
     *             declared for failures while reading the definition
     */
    protected void loadDataFromFile(InputStream is) throws IOException {
        PHPFileParser fp = new PHPFileParser(is);
        createInternalDataStructre(fp.getSections());
    }

    /**
     * Creates the internal data structures from the sectionList. Each known section is first read into a fresh
     * map, which then replaces the current one; tables whose section is not part of the list keep their current
     * content and sections with an unknown name are ignored.
     * <p>
     * All regular expressions are compiled here, so an invalid expression in the definition makes this method
     * fail with a {@link java.util.regex.PatternSyntaxException}.
     *
     * @param sectionList
     *            the sections of the definition file
     */
    protected void createInternalDataStructre(List<Section> sectionList) {
        lock.lock();
        try {
            for (Section sec : sectionList) {
                String name = sec.getName();
                if ("robots".equals(name)) {
                    Map<String, RobotEntry> robotsMapTmp = new HashMap<String, RobotEntry>();
                    for (Entry en : sec.getEntries()) {
                        RobotEntry re = new RobotEntry(en.getData());
                        robotsMapTmp.put(re.getUserAgentString(), re);
                    }
                    robotsMap = robotsMapTmp;
                } else if ("os".equals(name)) {
                    Map<Long, OsEntry> osMapTmp = new HashMap<Long, OsEntry>();
                    for (Entry en : sec.getEntries()) {
                        osMapTmp.put(Long.parseLong(en.getKey()), new OsEntry(en.getData()));
                    }
                    osMap = osMapTmp;
                } else if ("browser".equals(name)) {
                    Map<Long, BrowserEntry> browserMapTmp = new HashMap<Long, BrowserEntry>();
                    for (Entry en : sec.getEntries()) {
                        browserMapTmp.put(Long.parseLong(en.getKey()), new BrowserEntry(en.getData()));
                    }
                    browserMap = browserMapTmp;
                } else if ("browser_type".equals(name)) {
                    Map<Long, String> browserTypeMapTmp = new HashMap<Long, String>();
                    for (Entry en : sec.getEntries()) {
                        browserTypeMapTmp.put(Long.parseLong(en.getKey()), en.getData().iterator().next());
                    }
                    browserTypeMap = browserTypeMapTmp;
                } else if ("browser_reg".equals(name)) {
                    // Collect by regex string first: when the same regex shows up twice it keeps its first
                    // position but gets the id of the last occurrence.
                    Map<String, Long> idByRegex = new LinkedHashMap<String, Long>();
                    for (Entry en : sec.getEntries()) {
                        Iterator<String> it = en.getData().iterator();
                        idByRegex.put(convertPerlToJavaRegex(it.next()), Long.parseLong(it.next()));
                    }
                    // Compile once here, so that parse() does not have to compile every pattern on every call.
                    Map<Pattern, Long> browserRegMapTmp = new LinkedHashMap<Pattern, Long>();
                    for (Map.Entry<String, Long> regexAndId : idByRegex.entrySet()) {
                        browserRegMapTmp.put(Pattern.compile(regexAndId.getKey(), REGEX_FLAGS), regexAndId.getValue());
                    }
                    browserRegMap = browserRegMapTmp;
                } else if ("browser_os".equals(name)) {
                    Map<Long, Long> browserOsMapTmp = new HashMap<Long, Long>();
                    for (Entry en : sec.getEntries()) {
                        browserOsMapTmp.put(Long.parseLong(en.getKey()), Long.parseLong(en.getData().iterator().next()));
                    }
                    browserOsMap = browserOsMapTmp;
                } else if ("os_reg".equals(name)) {
                    Map<Pattern, Long> osRegMapTmp = new LinkedHashMap<Pattern, Long>();
                    for (Entry en : sec.getEntries()) {
                        Iterator<String> it = en.getData().iterator();
                        Pattern pattern = Pattern.compile(convertPerlToJavaRegex(it.next()), REGEX_FLAGS);
                        osRegMapTmp.put(pattern, Long.parseLong(it.next()));
                    }
                    osRegMap = osRegMapTmp;
                }
            }
        } finally {
            lock.unlock();
        }
    }

    /**
     * Converts a PERL style regex into the Java style. That means it removes the first character (the leading /)
     * and everything from the last / on, which drops the modifiers. The modifiers are not evaluated.
     *
     * @param regex
     *            the expression as found in the definition file, e.g. <code>/foo (\d+)/si</code>
     * @return the text between the first character and the last /
     * @throws IllegalArgumentException
     *             if there is no / after the first character
     */
    private String convertPerlToJavaRegex(String regex) {
        int closingDelimiter = regex.lastIndexOf('/');
        if (closingDelimiter < 1) {
            // without this check the substring call below fails with a bare index error
            throw new IllegalArgumentException("Regex has no closing '/' delimiter: " + regex);
        }
        return regex.substring(1, closingDelimiter);
    }
}