/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.tw;
import de.jetwick.util.Helper;
import de.jetwick.data.UrlEntry;
import de.jetwick.data.JTweet;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.wicket.util.string.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class extracts links, users and hashtags of one tweet.
*
* Used for UI to render links, users and hashtags but also for indexing
* to detect users in retweets.
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public class Extractor {

    private static final Logger logger = LoggerFactory.getLogger(Extractor.class);
    /** Tweet set via {@link #setTweet(JTweet)}; stays null when only setText is used. */
    protected JTweet tweet;
    /** Raw text that {@link #run()} converts to markup. */
    protected String text;
    /** Url entries of the current tweet, keyed by {@code UrlEntry.getIndex()}. */
    protected Map<Integer, UrlEntry> urlMap = new LinkedHashMap<Integer, UrlEntry>(3);
    /** Output buffer; (re)created on every {@link #run()}. */
    protected StringBuilder sb;

    /**
     * Uses the text of the specified tweet as input and registers its url
     * entries so that {@link #onNewRawUrl(int, StringBuilder)} can look them
     * up by start index. Entries of a previously set tweet are discarded.
     *
     * @param tweet the tweet to process
     * @return this extractor
     */
    public Extractor setTweet(JTweet tweet) {
        this.tweet = tweet;
        Collection<UrlEntry> coll = tweet.getUrlEntries();
        urlMap.clear();
        for (UrlEntry e : coll) {
            urlMap.put(e.getIndex(), e);
        }
        return setText(tweet.getText());
    }

    /**
     * Sets the raw text to process. The url entries registered by a former
     * {@link #setTweet(JTweet)} call are left untouched.
     *
     * @param text the text to process
     * @return this extractor
     */
    public Extractor setText(String text) {
        this.text = text;
        return this;
    }

    /**
     * Convenience method which processes the specified string and returns
     * the generated markup.
     *
     * @param str the text to process
     * @return the generated markup
     * @deprecated use {@code setText(str).run().toString()} instead
     */
    @Deprecated
    public String toSaveHtml(String str) {
        return setText(str).run().toString();
    }

    /**
     * Scans the text character by character and writes the result into the
     * internal buffer, which is available via {@link #toString()}:
     * <ul>
     * <li>'@' starting a token is handed to {@link #onNewUser(int, String)}</li>
     * <li>'#' starting a token is handed to {@link #onNewHashTag(int, String)}</li>
     * <li>a line break becomes {@code <br/>} (only the first five are kept)</li>
     * <li>everything else is offered to {@link #onNewRawUrl(int, StringBuilder)}</li>
     * </ul>
     * Characters not consumed by one of the callbacks are appended
     * markup-escaped.
     *
     * @return this extractor
     * @throws NullPointerException if no text was set before
     */
    public Extractor run() {
        if (text == null) {
            throw new NullPointerException("before usage set text via setText or indirectly via setTweet!");
        }

        sb = new StringBuilder();
        int newLineCounter = 0;
        for (int index = 0; index < text.length(); index++) {
            char current = text.charAt(index);
            if (current == '@') {
                // skip an @ which follows an identifier character: it could be part of an email
                if (isTokenStart(index)) {
                    int lastIndex = findUserEnd(index + 1);
                    // preserve probably existing camel case (no toLowerCase)
                    String user = text.substring(index + 1, lastIndex).trim();
                    if (user.length() > 0 && onNewUser(index, user)) {
                        index = lastIndex - 1;
                        continue;
                    }
                }
            } else if (current == '#') {
                // skip a # which follows an identifier character
                if (isTokenStart(index)) {
                    int lastIndex = findHashTagEnd(index + 1);
                    String link = text.substring(index + 1, lastIndex).trim();
                    if (link.length() > 0 && onNewHashTag(index, link)) {
                        index = lastIndex - 1;
                        continue;
                    }
                }
            } else if (current == '\n') {
                newLineCounter++;
                // do not allow too 'high' tweets:
                if (newLineCounter < 6) {
                    sb.append("<br/>");
                }
                continue;
            } else {
                int lastIndex = onNewRawUrl(index, sb);
                if (lastIndex > 0) {
                    index = lastIndex - 1;
                    continue;
                }
            }

            // TODO allow bolding
            sb.append(Strings.escapeMarkup("" + current));
        }
        return this;
    }

    /**
     * Returns true if the character at the specified index is the first one
     * of the text or is not preceded by a Java identifier character.
     */
    private boolean isTokenStart(int index) {
        return index == 0 || !Character.isJavaIdentifierPart(text.charAt(index - 1));
    }

    /**
     * Returns the index of the first character at or after 'from' which is
     * not a Java identifier character, or the text length if there is none.
     */
    private int findUserEnd(int from) {
        for (int i = from; i < text.length(); i++) {
            if (!Character.isJavaIdentifierPart(text.charAt(i))) {
                return i;
            }
        }
        return text.length();
    }

    /**
     * Returns the index of the first space or line break at or after 'from',
     * or the text length if there is none. A line break ends the hashtag in
     * the same way it ends a raw url in onNewRawUrl.
     */
    private int findHashTagEnd(int from) {
        for (int i = from; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == ' ' || c == '\n') {
                return i;
            }
        }
        return text.length();
    }

    /**
     * @return the markup generated by the last {@link #run()} or an empty
     * string if run was not called yet
     */
    @Override
    public String toString() {
        if (sb == null) {
            return "";
        }
        return sb.toString();
    }

    /**
     * Appends the markup for a hashtag to the output buffer.
     *
     * @param index position of the '#' in the text
     * @param tag the hashtag without the leading '#'
     * @return true if markup was appended; false if creating it failed, in
     * which case the failure is logged and nothing is appended here
     */
    public boolean onNewHashTag(int index, String tag) {
        try {
            tag = "#" + tag;
            String cleanTag = Helper.stripOutLuceneHighlighting(tag);
            cleanTag = URLEncoder.encode(cleanTag, Helper.UTF8);
            String newLink = createTagMarkup(tag, cleanTag);
            sb.append(newLink);
            return true;
        } catch (Exception ex) {
            // best effort: the caller falls back to emitting the plain characters
            logger.warn("Cannot create link for " + tag, ex);
        }
        return false;
    }

    /**
     * Creates the markup for a hashtag; delegates to Helper.toJetwickSearch.
     *
     * @param tag the hashtag as found in the text, including the leading '#'
     * @param cleanTag the url encoded tag with lucene highlighting stripped
     * @return the markup for the tag
     */
    public String createTagMarkup(String tag, String cleanTag) {
        return Helper.toJetwickSearch(tag, cleanTag);
    }

    /**
     * Appends the markup for a user to the output buffer.
     *
     * @param index position of the '@' in the text
     * @param user the user name without the leading '@'
     * @return true if markup was appended; false if creating it failed, in
     * which case the failure is logged and nothing is appended here
     */
    public boolean onNewUser(int index, String user) {
        try {
            user = "@" + user;
            String cleanUserName = Helper.stripOutLuceneHighlighting(user);
            cleanUserName = URLEncoder.encode(cleanUserName, Helper.UTF8);
            String newUser = Helper.toJetwickUser(user, cleanUserName);
            sb.append(newUser);
            return true;
        } catch (Exception ex) {
            // best effort: the caller falls back to emitting the plain characters
            logger.warn("Cannot create link for " + user, ex);
        }
        return false;
    }

    /**
     * Checks if a raw url ("http://", "https://" or "www.") starts at the
     * specified index and, if so, appends a link for it to the specified
     * builder. The url has to start at the beginning of the text or directly
     * after a space or line break and ends before the next space, line break
     * or double quote. If a url entry is registered for this index and ends
     * at the same position, its resolved title and resolved url are used.
     *
     * @param index position in the text to inspect
     * @param tmpSb the builder to append the link markup to
     * @return the index directly after the url or -1 if no link was appended
     */
    public int onNewRawUrl(int index, StringBuilder tmpSb) {
        // startsWith with an offset avoids copying the remaining text for every character
        int minLength = 0;
        if (text.startsWith("http://", index)) {
            minLength = 7;
        } else if (text.startsWith("https://", index)) {
            minLength = 8;
        } else if (text.startsWith("www.", index)) {
            minLength = 4;
        }
        if (minLength == 0) {
            return -1;
        }

        // the url has to be at the very beginning or directly after a space or line break
        if (index > 0) {
            char previous = text.charAt(index - 1);
            if (previous != ' ' && previous != '\n') {
                return -1;
            }
        }

        int lastIndex = index;
        while (lastIndex < text.length()) {
            char c = text.charAt(lastIndex);
            if (c == ' ' || c == '\n' || c == '"') {
                break;
            }
            lastIndex++;
        }

        String url = text.substring(index, lastIndex);
        // ignore a bare prefix like "http://" without anything following
        if (url.length() <= minLength) {
            return -1;
        }

        // NOTE(review): without a resolved title the raw url is used as title and
        // reaches createLinkMarkup without markup escaping - confirm that this is
        // intended (e.g. to keep highlighting) and safe for untrusted text
        String title = url;
        UrlEntry entry = urlMap.get(index);
        if (entry != null && lastIndex == entry.getLastIndex()) {
            if (!Helper.isEmpty(entry.getResolvedTitle())) {
                title = Strings.escapeMarkup(entry.getResolvedTitle()).toString();
            }
            if (entry.getResolvedUrl() != null) {
                url = entry.getResolvedUrl();
            }
        }
        tmpSb.append(toLink(url, title));
        return lastIndex;
    }

    /**
     * @return the number of url entries registered via setTweet
     */
    int getUrlEntrySize() {
        return urlMap.size();
    }

    /**
     * Creates the link markup for the specified url. An url starting with
     * "www." gets "http://" prepended and a title longer than 50 characters
     * is displayed shortened to 47 characters plus "...".
     *
     * @param url the link target
     * @param title the full title of the link
     * @return the link markup
     */
    public String toLink(String url, String title) {
        if (url.startsWith("www.")) {
            url = "http://" + url;
        }

        // NOTE(review): a resolved title arrives already markup-escaped (see
        // onNewRawUrl), so cutting at a fixed offset can split an entity like &amp;
        String shortTitle = title;
        if (title.length() > 50) {
            shortTitle = title.substring(0, 47) + "...";
        }

        return createLinkMarkup(shortTitle, title, Helper.stripOutLuceneHighlighting(url), "ex-tw-link");
    }

    /**
     * Creates an anchor element which opens in a new window. All arguments
     * are inserted as they are, without any escaping.
     *
     * @param shortTitle the text of the link
     * @param title value of the title attribute
     * @param url value of the href attribute
     * @param clazz value of the class attribute
     * @return the anchor markup
     */
    public String createLinkMarkup(String shortTitle, String title, String url, String clazz) {
        return "<a title=\"" + title + "\" class=\"" + clazz + "\" target=\"_blank\" href=\"" + url + "\">" + shortTitle + "</a>";
    }
}