/*
* Copyright 2013 mpowers
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.trsst.server;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.security.PublicKey;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.http.HttpUtils;
import javax.xml.crypto.dsig.XMLSignatureException;
import javax.xml.namespace.QName;
import org.apache.abdera.Abdera;
import org.apache.abdera.ext.rss.RssConstants;
import org.apache.abdera.i18n.iri.IRI;
import org.apache.abdera.i18n.iri.IRISyntaxException;
import org.apache.abdera.i18n.templates.Template;
import org.apache.abdera.model.AtomDate;
import org.apache.abdera.model.Category;
import org.apache.abdera.model.Content;
import org.apache.abdera.model.Document;
import org.apache.abdera.model.Element;
import org.apache.abdera.model.Entry;
import org.apache.abdera.model.Feed;
import org.apache.abdera.model.Link;
import org.apache.abdera.model.Person;
import org.apache.abdera.model.Text;
import org.apache.abdera.parser.ParseException;
import org.apache.abdera.protocol.Response.ResponseType;
import org.apache.abdera.protocol.client.AbderaClient;
import org.apache.abdera.protocol.client.ClientResponse;
import org.apache.abdera.protocol.client.RequestOptions;
import org.apache.abdera.protocol.server.ProviderHelper;
import org.apache.abdera.protocol.server.RequestContext;
import org.apache.abdera.protocol.server.RequestContext.Scope;
import org.apache.abdera.protocol.server.ResponseContext;
import org.apache.abdera.protocol.server.TargetType;
import org.apache.abdera.protocol.server.context.MediaResponseContext;
import org.apache.abdera.protocol.server.context.RequestContextWrapper;
import org.apache.abdera.protocol.server.context.ResponseContextException;
import org.apache.abdera.protocol.server.context.StreamWriterResponseContext;
import org.apache.abdera.security.AbderaSecurity;
import org.apache.abdera.security.Signature;
import org.apache.abdera.security.SignatureOptions;
import org.apache.abdera.util.Constants;
import org.apache.abdera.util.EntityTag;
import org.apache.abdera.util.MimeTypeHelper;
import org.apache.abdera.writer.StreamWriter;
import org.apache.commons.codec.binary.Base64;
import com.trsst.Common;
/**
 * Trsst-specific extensions to atompub, which mainly consist of accepting
 * Feeds instead of Entries, and validating that all Entries and Feeds are
 * signed.
*
* Servers don't deal with encryption or private keys at all.
*
* All persistence is delegated to an instance of Storage.
*
 * Callers may serve multiple requests for the same feed id with the same
 * TrsstAdapter, and instances might be retained in a cache, so implementations
 * need to be thread-safe and resource-friendly.
*
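 * A minimal usage sketch (illustrative only; the Storage instance and the
 * RequestContext are supplied by the hosting Abdera provider):
 *
 * <pre>
 * TrsstAdapter adapter = new TrsstAdapter(feedId, storage);
 * ResponseContext response = adapter.getFeed(requestContext);
 * </pre>
 *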
* @author mpowers
*/
@SuppressWarnings("deprecation")
public class TrsstAdapter extends AbstractMultipartAdapter {
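    // URI template used by addPagingLinks to build the current/next/previous
    // links on query results.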
private final static Template paging_template = new Template(
"{collection}?{-join|&|q,verb,mention,tag,before,after,count,page}");
protected String feedId;
protected Storage persistence;
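    // lazily initialized by getAlternateAccepts: media types accepted for
    // multipart/related posts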
protected Map<String, String> accepts;
/**
 * Callers may serve multiple requests for the same feed id with the same
 * TrsstAdapter, and instances might be retained in a cache, so implementations
 * need to be thread-safe and resource-friendly.
*
 * @param feedId
 *            the feed id in question
* @param storage
* the persistence engine
* @throws FileNotFoundException
* if requested feed is not known to this server
* @throws IOException
* for other kinds of persistence issues
*/
public TrsstAdapter(String feedId, Storage storage)
throws FileNotFoundException, IOException {
this.persistence = storage;
this.feedId = feedId;
}
/**
* Returns the current feed to service this request, fetching from the
* current request, from local storage, or from remote peers as needed.
*/
protected Feed currentFeed(RequestContext request) throws ParseException,
FileNotFoundException, IOException {
Feed feed = null;
RequestContextWrapper wrapper = new RequestContextWrapper(request);
// fetch from request context
feed = (Feed) wrapper.getAttribute(Scope.REQUEST, "com.trsst.Feed");
if (feed != null) {
// shortcut for very common case
return feed;
}
        log.debug("currentFeed: " + wrapper.getTargetPath());
// if async fetch is allowed
if (wrapper.getParameter("sync") == null) {
// return latest from local storage
feed = fetchFeedFromStorage(feedId, persistence);
if (feed != null) {
// trigger async fetch in case we're stale
pullLaterFromRelay(feedId, request);
// pullFromRelay(request);
}
}
// otherwise fetch synchronously
if (feed == null) {
// attempt to fetch from relay peer
feed = pullFromRelay(feedId, request);
}
if (feed != null) {
// ensure it's a naked feed:
// entries matching query params will get added later
for (Entry e : feed.getEntries()) {
e.discard();
}
// store in request context
wrapper.setAttribute(Scope.REQUEST, "com.trsst.Feed", feed);
return feed;
}
throw new FileNotFoundException("Not found: " + feedId);
}
protected static Feed fetchFeedFromStorage(String feedId, Storage storage) {
Feed feed = null;
try {
log.debug("fetchFeedFromStorage: " + feedId);
feed = (Feed) Abdera.getInstance().getParser()
.parse(new StringReader(storage.readFeed(feedId)))
.getRoot();
} catch (FileNotFoundException fnfe) {
log.debug("Not found in local storage: " + feedId);
} catch (ParseException e) {
log.debug("Could not parse feed from local storage: " + feedId, e);
} catch (IOException e) {
log.debug("Unexpected error reading from local storage: " + feedId,
e);
}
return feed;
}
/**
 * Called to trigger an asynchronous fetch, usually after we have returned
 * possibly stale data and want to make sure it is refreshed on the next
 * pull. This implementation uses a short-fuse timer task queue; subclasses
 * should implement a heuristic that queues this task for later based on the
 * likelihood that a refetch is needed, e.g. factoring in time since the last
 * update, frequency of updates, etc.
*/
protected void pullLaterFromRelay(final String feedId,
final RequestContext request) {
if (TASK_QUEUE == null) {
TASK_QUEUE = new Timer();
}
final String uri = request.getResolvedUri().toString();
log.debug("fetchLaterFromRelay: queuing: " + uri);
if (!COALESCING_TIMERS.containsKey(uri)) {
log.debug("fetchLaterFromRelay: creating: " + uri);
TimerTask task = new TimerTask() {
public void run() {
log.debug("fetchLaterFromRelay: starting: " + uri);
pullFromRelay(feedId, request);
COALESCING_TIMERS.remove(uri);
}
};
COALESCING_TIMERS.put(uri, task);
TASK_QUEUE.schedule(task, 6000); // six seconds
}
}
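    // Shared timer and per-URI task map used to coalesce duplicate relay
    // refresh requests so that at most one refresh is pending per URI.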
public static Timer TASK_QUEUE;
private static Map<String, TimerTask> COALESCING_TIMERS = new Hashtable<String, TimerTask>();
protected Feed pullFromRelay(String feedIdentifier, RequestContext request) {
Feed result = null;
RequestContextWrapper wrapper = new RequestContextWrapper(request);
int limit = 5; // arbitrary
try {
if (wrapper.getParameter("relayLimit") != null) {
limit = Integer.parseInt(wrapper.getParameter("relayLimit"));
if (limit > 10) {
log.warn("Arbitrarily capping specified limit to 10: "
+ limit);
limit = 10; // arbitrary
}
}
} catch (Throwable t) {
log.warn("Could not parse relayLimit; defaulting to: " + limit);
}
// if relay peer count is less than search limit
List<String> relays = wrapper.getParameters("relay");
URL relayPeer = null;
if (relays == null || relays.size() <= limit) {
relayPeer = getRelayPeer(relays);
if (relayPeer != null) {
log.debug("Using relay peer: " + relayPeer);
result = pullFromServiceUrl(request, relayPeer);
} else {
log.debug("No relay peer available for request: "
+ request.getResolvedUri());
}
}
if (result == null) {
if (Common.isExternalId(feedIdentifier)) {
// attempt to fetch directly
log.debug("Fetching direct: " + feedIdentifier);
result = fetchFromExternalSource(feedIdentifier);
}
}
// if we got a result
if (result != null) {
result = ingestFromRelay(persistence, result, relayPeer, relays);
}
return result;
}
protected Feed ingestFromRelay(Storage storage, Feed feed, URL relayPeer,
List<String> relays) {
try {
String feedIdentifier = Common.fromFeedUrn(feed.getId());
if (feedIdentifier == null || Common.isExternalId(feedIdentifier)) {
// convert from rss if needed
if (feed.getClass().getName().indexOf("RssFeed") != -1) {
// our adapter feed id is the url
feed = convertFromRSS(feed);
}
if (feed != null) {
// use new feed id if any
feedIdentifier = Common.fromFeedUrn(feed.getId());
// process and persist external feed
ingestExternalFeed(feedIdentifier, feed, 25);
}
} else if (Common.isAggregateId(feedIdentifier)
&& relayPeer != null) {
// if it's an aggregate feed fetched from a relay
ingestAggregateFeed(storage, feed, relayPeer, relays);
} else {
// ingest the native feed
ingestFeed(storage, feed);
}
} catch (Throwable t) {
log.error("Could not ingest feed: " + feed.getId(), t);
}
return feed; // returns after rss conversion if any
}
/**
 * Returns a relay peer to use to fetch contents. Implementors should return
 * a url chosen from an evenly or randomly distributed mix of known trsst
 * servers based on the home urls of this server's hosted content. This
 * implementation currently returns a random relay from the
 * com.trsst.server.relays property, or null if the property does not
 * exist.
*
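 * For example, relay peers might be configured at launch with a
 * comma-separated list of service urls (the urls shown are illustrative):
 *
 * <pre>
 * -Dcom.trsst.server.relays=https://relay-one.example.com,https://relay-two.example.com
 * </pre>
 *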
 * @param excludeHashes
 *            relays on this list will not be returned
*/
protected URL getRelayPeer(List<String> excludeHashes) {
if (RELAYS == null) {
String property = System.getProperty("com.trsst.server.relays");
if (property == null) {
RELAYS = new String[0];
} else {
RELAYS = property.split(",");
}
}
// return a random relay that's not on the exclude list
Set<String> excludes = new HashSet<String>();
if (excludeHashes != null) {
excludes.addAll(excludeHashes);
}
List<String> relays = new LinkedList<String>();
for (String relay : RELAYS) {
relays.add(relay);
}
Collections.shuffle(relays);
for (String relay : relays) {
try {
if (!excludes.contains(relay)) {
return new URL(relay);
}
} catch (MalformedURLException e) {
log.error("getRelayPeer: bad relay specified: " + relay, e);
}
}
return null;
}
/**
 * Returns a one-way token derived from the local host address, used to
 * identify this server in relay query strings.
*/
protected String getLocalRelayId() {
if (RELAY_ID == null) {
// shared across all instances
try {
RELAY_ID = Integer.toHexString(InetAddress.getLocalHost()
.hashCode());
} catch (UnknownHostException e) {
log.error("Could not obtain local IP address: falling back to loopback address");
RELAY_ID = Integer.toHexString(InetAddress.getLoopbackAddress()
.hashCode());
}
}
// FLAG: hash name for a bit of extra obscurity
// would this be overkill? #paranoid
// byte[] hostBytes;
// try {
// hostBytes = hostName.getBytes("UTF-8");
// hashName = Base64.encodeBase64String(Common.hash(hostBytes, 0,
// hostBytes.length));
// } catch (UnsupportedEncodingException e1) {
// log.error("Should never happen", e1);
// hashName = hostName;
// }
return RELAY_ID;
}
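    // Cached across all adapter instances: the local relay token and the
    // relay peer urls parsed from the com.trsst.server.relays property.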
private static String RELAY_ID;
private static String[] RELAYS;
/**
 * Fetches from the specified trsst service url, validates it, ingests it,
 * and returns the resulting feed.
*/
private Feed pullFromServiceUrl(RequestContext request, URL serviceUrl) {
String feedIdentifier = request.getTarget().getParameter("collection");
String uri = request.getResolvedUri().toString();
String path = null;
String query = null;
if (feedIdentifier == null) {
// global query
int index = uri.indexOf('?');
if (index != -1) {
query = uri.substring(index + 1);
path = "";
} else {
log.error("Could not find query in service request: "
+ request.getResolvedUri());
return null;
}
} else {
// feed query
int index = uri.indexOf(feedIdentifier);
if (index != -1) {
path = uri.substring(index);
index = path.indexOf('?');
if (index != -1) {
query = path.substring(index + 1);
path = path.substring(0, index);
}
} else {
log.error("Could not find feed id in service request: "
+ request.getResolvedUri());
return null;
}
}
return pullFromServiceUrl(serviceUrl, path, query);
}
/**
 * Fetches from the specified trsst service url, validates it, ingests it,
 * and returns the resulting feed.
*/
private Feed pullFromServiceUrl(URL serviceUrl, String path,
String queryString) {
log.trace("pullFromServiceUrl: uri: " + serviceUrl.toString() + " : "
+ path + " : " + queryString);
if (queryString == null) {
queryString = "";
}
if (queryString.indexOf("relay=" + getLocalRelayId()) != -1) {
            // if we're already in the list of relay peers
log.error("Unexpected relay loopback: ignoring request");
return null;
}
if (queryString.length() > 0) {
queryString = queryString + '&';
}
// add self as relay
queryString = queryString + "relay=" + getLocalRelayId();
return pullFromService(serviceUrl.toString(), path, queryString);
}
/**
* For external feed ids: fetch directly from external source, convert to a
* trsst feed, (optionally validate it), (optionally persist it), and return
* the feed.
*/
protected Feed fetchFromExternalSource(String feedId) {
Feed result = null;
try {
AbderaClient client = new AbderaClient(Abdera.getInstance(),
Common.getBuildString());
feedId = Common.decodeURL(feedId);
new URL(feedId); // validates as a url
ClientResponse response = client.get(feedId);
if (response.getType() == ResponseType.SUCCESS) {
Document<Feed> document = response.getDocument();
if (document != null) {
return document.getRoot();
} else {
log.warn("fetchFromExternalSource: no document for: "
+ feedId);
}
} else {
log.debug("fetchFromExternalSource: no document found for: "
+ feedId + " : " + response.getType());
}
} catch (MalformedURLException urle) {
log.error("Not a valid external feed id: " + feedId);
} catch (ClassCastException cce) {
log.error("Not a valid feed: " + feedId, cce);
} catch (Exception e) {
log.error("Could not process external feed: " + feedId, e);
}
return result;
}
@Override
public String getId(RequestContext request) {
return feedId;
}
@Override
public String getAuthor(RequestContext request)
throws ResponseContextException {
Person author = null;
try {
author = currentFeed(request).getAuthor();
} catch (FileNotFoundException e) {
log.trace("Could not find feed: " + feedId, e);
} catch (ParseException e) {
log.error("Could not parse stored feed: " + feedId, e);
} catch (IOException e) {
log.error("Unexpected error reading feed: " + feedId, e);
}
if (author != null) {
return author.getName();
}
return null;
}
@Override
protected Feed createFeedBase(RequestContext request)
throws ResponseContextException {
try {
return (Feed) currentFeed(request).clone();
} catch (FileNotFoundException e) {
log.debug("Could not find feed: " + feedId, e);
} catch (ParseException e) {
log.error("Could not parse stored feed: " + feedId, e);
} catch (IOException e) {
log.error("Unexpected error reading feed: " + feedId, e);
}
return null;
}
public String getTitle(RequestContext request) {
try {
return currentFeed(request).getTitle();
} catch (FileNotFoundException e) {
log.debug("Could not find feed: " + feedId, e);
} catch (ParseException e) {
log.error("Could not parse stored feed: " + feedId, e);
} catch (IOException e) {
log.error("Unexpected error reading feed: " + feedId, e);
}
return null;
}
public String getHref(RequestContext request) {
Map<String, Object> params = new HashMap<String, Object>();
params.put("collection", getId(request));
return request.urlFor(TargetType.TYPE_COLLECTION, params);
}
/**
* Returns a feed document containing all the entries for this feed, subject
* to pagination.
*/
public ResponseContext getFeed(RequestContext request) {
try {
Feed result = currentFeed(request);
fetchEntriesFromStorage(request, result);
// validity check:
// sometimes abdera is failing to parse its own etags
EntityTag etag = ProviderHelper.calculateEntityTag(result);
EntityTag[] matches = request.getIfMatch();
for (EntityTag match : matches) {
if (etag.equals(match)) {
// no change: return 304 Not Modified
return ProviderHelper.notmodified(request);
}
}
try {
EntityTag.parse(etag.toString());
} catch (IllegalArgumentException e) {
// FIXME: Abdera's etag creator sometimes creates invalid etags
log.error("Bad etag: " + feedId + " : " + etag, e);
}
return ProviderHelper.returnBase(result, 200, result.getUpdated())
.setEntityTag(etag);
} catch (IllegalArgumentException e) {
log.error("Bad request: " + feedId, e);
return ProviderHelper.badrequest(request, e.getMessage());
} catch (FileNotFoundException e) {
log.debug("Could not find feed: " + feedId, e);
return ProviderHelper.notfound(request);
} catch (ParseException e) {
log.error("Could not parse stored feed: " + feedId, e);
return ProviderHelper.servererror(request, e);
} catch (IOException e) {
log.error("Unexpected error reading feed: " + feedId, e);
return ProviderHelper.servererror(request, e);
}
}
/**
* Returns a feed document containing the single requested entry. NOTE: this
 * is a deviation from atompub. TODO: not much point in returning a feed now;
 * probably should conform to spec.
*/
public ResponseContext getEntry(RequestContext request) {
// make a copy of the current template
Feed result;
try {
result = currentFeed(request);
// add requested entry
String entryId = request.getTarget().getParameter("entry");
Document<Entry> entry = getEntry(persistence, feedId,
Common.toEntryId(entryId));
if (entry != null) {
result.addEntry(entry.getRoot());
} else {
return ProviderHelper.notfound(request);
}
return ProviderHelper.returnBase(result, 200, result.getUpdated());
// FIXME: Abdera's etag creator doesn't create valid etags
// .setEntityTag(ProviderHelper.calculateEntityTag(result));
} catch (FileNotFoundException e) {
log.debug("Could not find feed: " + feedId, e);
return ProviderHelper.notfound(request);
} catch (ParseException e) {
log.error("Could not parse stored feed: " + feedId, e);
return ProviderHelper.servererror(request, e);
} catch (IOException e) {
log.error("Unexpected error reading feed: " + feedId, e);
return ProviderHelper.servererror(request, e);
}
}
protected static Document<Entry> getEntry(Storage storage, String feedId,
long entryId) {
try {
// NOTE: by this point currentFeed() will have fetched
// the requested entry via relay if needed
// FIXME: this is not currently working; need a test case
// fetch from local storage
return Abdera
.getInstance()
.getParser()
.parse(new StringReader(storage.readEntry(feedId, entryId)));
} catch (FileNotFoundException fnfe) {
// fall through
} catch (Exception e) {
log.error("Unexpected error: " + feedId + " : " + entryId, e);
}
return null;
}
/**
* Accepts a signed feed document containing one or more signed entries and
* attachments for some or all of those entries.
*/
public ResponseContext postMedia(RequestContext request) {
try {
if (MimeTypeHelper.isMultipart(request.getContentType().toString())) {
byte[] requestData = Common.readFully(request.getInputStream());
List<MultipartRelatedPost> posts = getMultipartRelatedData(
request, new ByteArrayInputStream(requestData));
Feed incomingFeed = null;
if (posts != null) {
Map<String, Entry> contentIdToEntry = new HashMap<String, Entry>();
Map<String, String> contentIdToType = new HashMap<String, String>();
Map<String, InputStream> contentIdToData = new HashMap<String, InputStream>();
for (MultipartRelatedPost post : posts) {
String type = post.getDataHeaders().get("content-type");
String cid = post.getDataHeaders().get("content-id");
if (cid != null) {
if (cid.startsWith("<cid:")) {
cid = cid.substring(5);
cid = cid.substring(0, cid.length() - 1);
}
// find content id in entry list
List<Entry> entries;
if (post.getSource().getRoot() instanceof Feed) {
incomingFeed = ((Feed) post.getSource()
.getRoot());
entries = incomingFeed.getEntries();
} else if (post.getSource().getRoot() instanceof Entry) {
log.warn("Single entries not supported: "
+ post.getSource().getRoot());
entries = new LinkedList<Entry>();
entries.add((Entry) post.getSource().getRoot());
return ProviderHelper
.badrequest(request,
"Single entries not currently supported.");
} else {
log.error("Unrecognized source: "
+ post.getSource());
return ProviderHelper.badrequest(request,
"Unrecognized source.");
}
for (Entry entry : entries) {
if (entry.getContentSrc() != null
&& entry.getContentSrc().toString()
.endsWith(cid)) {
// getContentSrc resolves against baseurl
contentIdToEntry.put(cid, entry);
contentIdToType.put(cid, type);
contentIdToData.put(cid, post.getData());
}
}
}
}
// if all content ids match an entry content element
if (contentIdToEntry.size() == posts.size()) {
ingestFeed(persistence, incomingFeed);
for (Map.Entry<String, Entry> i : contentIdToEntry
.entrySet()) {
String cid = i.getKey();
Entry entry = i.getValue();
// TODO: grab from attribute instead
// String algorithm = "ripemd160";
String hash = cid;
int dot = hash.indexOf('.');
if (dot != -1) {
// remove any mimetype hint
// (some feed readers like to see
// a file extension on enclosures)
hash = hash.substring(0, dot);
}
byte[] data = Common.readFully(contentIdToData
.get(cid));
String digest = new Base64(0, null, true)
.encodeToString(Common.ripemd160(data));
if (digest.equals(hash)) {
// only store if hash matches content id
persistence.updateFeedEntryResource(feedId,
Common.toEntryId(entry.getId()), cid,
contentIdToType.get(cid),
entry.getPublished(), data);
} else {
log.error("Content digests did not match: "
+ hash + " : " + digest);
return ProviderHelper.badrequest(request,
"Could not verify content digest for: "
+ hash);
}
}
pushRawPostIfNeeded(incomingFeed, request, requestData);
return ProviderHelper.returnBase(incomingFeed, 201,
null);
}
}
}
} catch (Exception pe) {
log.error("postMedia: ", pe);
return ProviderHelper.badrequest(request,
"Could not process multipart request: " + pe.getMessage());
}
return ProviderHelper.badrequest(request,
"Could not process multipart request");
}
/**
* Validate then persist incoming feed and entries. Any exception thrown
* means no feed or entries are persisted.
*
* @param feed
* with zero or more entries to be validated and persisted.
* @throws XMLSignatureException
* if signature verification fails
* @throws IllegalArgumentException
* if data validation fails
* @throws Exception
* any other problem
*/
protected void ingestFeed(Storage storage, Feed feed)
throws XMLSignatureException, IllegalArgumentException, Exception {
// clone a copy so we can manipulate
feed = (Feed) feed.clone();
// validate feed
Date lastUpdated = feed.getUpdated();
if (lastUpdated == null) {
throw new IllegalArgumentException(
"Feed update timestamp is required: " + feed.getId());
}
if (lastUpdated.after(new Date(
System.currentTimeMillis() + 1000 * 60 * 5))) {
// allows five minutes of variance
throw new IllegalArgumentException(
"Feed update timestamp cannot be in the future: "
+ feed.getId());
}
// grab the signing key
Element signingElement = feed.getFirstChild(new QName(Common.NS_URI,
Common.SIGN));
if (signingElement == null) {
throw new XMLSignatureException(
"Could not find signing key for feed: " + feed.getId());
}
// verify that the key matches the id
PublicKey publicKey = Common.toPublicKeyFromX509(signingElement
.getText());
if (Common.fromFeedUrn(feed.getId()) == null
|| !Common.fromFeedUrn(feed.getId()).equals(
Common.toFeedId(publicKey))) {
throw new XMLSignatureException(
"Signing key does not match feed id: "
+ Common.fromFeedUrn(feed.getId()) + " : "
+ Common.toFeedId(publicKey));
}
// prep the verifier
AbderaSecurity security = new AbderaSecurity(Abdera.getInstance());
Signature signature = security.getSignature();
SignatureOptions options = signature.getDefaultSignatureOptions();
options.setSigningAlgorithm("http://www.w3.org/2001/04/xmldsig-more#ecdsa-sha1");
options.setSignLinks(false);
options.setPublicKey(publicKey);
// validate, persist, and remove each entry
List<Entry> entries = new LinkedList<Entry>();
entries.addAll(feed.getEntries()); // make a copy
String existingEntryXml;
for (Entry entry : feed.getEntries()) {
String feedId = Common.toFeedIdString(feed.getId());
long entryId = Common.toEntryId(entry.getId());
try {
try {
existingEntryXml = persistence.readEntry(feedId, entryId);
} catch (FileNotFoundException fnfe) {
existingEntryXml = null;
}
if (existingEntryXml != null) {
Entry parsed = (Entry) Abdera.getInstance().getParser()
.parse(new StringReader(existingEntryXml))
.getRoot();
if (entry.getUpdated().after(parsed.getUpdated())) {
// discard what we have in cache
existingEntryXml = null;
}
}
} catch (Exception e) {
existingEntryXml = null;
log.warn(
"Unexpected error parsing existing entry before validation: "
+ entry.getId(), e);
}
if (existingEntryXml != null) {
log.trace("Skipping validation for existing entry: "
+ entry.getId());
} else {
if (!signature.verify(entry, options)) {
// failed validation
Element activity = entry.getExtension(new QName(
"http://activitystrea.ms/spec/1.0/", "verb",
"activity"));
// if not a 'deleted' entry
if (activity == null
|| !"deleted".equals(activity.getText())) {
// TODO: should validate that the 'delete' entry that
// this entry mentions is mentioning this entry
log.warn("Could not verify signature for entry with id: "
+ feed.getId());
// fail ingest
throw new XMLSignatureException(
"Could not verify signature for entry with id: "
+ entry.getId() + " : " + feed.getId());
} else {
log.warn("Skipping signature verification for deleted entry: "
+ feed.getId());
}
}
try {
// yield a bit while validating entries
Thread.sleep(100);
} catch (InterruptedException e) {
log.error("Should never happen: ", e);
}
}
// remove from feed parent
entry.discard();
try {
// see if this file already exists
storage.readEntry(Common.toFeedIdString(feed.getId()),
Common.toEntryId(entry.getId()));
// this file exists; remove from processing
entries.remove(entry);
} catch (FileNotFoundException e) {
// file does not already exist: resume
}
}
// setEditDetail(request, entry, key);
// String edit = entry.getEditLinkResolvedHref().toString();
// remove all navigation links before signing
for (Link link : feed.getLinks()) {
if (Link.REL_FIRST.equals(link.getRel())
|| Link.REL_LAST.equals(link.getRel())
|| Link.REL_CURRENT.equals(link.getRel())
|| Link.REL_NEXT.equals(link.getRel())
|| Link.REL_PREVIOUS.equals(link.getRel())) {
link.discard();
}
}
// remove all opensearch elements before verifying
for (Element e : feed
.getExtensions("http://a9.com/-/spec/opensearch/1.1/")) {
e.discard();
}
// now validate feed signature sans entries
if (!signature.verify(feed, options)) {
log.warn("Could not verify signature for feed with id: "
+ feed.getId());
throw new XMLSignatureException(
"Could not verify signature for feed with id: "
+ feed.getId());
}
// persist feed
String existingFeedXml;
try {
String feedId = Common.toFeedIdString(feed.getId());
try {
existingFeedXml = persistence.readFeed(feedId);
} catch (FileNotFoundException fnfe) {
existingFeedXml = null;
}
if (existingFeedXml != null) {
Feed parsed = (Feed) Abdera.getInstance().getParser()
.parse(new StringReader(existingFeedXml)).getRoot();
if (feed.getUpdated().after(parsed.getUpdated())) {
// discard what we have in cache
existingFeedXml = null;
}
}
} catch (Exception e) {
existingFeedXml = null;
log.warn("Unexpected error parsing existing feed: " + feedId, e);
}
if (existingFeedXml == null) {
persistence.updateFeed(feedId, feed.getUpdated(), feed.toString());
}
// only now persist each entry
for (Entry entry : entries) {
Date date = entry.getPublished();
if (date == null) {
// fall back to updated if publish not set
date = entry.getUpdated();
}
storage.updateEntry(Common.toFeedIdString(feed.getId()),
Common.toEntryId(entry.getId()), date, entry.toString());
// check for delete operation
String verb = entry.getSimpleExtension(new QName(
"http://activitystrea.ms/spec/1.0/", "verb", "activity"));
if ("delete".equals(verb)) {
// get mentions
List<Category> mentions = entry.getCategories();
for (Category mention : mentions) {
IRI scheme = mention.getScheme();
if (scheme != null
&& (Common.MENTION_URN.equals(scheme.toString()) || Common.MENTION_URN_LEGACY
.equals(scheme.toString()))) {
Entry deleted = null;
try {
deleted = deleteEntry(storage,
Common.toFeedIdString(feed.getId()),
Common.toEntryId(mention.getTerm()),
Common.toEntryId(entry.getId()));
} catch (IOException exc) {
log.error(
"Could not delete entry: " + entry.getId(),
exc);
}
if (deleted != null) {
log.debug("Deleted entry: " + entry.getId());
} else {
log.error("Failed to delete entry: "
+ entry.getId());
}
}
}
}
}
}
/**
* Convert external feed and entries and persist. Any exception thrown means
* no feed or entries are persisted.
*
* External feeds are existing RSS and Atom feeds that are ingested by a
* trsst server on behalf of a user request and converted into unsigned
* trsst feeds and entries.
*
* Note that unsigned or external feeds are never pushed to a trsst server:
* they are only ever fetched on behalf of a request from a client. Trsst
* servers never accept a push of unsigned feeds or entries.
*
* @param feed
* with zero or more entries to be validated and persisted.
* @throws XMLSignatureException
* if signature verification fails
* @throws IllegalArgumentException
* if data validation fails
* @throws Exception
* any other problem
*/
protected void ingestExternalFeed(String feedId, Feed feed, int limit)
throws XMLSignatureException, IllegalArgumentException, Exception {
// clone a copy so we can manipulate
feed = (Feed) feed.clone();
// for our purposes: replace the existing feed id with the URL
feed.setId(Common.toFeedUrn(feedId));
// validate, persist, and remove each entry
List<Entry> entries = new LinkedList<Entry>();
entries.addAll(feed.getEntries()); // make a copy
// restrict to limit count
entries = entries.subList(0, Math.min(limit, entries.size()));
int count = 0;
for (Entry entry : feed.getEntries()) {
if (count++ < limit) {
// convert existing entry id to a trsst timestamp-based id
String existing = entry.getId().toString();
long timestamp = entry.getUpdated().getTime();
// RSS feeds don't have millisecond precision
// so we need to add it to avoid duplicate ids
if (timestamp % 1000 == 0) {
// need a deterministic source
String hash = existing.toString() + entry.getTitle();
timestamp = timestamp + hash.hashCode() % 1000;
}
try {
// see if this file already exists
persistence.readEntry(feedId, timestamp);
// this file exists; remove from processing
entries.remove(entry);
} catch (FileNotFoundException e) {
// we don't already have it:
// if it's not in trsst id format
if (!existing.startsWith(Common.ENTRY_URN_PREFIX)) {
// construct a trsst id for this entry
entry.setId(Common.toEntryUrn(feedId, timestamp));
}
}
}
// remove from feed parent
entry.discard();
}
if (entries.isEmpty()) {
// no new entries to update;
// prevent the update of this feed
return;
}
// remove all navigation links before persisting
for (Link link : feed.getLinks()) {
if (Link.REL_FIRST.equals(link.getRel())
|| Link.REL_LAST.equals(link.getRel())
|| Link.REL_CURRENT.equals(link.getRel())
|| Link.REL_NEXT.equals(link.getRel())
|| Link.REL_PREVIOUS.equals(link.getRel())) {
link.discard();
}
}
// remove all opensearch elements before verifying
for (Element e : feed
.getExtensions("http://a9.com/-/spec/opensearch/1.1/")) {
e.discard();
}
// persist feed
persistence.updateFeed(feedId, feed.getUpdated(), feed.toString());
// only now persist each entry
String existingEntryXml;
for (Entry entry : entries) {
Date date = entry.getPublished();
if (date == null) {
// fall back to updated if publish not set
date = entry.getUpdated();
}
long entryId = Common.toEntryId(entry.getId());
try {
try {
existingEntryXml = persistence.readEntry(feedId, entryId);
} catch (FileNotFoundException fnfe) {
existingEntryXml = null;
}
if (existingEntryXml != null) {
Entry parsed = (Entry) Abdera.getInstance().getParser()
.parse(new StringReader(existingEntryXml))
.getRoot();
if (date.after(parsed.getUpdated())) {
// discard what we have in cache
existingEntryXml = null;
}
}
} catch (Exception e) {
existingEntryXml = null;
log.warn(
"Unexpected error parsing existing entry: "
+ entry.getId(), e);
}
if (existingEntryXml == null) {
persistence
.updateEntry(feedId, entryId, date, entry.toString());
}
}
}
/**
* Aggregate feeds contain signed entries from a number of feeds. To ingest,
* we refetch each entry directly with its feed.
*/
protected void ingestAggregateFeed(Storage storage, Feed feed,
URL relayUrl, List<String> relays) {
Object id;
String feedIdentifier;
String entryIdentifier;
// for each entry
for (Entry entry : feed.getEntries()) {
id = entry.getId();
feedIdentifier = Common.toFeedIdString(id);
entryIdentifier = Common.toEntryIdString(id);
try {
// see if this file already exists locally
persistence.readEntry(feedIdentifier,
Common.toEntryId(entryIdentifier));
log.info("Entry found: skipping: " + id);
} catch (FileNotFoundException e) {
log.info("Entry not found: fetching: " + id);
// we don't already have it:
String queryString = null;
if (relays != null) {
// reconstruct the relays parameter
queryString = "";
for (String relay : relays) {
queryString = queryString + "relay=" + relay + '&';
}
queryString = queryString.substring(0,
queryString.length() - 1);
}
// fetch enclosing feed
Feed result = pullFromServiceUrl(relayUrl, feedIdentifier + '/'
+ entryIdentifier, queryString);
// and ingest
if (result != null) {
ingestFromRelay(storage, result, relayUrl, relays);
}
} catch (IOException ioe) {
log.error("Unexpected exception from readEntry", ioe);
}
}
}
/**
* Converts from RSS parser's read-only Feed to a mutable Feed.
*/
protected Feed convertFromRSS(Feed feed) {
Feed result = Abdera.getInstance().newFeed();
// for our purposes: replace the existing feed id with the URL
result.setId(Common.toFeedUrn(feedId));
Date mostRecent = null;
result.setBaseUri(feed.getBaseUri());
result.setUpdated(feed.getUpdated());
if (feed.getIcon() != null) {
result.setIcon(feed.getIcon().toString());
}
if (feed.getLogo() != null) {
result.setLogo(feed.getLogo().toString());
}
result.setTitle(feed.getTitle());
result.setSubtitle(feed.getSubtitle());
if (feed.getAuthor() != null) {
Person existingAuthor = feed.getAuthor();
Person author = Abdera.getInstance().getFactory().newAuthor();
author.setName(existingAuthor.getName());
author.setEmail(existingAuthor.getEmail());
if (existingAuthor.getUri() != null) {
author.setUri(existingAuthor.getUri().toString());
}
result.addAuthor(author);
}
// for (Category category : feed.getCategories()) {
// result.addCategory(category.getTerm());
// java.lang.ClassCastException:
// org.apache.abdera.parser.stax.FOMExtensibleElement cannot be cast to
// org.apache.abdera.model.Category
// }
for (Link link : feed.getLinks()) {
result.addLink(link);
}
Pattern hashtagsExp = Pattern.compile("([\\#]\\w+)");
for (Entry entry : feed.getEntries()) {
try {
// convert existing entry id to a trsst timestamp-based id
Entry converted = Abdera.getInstance().newEntry();
Date updated = entry.getUpdated();
if (updated == null) {
updated = entry.getPublished();
if (updated == null) {
// fall back on dc:date
Element dcDate = entry.getExtension(new QName(
"http://purl.org/dc/elements/1.1/", "date"));
try {
updated = new AtomDate(dcDate.getText()).getDate();
} catch (Throwable t) {
log.warn("Could not parse date for feed: " + dcDate
+ " : " + feed.getId());
}
}
}
long timestamp = updated.getTime();
if (mostRecent == null || mostRecent.before(updated)) {
mostRecent = updated;
}
Object existing = null;
try {
existing = entry.getId();
} catch (IRISyntaxException irie) {
// EFF's entry ids have spaces
// "<guid isPermaLink="false">78822 at https://www.eff.org</guid>"
}
if (existing == null) {
existing = updated;
}
// RSS feeds don't have millisecond precision
// so we need to add it to avoid duplicate ids
if (timestamp % 1000 == 0) {
// need a deterministic source
String hash = existing.toString() + entry.getTitle();
timestamp = timestamp + hash.hashCode() % 1000;
}
converted.setId(Common.toEntryUrn(feedId, timestamp));
converted.setUpdated(new Date(timestamp));
converted.setPublished(entry.getPublished());
converted.setTitle(entry.getTitle());
// let RSS feeds participate in our hashtag conversations
Matcher matcher = hashtagsExp.matcher(entry.getTitle());
while (matcher.find()) {
// add tag; remove the hash.
converted.addCategory(Common.TAG_URN, matcher.group()
.substring(1), "Tag");
}
// find "link"
String linkSrc = null;
if (entry.getExtension(RssConstants.QNAME_LINK) != null) {
Element existingLink = entry
.getExtension(RssConstants.QNAME_LINK);
linkSrc = existingLink.getText();
Link link = Abdera.getInstance().getFactory().newLink();
link.setAttributeValue("src", linkSrc);
link.setRel("alternate");
link.setMimeType("text/html");
converted.addLink(link);
}
// convert content
Content existingContent = entry.getContentElement();
if (existingContent != null) {
Content convertedContent = Abdera.getInstance()
.getFactory().newContent();
List<QName> attributes = existingContent.getAttributes();
for (QName attribute : attributes) {
convertedContent.setAttributeValue(attribute,
existingContent.getAttributeValue(attribute));
}
converted.setContentElement(convertedContent);
} else if (entry.getExtension(RssConstants.QNAME_ENCLOSURE) != null) {
Element enclosure = entry
.getExtension(RssConstants.QNAME_ENCLOSURE);
Content convertedContent = Abdera.getInstance()
.getFactory().newContent();
convertedContent.setAttributeValue("src",
enclosure.getAttributeValue("url"));
convertedContent.setAttributeValue("type",
enclosure.getAttributeValue("type"));
convertedContent.setAttributeValue("length",
enclosure.getAttributeValue("length"));
converted.setContentElement(convertedContent);
Link link = Abdera.getInstance().getFactory().newLink();
link.setAttributeValue("src",
enclosure.getAttributeValue("url"));
link.setAttributeValue("type",
enclosure.getAttributeValue("type"));
link.setAttributeValue("length",
enclosure.getAttributeValue("length"));
link.setRel("enclosure");
converted.addLink(link);
} else if (linkSrc != null) {
Content convertedContent = Abdera.getInstance()
.getFactory().newContent();
convertedContent.setAttributeValue("src", linkSrc);
convertedContent.setAttributeValue("type", "text/html");
converted.setContentElement(convertedContent);
}
if (entry.getAuthor() != null) {
Person existingAuthor = entry.getAuthor();
Person author = Abdera.getInstance().getFactory()
.newAuthor();
author.setName(existingAuthor.getName());
author.setEmail(existingAuthor.getEmail());
if (existingAuthor.getUri() != null) {
author.setUri(existingAuthor.getUri().toString());
}
converted.addAuthor(author);
}
for (Link link : entry.getLinks()) {
converted.addLink(link);
}
converted.setRights(entry.getRights());
String summary = entry.getSummary();
if (summary != null) {
if (Text.Type.HTML.equals(converted.getSummaryType())) {
converted
.setSummary(entry.getSummary(), Text.Type.HTML);
} else {
converted
.setSummary(entry.getSummary(), Text.Type.TEXT);
}
}
// remove from feed parent
result.addEntry(converted);
} catch (Throwable t) {
log.warn("Could not convert RSS entry: " + entry.toString(), t);
}
}
// workaround: some RSS feeds have no update timestamp
// and that throws abdera for an NPE.
Date updated = feed.getUpdated();
if (updated == null) {
log.debug("Ingesting RSS feed with no update timestamp: using most recent entry"
+ feedId);
updated = mostRecent;
}
if (updated == null) {
log.debug("Ingesting RSS feed with no update timestamp: using last known time"
+ feedId);
Feed existingFeed = fetchFeedFromStorage(feedId, persistence);
if (existingFeed != null) {
updated = existingFeed.getUpdated();
}
}
if (updated == null) {
log.debug("Ingesting RSS feed with no update timestamp: using one day ago"
+ feedId);
updated = new Date(System.currentTimeMillis()
- (1000 * 60 * 60 * 24));
}
result.setUpdated(updated);
return result;
}
/**
* Accepts a signed feed document containing one or more signed entries. All
* signatures must be valid or the entire transaction will be rejected.
* NOTE: this is a deviation from atompub.
*/
public ResponseContext postEntry(RequestContext request) {
if (request.isAtom()) {
try {
// FIXME: using SSL, this line fails from erroneously loading a
// UTF-32 reader
// CharConversionException: Invalid UTF-32 character 0x6565663c
// at char #0, byte #3)
// at
// com.ctc.wstx.io.UTF32Reader.reportInvalid(UTF32Reader.java:197)
// Feed incomingFeed = (Feed) request.getDocument().getRoot();
// WORKAROUND:
// loading the stream and making our own parser works
byte[] bytes = Common.readFully(request.getInputStream());
// System.out.println(new String(bytes, "UTF-8"));
Feed incomingFeed = (Feed) Abdera.getInstance().getParser()
.parse(new ByteArrayInputStream(bytes)).getRoot();
// we require a feed entity (not solo entries like atompub)
ingestFeed(persistence, incomingFeed);
pushRawPostIfNeeded(incomingFeed, request, bytes);
return ProviderHelper.returnBase(incomingFeed, 201, null);
} catch (XMLSignatureException xmle) {
log.error("Could not verify signature: ", xmle);
return ProviderHelper.badrequest(request,
"Could not verify signature: " + xmle.getMessage());
} catch (FileNotFoundException fnfe) {
return ProviderHelper.notfound(request, "Not found: " + feedId);
} catch (Exception e) {
log.warn("Bad request: " + feedId, e);
return ProviderHelper.badrequest(request, e.toString());
}
} else {
return ProviderHelper.notsupported(request);
}
}
/**
* PUT operations are treated as POST operations. NOTE: this is a deviation
* from atompub.
*/
public ResponseContext putEntry(RequestContext request) {
return postEntry(request);
}
/**
* Replaces the mentioned entry with a new entry that retains only the
* following elements: id, updated, published, predecessor, signature;
* adding only the verb 'deleted' and a single mention of the 'delete'
* entry.
*
* @param deletedId
* the id to be deleted
* @param deletingId
* the id to be mentioned
*/
private static Entry deleteEntry(Storage storage, String feedId,
long deletedId, long deletingId) throws IOException {
Document<Entry> document = getEntry(storage, feedId, deletedId);
Element element;
if (document != null) {
// copy with only minimum of elements
Entry existing = document.getRoot();
Entry replacement = Abdera.getInstance().newEntry();
replacement.setId(existing.getId().toString());
replacement.setUpdated(existing.getUpdated());
replacement.setPublished(existing.getPublished());
element = existing.getFirstChild(new QName(
"http://www.w3.org/2000/09/xmldsig#", "Signature"));
replacement.addExtension(element);
element = existing.getFirstChild(new QName(Common.NS_URI,
Common.PREDECESSOR));
// might not have predecessor if genesis entry
if (element != null) {
replacement.addExtension(element);
}
// add verb 'deleted'
replacement.addSimpleExtension(new QName(
"http://activitystrea.ms/spec/1.0/", "verb", "activity"),
"deleted");
// add reference to deleting id
replacement.addCategory(Common.MENTION_URN,
Common.toEntryUrn(feedId, deletingId), "Mention");
// write the entry
storage.updateEntry(feedId, deletedId, replacement.getUpdated(),
replacement.toString());
return replacement;
}
return null;
}
/**
* DELETE operations are not permitted.
*
* Instead: post an entry with verb "delete" and mentioning one or more
* entries. The act of deleting an entry in this way is a revocation by the
* author of publication and distribution rights to the specified entry.
*
* Trsst servers that receive "delete" entries must immediately replace
* their stored copies of the mentioned entries with new entries that retain
* only the following elements: id, updated, published, predecessor,
* signature; adding only the verb 'deleted' and a single mention of the
* 'delete' entry.
*
* The signature will no longer validate, but is required for blockchain
* integrity, and relays can verify the referenced "delete" entry to allow
* redistribution of the deleted entry.
*/
public ResponseContext deleteEntry(RequestContext request) {
return ProviderHelper.notallowed(request);
}
public ResponseContext extensionRequest(RequestContext request) {
return ProviderHelper.notallowed(request, "Method Not Allowed",
ProviderHelper.getDefaultMethods(request));
}
/**
* Categories map to feed ids available on this server. This might be only
* the feeds belonging to a server's "registered users" or all feeds cached
 * by a server or some logical place in between.
*/
public ResponseContext getCategories(RequestContext request) {
return new StreamWriterResponseContext(request.getAbdera()) {
protected void writeTo(StreamWriter sw) throws IOException {
sw.startDocument().startCategories(false);
for (String id : persistence.getFeedIds(0, 100)) {
sw.writeCategory(id);
}
sw.endCategories().endDocument();
}
}.setStatus(200).setContentType(Constants.CAT_MEDIA_TYPE);
}
@SuppressWarnings({ "rawtypes" })
protected void fetchEntriesFromStorage(RequestContext context, Feed feed)
throws FileNotFoundException, IOException {
// NOTE: occasionally (<1%) jetty and/or abdera give us a servlet
// request that has a valid query string but returns no parameters;
// we now just parse the query manually every time just to be safe
Hashtable params = new Hashtable();
String uri = context.getUri().toString();
int i = uri.indexOf('?');
if (i != -1) {
params = HttpUtils.parseQueryString(uri.substring(i + 1));
}
// System.out.println("fetchEntriesFromStorage: " + params + " : " +
// uri);
String searchTerms = params.get("q") == null ? null
: ((String[]) params.get("q"))[0];
Date beginDate = null;
String verb = params.get("verb") == null ? null : ((String[]) params
.get("verb"))[0];
String[] mentions = (String[]) params.get("mention");
String[] tags = (String[]) params.get("tag");
String after = params.get("after") == null ? null : ((String[]) params
.get("after"))[0];
if (after != null) {
try {
// try to parse an entry id timestamp
beginDate = new Date(Long.parseLong(after, 16));
} catch (NumberFormatException nfe) {
// try to parse as ISO date
String begin = after;
String beginTemplate = "0000-01-01T00:00:00.000Z";
if (begin.length() < beginTemplate.length()) {
begin = begin + beginTemplate.substring(begin.length());
}
try {
beginDate = new AtomDate(begin).getDate();
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"Could not parse begin date: " + begin);
}
}
}
Date endDate = null;
String before = params.get("before") == null ? null
: ((String[]) params.get("before"))[0];
if (before != null) {
try {
// try to parse an entry id timestamp
endDate = new Date(Long.parseLong(before, 16));
} catch (NumberFormatException nfe) {
// try to parse as ISO date
String end = before;
String endTemplate = "9999-12-31T23:59:59.999Z";
if (end.length() < endTemplate.length()) {
end = end + endTemplate.substring(end.length());
}
try {
endDate = new AtomDate(end).getDate();
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"Could not parse end date: " + end);
}
}
}
// note: "default" to getPageSize was actually max page size
int length = ProviderHelper.getPageSize(context, "count", 99);
String _count = params.get("count") == null ? null : ((String[]) params
.get("count"))[0];
if (_count != null) {
try {
int newLength = Integer.parseInt(_count);
if (length != newLength) {
// BUG in abdera?
log.error("Abdera returning no count from valid count parameter: "
+ context.getUri());
}
length = newLength;
} catch (NumberFormatException exc) {
log.trace("Unrecognized count parameter: " + _count);
}
}
// int offset = ProviderHelper.getOffset(context, "page", length);
String _page = params.get("page") == null ? null : ((String[]) params
.get("page"))[0];
int page = (_page != null) ? Integer.parseInt(_page) : 0;
int begin = page * length;
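        // zero-based paging: this request covers entries [begin, begin + length)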
int total = 0;
if (length > 0) {
total = addEntriesFromStorage(feed, begin, length, beginDate,
endDate, searchTerms, mentions, tags, verb);
} else {
total = countEntriesFromStorage(beginDate, endDate, searchTerms,
mentions, tags, verb);
}
if (feed.getEntries().size() > length) {
log.error("Returned count exceeded limit: "
+ feed.getEntries().size() + " : " + length + " : "
+ context.getUri());
}
addPagingLinks(context, feed, page, length, total, searchTerms, before,
after, mentions, tags, verb);
// ARGH:
// because having links appear after entries is invalid
// we have to remove and then reinsert each entry.
// would have expected Feed.addLink to insert before entries.
List<Entry> entries = feed.getEntries();
for (Entry entry : entries) {
entry.discard();
}
for (Entry entry : entries) {
feed.addEntry(entry);
}
}
/**
* Adds entries to the specified feed for the specified search and paging
* parameters. Importantly, this method MUST call addPagingLinks before
* adding entries in order to generate valid atom xml.
*
* @return the total number of entries matching the query.
*/
protected int addEntriesFromStorage(Feed feed, int start, int length,
Date after, Date before, String query, String[] mentions,
String[] tags, String verb) {
long[] entryIds = persistence.getEntryIdsForFeedId(feedId, 0, length,
after, before, query, mentions, tags, verb);
int end = Math.min(entryIds.length, start + length);
Document<Entry> document;
for (int i = start; i < end; i++) {
document = getEntry(persistence, feedId, entryIds[i]);
if (document != null) {
feed.addEntry((Entry) document.getRoot().clone());
} else {
log.error("Could not find entry for id: " + feedId + " : "
+ Long.toHexString(entryIds[i]));
}
}
return entryIds.length;
}
/**
 * Counts entries matching the specified search parameters.
*
* @return the total number of entries matching the query.
*/
protected int countEntriesFromStorage(Date after, Date before,
String query, String[] mentions, String[] tags, String verb) {
return persistence.getEntryCountForFeedId(feedId, after, before, query,
mentions, tags, verb);
}
private void addPagingLinks(RequestContext request, Feed feed,
int currentPage, int itemsPerPage, int totalCount,
String searchTerms, String before, String after, String[] mentions,
String[] tags, String verb) {
Map<String, Object> params = new HashMap<String, Object>();
if (searchTerms != null) {
params.put("q", searchTerms);
}
if (before != null) {
params.put("before", before);
}
if (after != null) {
params.put("after", after);
}
if (mentions != null) {
// hack: template doesn't support duplicate keys with different
// values
String value = mentions[0];
for (int i = 1; i < mentions.length; i++) {
value = value + "&mention=" + mentions[i];
}
params.put("mention", value);
// FIXME: this doesn't even work because string gets escaped
}
if (tags != null) {
// hack: template doesn't support duplicate keys with different
// values
String value = tags[0];
for (int i = 1; i < tags.length; i++) {
value = value + "&tag=" + tags[i];
}
params.put("tag", value);
// FIXME: this doesn't even work because string gets escaped
}
params.put("collection", request.getTarget().getParameter("collection"));
params.put("count", itemsPerPage);
params.put("page", currentPage);
String current = paging_template.expand(params);
// current = request.getResolvedUri().resolve(current).toString();
feed.addLink(current, "current");
if (totalCount > (currentPage + 1) * itemsPerPage) {
params.put("page", currentPage + 1);
String next = paging_template.expand(params);
// next = request.getResolvedUri().resolve(next).toString();
feed.addLink(next, "next");
}
if (currentPage > 0) {
params.put("page", currentPage - 1);
String prev = paging_template.expand(params);
// prev = request.getResolvedUri().resolve(prev).toString();
feed.addLink(prev, "previous");
}
// add opensearch tags
feed.addSimpleExtension(new QName(
"http://a9.com/-/spec/opensearch/1.1/", "totalResults",
"opensearch"), Integer.toString(totalCount));
feed.addSimpleExtension(new QName(
"http://a9.com/-/spec/opensearch/1.1/", "startIndex",
"opensearch"), Integer.toString(currentPage * itemsPerPage + 1));
feed.addSimpleExtension(new QName(
"http://a9.com/-/spec/opensearch/1.1/", "itemsPerPage",
"opensearch"), Integer.toString(itemsPerPage));
}
public Map<String, String> getAlternateAccepts(RequestContext request) {
if (accepts == null) {
accepts = new HashMap<String, String>();
// NOTE: currently accepting only "media" types; no zip, pdf, etc.
accepts.put("video/mp4", Constants.LN_ALTERNATE_MULTIPART_RELATED);
accepts.put("audio/mp3", Constants.LN_ALTERNATE_MULTIPART_RELATED);
accepts.put("audio/mp4", Constants.LN_ALTERNATE_MULTIPART_RELATED);
accepts.put("image/png", Constants.LN_ALTERNATE_MULTIPART_RELATED);
accepts.put("image/jpeg", Constants.LN_ALTERNATE_MULTIPART_RELATED);
accepts.put("image/gif", Constants.LN_ALTERNATE_MULTIPART_RELATED);
accepts.put("image/svg+xml",
Constants.LN_ALTERNATE_MULTIPART_RELATED);
}
return accepts;
}
/**
* Get a media resource
*/
@Override
public ResponseContext getMedia(RequestContext request) {
String feedId = request.getTarget().getParameter("collection");
String entryId = request.getTarget().getParameter("entry");
String resourceId = request.getTarget().getParameter("resource");
InputStream input;
try {
// FIXME: this requires a double-fetch of content;
// storage should return a struct with mimetype and content length
// and data
String mimetype = persistence.readFeedEntryResourceType(feedId,
Common.toEntryId(entryId), resourceId);
input = persistence.readFeedEntryResource(feedId,
Common.toEntryId(entryId), resourceId);
MediaResponseContext response = new MediaResponseContext(input,
new EntityTag(resourceId), 200);
response.setContentType(mimetype);
return response;
} catch (FileNotFoundException e) {
return ProviderHelper.notfound(request);
} catch (IOException e) {
return ProviderHelper.badrequest(request,
"Could not parse resource request");
}
}
/**
 * Get metadata for a media resource
*/
@Override
public ResponseContext headMedia(RequestContext request) {
// TODO: implement HEAD support
return getMedia(request);
}
/**
* Checks to see if this request needs to be forwarded, and spawns tasks to
* do so if needed.
*
 * @param feed
 *            the posted feed whose base uri may indicate another home server
 * @param request
 *            the current request
 * @param requestData
 *            the raw request body, forwarded as-is if needed
*/
protected void pushRawPostIfNeeded(Feed feed, RequestContext request,
byte[] requestData) {
IRI ourUri = request.getBaseUri();
IRI theirUri = feed.getBaseUri();
if (theirUri != null) {
String url = theirUri.toString();
if (!url.startsWith(ourUri.toString())) {
// TODO: we want to eventually post to naked service url
String feedId = Common.toFeedIdString(feed.getId());
int index = url.indexOf(feedId);
if (index != -1) {
url = url.substring(0, index - 1); // trailing slash
}
syncToService(feedId, persistence, url);
pushRawPost(feed, request, requestData, url);
}
}
}
/**
* Copies the current request and sends it to the specified host. Called
* when someone posts to us an entry whose home is on another server: we
* still ingest a copy but we make sure it gets where it needs to go.
*
 * @param feed
 *            the posted feed
 * @param request
 *            the original request whose content type is reused
 * @param requestData
 *            the raw request body to forward
 * @param hostUrl
 *            the home server to receive the copy
*/
protected void pushRawPost(Feed feed, RequestContext request,
byte[] requestData, String hostUrl) {
try {
// FIXME: eventually want to move off feed ids in POST
hostUrl = hostUrl + "/" + Common.toFeedIdString(feed.getId());
new URL(hostUrl); // validates url
AbderaClient client = new AbderaClient(Abdera.getInstance(),
Common.getBuildString());
ClientResponse response = client.post(hostUrl,
new ByteArrayInputStream(requestData), new RequestOptions()
.setContentType(request.getContentType()));
log.debug("Response: " + response.getStatus() + " : "
+ response.getStatusText());
log.debug("Forwarded to: " + hostUrl);
} catch (IOException ioe) {
log.warn("Connection error while connecting to: " + hostUrl, ioe);
} catch (Throwable t) {
log.error("Unexpected error while forwarding to: " + hostUrl, t);
}
}
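    /**
     * Compares the local and remote copies of the specified feed and pulls or
     * pushes entries so that both ends converge on the most recent state.
     * Returns true if the feeds were brought into (or already were in) sync.
     */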
protected boolean syncToService(String id, Storage storage,
String serviceUrl) {
Feed localFeed = fetchFeedFromStorage(id, storage);
Feed remoteFeed = pullFromService(serviceUrl, id, "count=1");
if (localFeed != null && remoteFeed != null) {
// find which is most recent
long[] entryIds = storage.getEntryIdsForFeedId(id, 0, 1, null,
null, null, null, null, null);
List<Entry> remoteEntries = remoteFeed.getEntries();
if (entryIds.length == 0) {
// no local entries: treat as no feed and drop below
localFeed = null;
}
if (remoteEntries.size() == 0) {
// no remote entries: treat as no feed and drop below
remoteFeed = null;
}
if (localFeed != null && remoteFeed != null) {
// compare timestamps
Date localDate = new Date(entryIds[0]);
Date remoteDate = remoteEntries.get(0).getUpdated();
if (localDate.before(remoteDate)) {
// remote has latest info: pull difference
try {
remoteFeed = pullFromService(
serviceUrl,
id,
"count=99&after="
+ Long.toHexString(localDate.getTime()));
ingestFeed(storage, remoteFeed);
return true;
} catch (IllegalArgumentException e) {
log.warn("syncToService: ingest latest remote: invalid feed: "
+ id
+ " : "
+ serviceUrl
+ " : "
+ Long.toHexString(localDate.getTime()));
} catch (XMLSignatureException e) {
log.warn("syncToService: ingest latest remote: invalid signature: "
+ id
+ " : "
+ serviceUrl
+ " : "
+ Long.toHexString(localDate.getTime()));
} catch (Exception e) {
log.error("syncToService: ingest latest remote: unexpected error: "
+ id
+ " : "
+ serviceUrl
+ " : "
+ Long.toHexString(localDate.getTime()));
}
} else if (remoteDate.before(localDate)) {
// local has latest info: push difference
entryIds = storage.getEntryIdsForFeedId(id, 0, 99,
remoteDate, null, null, null, null, null);
for (long entryId : entryIds) {
localFeed.addEntry(getEntry(storage, id, entryId)
.getRoot());
}
return pushToService(localFeed, serviceUrl);
}
// otherwise: feeds are in sync
return true;
}
}
if (localFeed == null && remoteFeed != null) {
// local is missing: ingest remote
try {
ingestFeed(storage, remoteFeed);
return true;
} catch (IllegalArgumentException e) {
log.warn("syncToService: ingest remote: invalid feed: " + id
+ " : " + serviceUrl);
} catch (XMLSignatureException e) {
log.warn("syncToService: ingest remote: invalid signature: "
+ id + " : " + serviceUrl);
} catch (Exception e) {
log.error("syncToService: ingest remote: unexpected error: "
+ id + " : " + serviceUrl);
}
} else if (localFeed != null && remoteFeed == null) {
// remote is missing: push local with (all?) entries
long[] entryIds = storage.getEntryIdsForFeedId(id, 0, 99, null,
null, null, null, null, null);
for (long entryId : entryIds) {
localFeed.addEntry(getEntry(storage, id, entryId).getRoot());
}
return pushToService(localFeed, serviceUrl);
}
return false;
}
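    /**
     * Serializes the specified feed and POSTs it to the specified service
     * url. Returns true if the post was sent without a connection error.
     */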
protected boolean pushToService(Feed feed, String serviceUrl) {
try {
URL url = new URL(serviceUrl + "/"
+ Common.toFeedIdString(feed.getId()));
AbderaClient client = new AbderaClient(Abdera.getInstance(),
Common.getBuildString());
ByteArrayOutputStream output = new ByteArrayOutputStream();
feed.writeTo(output);
ClientResponse response = client
.post(url.toString(),
new ByteArrayInputStream(output.toByteArray()),
new RequestOptions()
.setContentType("application/atom+xml; type=feed; charset=utf-8"));
log.debug("Response: " + response.getStatus() + " : "
+ response.getStatusText());
log.debug("Pushed: " + feed.getId() + " : " + serviceUrl);
return true;
} catch (MalformedURLException e) {
log.error("pushToService: bad url: " + serviceUrl + "/"
+ Common.toFeedIdString(feed.getId()));
} catch (IOException e) {
log.warn("pushToService: could not connect: " + serviceUrl + "/"
+ Common.toFeedIdString(feed.getId()));
}
return false;
}
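    /**
     * Fetches a feed document from the specified service url for the given
     * entity id and query string; returns null if no feed could be fetched.
     */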
protected Feed pullFromService(String serviceUrl, String entityId,
String queryString) {
Feed result = null;
if (!entityId.startsWith("/")) {
entityId = "/" + entityId;
}
if (queryString != null) {
queryString = "?" + queryString;
} else {
queryString = "";
}
String combined = serviceUrl + entityId + queryString;
log.info("pullFromService: " + combined);
try {
AbderaClient client = new AbderaClient(Abdera.getInstance(),
Common.getBuildString());
ClientResponse response = client.get(combined);
if (response.getType() == ResponseType.SUCCESS) {
Document<Feed> document = response.getDocument();
if (document != null) {
return document.getRoot();
} else {
log.warn("pull: no document for: " + combined);
}
} else {
log.debug("pull: no document found for: " + combined + " : "
+ response.getType());
}
} catch (ClassCastException cce) {
log.error("Not a valid feed: " + combined, cce);
} catch (Exception e) {
log.error("Could not process feed from relay: " + combined, e);
}
return result;
}
private static final org.slf4j.Logger log = org.slf4j.LoggerFactory
.getLogger(TrsstAdapter.class);
}