Package com.google.enterprise.connector.gdata

Source Code of com.google.enterprise.connector.gdata.GdConnector

// Copyright 2007 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.gdata;

import java.util.logging.*;
import java.util.*;
import java.net.*;
import java.io.*;

import com.google.enterprise.connector.spi.Connector;
import com.google.enterprise.connector.spi.Property;
import com.google.enterprise.connector.spi.TraversalManager;
import com.google.enterprise.connector.spi.AuthenticationManager;
import com.google.enterprise.connector.spi.AuthorizationManager;
import com.google.enterprise.connector.spi.Session;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.DocumentList;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.SimpleDocument;
import com.google.enterprise.connector.spi.SimpleProperty;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.spi.RepositoryException;

import com.google.gdata.client.Service;
import com.google.gdata.client.Query;

import com.google.gdata.util.ServiceException;
import com.google.gdata.util.NotModifiedException;

import com.google.gdata.data.Feed;
import com.google.gdata.data.Entry;
import com.google.gdata.data.DateTime;
import com.google.gdata.data.Category;
import com.google.gdata.data.Person;
import com.google.gdata.data.Content;
import com.google.gdata.data.TextContent;
import com.google.gdata.data.MediaContent;
import com.google.gdata.data.OtherContent;

import com.google.gdata.data.media.MediaSource;

/**
* This class fills the role of Connector, Session and TraversalManager for
* a connector that can grab entries from a GData feed and packages their
* content and metadata for consumption by the connector manager.  Entries are
* processed in order of increasing last modified time.
*
* A single property, feedUrl, controls the functioning of this connector.
*
* Only public data is considered by this connector, so no authentication or
* authorization is needed (or implemented).
*
* @author amsmith@google.com (Adam Smith)
*/
public class GdConnector implements Connector, Session, TraversalManager {
 
    /**
     * These strings define the key names for connector-specific meta data
     * that will be packaged along with the required keys as defined in the SPI.
     */
    private static final String PROPNAME_TITLE = "title";
    private static final String PROPNAME_SUMMARY = "summary";
    private static final String PROPNAME_CATEGORY = "category";
    private static final String PROPNAME_AUTHOR = "author";
   
    /**
     * The maximum number of entries to be fetched when requesting new items
     * from the feed.  If more than this number of results are available,
     * last-updated-time traversal order is NOT guaranteed and some updates
     * can be missed.
     */
    private static final int MAX_RESULTS = Integer.MAX_VALUE;
 
    /** The logger for this class. */
    private static final Logger LOGGER =
        Logger.getLogger(GdConnector.class.getName());

    /** The URL for the feed to be consumed. */
    private URL feedUrl;
   
    /** The GData service that will run queries on the feeds. */
    private Service service;
   
    /** Sets the URL of the feed to consume. */
    public void setFeedUrl(URL feedUrl) {
      if (LOGGER.isLoggable(Level.CONFIG))
        LOGGER.config("FEED URL: " + feedUrl);
      this.feedUrl = feedUrl;
    }
 
    /** Gets the URL of the feed to consume. */
    public URL getFeedUrl() {
      return feedUrl;
    }
   
    /** Sets the GData Service to be used when fetching the feed. */
    public void setService(Service service) {
      if (LOGGER.isLoggable(Level.CONFIG))
        LOGGER.config("SERVICE: " + service.toString());
      this.service = service;
    }
   
    /** Gets the GData Service to be used when fetching the feed. */
    public Service getService() {
      return service;
    }
   
    /** {@inheritDoc} */
    public Session login() {
      return this; // this class behaves as a Session also
    }
 
    /** {@inheritDoc} */
    public AuthenticationManager getAuthenticationManager() {
      return null; // no authentication used
    }
   
    /** {@inheritDoc} */
    public AuthorizationManager getAuthorizationManager() {
      return null; // no authorization used
    }
   
    /** {@inheritDoc} */
    public TraversalManager getTraversalManager() {
      return this; // this class behaves as a TraversalManager also
    }

    /**
     * This connector ignores the batch size hint.
     *
     * The GData reference states that result ordering is up to the
     * implementation.  Therefore, we fetch ALL new items every time to ensure
     * that we can traverse them in order of last update.
     *
     * http://code.google.com/apis/gdata/reference.html#Queries
     */
    public void setBatchHint(int batchHint) {
      return;
    }
 
    /**
     * {@inheritDoc}
     */
    public DocumentList startTraversal() throws RepositoryException {
      return fetchResults(null);
    }

    /**
     * {@inheritDoc}
     *
     * In this connector, the checkpoint is the ISO 8601 formatted date for
     * which entries in the feed with a earlier update date will attempt to be
     * ignored.
     *
     * @param checkPoint ifModifiedSince date
     */
    public DocumentList resumeTraversal(String checkPoint)
      throws RepositoryException {
      DateTime ifModifiedSince = DateTime.parseDateTime(checkPoint);
      if (LOGGER.isLoggable(Level.INFO))
        LOGGER.info("Using ifModifiedSince of " + ifModifiedSince);
      return fetchResults(ifModifiedSince);
    }
   
    /**
     * Builds a sorted DocumentList of all of the entries from the this
     * connector's feed, as fetched by the service.
     *
     * @param ifModifiedSince a DateTime (@see resumeTraversal)
     * @return a DocumentList of entries from the feed
     */
    private DocumentList fetchResults (DateTime ifModifiedSince)
      throws RepositoryException {
      List documents = new LinkedList();
      DateTime fetchTime = DateTime.now();
      Query query = new Query(feedUrl);
      query.setMaxResults(MAX_RESULTS);
      if (ifModifiedSince != null) {
        // The use of ifModifiedSince here filters out entries that were
        // modified before the given date.  Logically, we only care about those
        // entries that were modified recently.
        query.setUpdatedMin(ifModifiedSince);
      }
     
      try {
        // The use of ifModifiedSince here tells the server this can avoid
        // returning a result feed if the feed contained only entries that
        // have been modified after the given date.  Without this, when there
        // are no changes, we would still have all of the overhead of fetching
        // the feed's meta data but get zero entries.  In terms of efficiency,
        // we don't care about the feed unless it is going to tell us something
        // new.
        Feed feed = (Feed) service.query(query, Feed.class, ifModifiedSince);
        List entries = feed.getEntries();
        LOGGER.info("Fetched " + entries.size() + " of " +
            feed.getTotalResults() + " total updated entries.");
        Collections.sort(entries, new EntryUpdatedAscendingComparator());
        for (ListIterator ei = entries.listIterator(); ei.hasNext();) {
          Entry entry = (Entry) ei.next();
          documents.add(makeDocument(entry));
        }
      } catch (NotModifiedException nme) {
        // silently return empty result set
        if (LOGGER.isLoggable(Level.INFO))
          LOGGER.info(nme.toString());
      } catch (IOException ioe) {
        throw new RepositoryException(ioe);
      } catch (ServiceException se) {
        throw new RepositoryException(se);
      }
     
      return new GdDocumentList(documents, fetchTime.toString());
    }
   
   
    /** Makes a Document from the given Entry object. */
    public static Document makeDocument (Entry entry)
    throws RepositoryException {
     
      Map map = new HashMap();
     
      // Copy required properties from entry.
     
      map.put(SpiConstants.PROPNAME_DOCID,
          makeProperty(entry.getId()));
     
      map.put(SpiConstants.PROPNAME_LASTMODIFIED,
          makeProperty(entry.getUpdated().toStringRfc822()));
         
      map.put(SpiConstants.PROPNAME_DISPLAYURL,
          makeProperty(entry.getHtmlLink().getHref()));
     
      // Build PROPNAME_MIMETYPE and PROPNAME_CONTENT from entry.
     
      Content content = entry.getContent();
     
      if (content instanceof TextContent) {
       
        TextContent textContent = (TextContent) content;
       
        map.put(SpiConstants.PROPNAME_MIMETYPE,
            makeProperty("text/html"));
       
        map.put(SpiConstants.PROPNAME_CONTENT,
            makeProperty(
                "<html><head><title>" +
                (entry.getTitle() == null ? "" :
                  entry.getTitle().getPlainText()) +
                "</title><body>" +
                textContent.getContent().getPlainText() +
                "</body></html>"));
       
      } else if (content instanceof MediaContent) {
       
        MediaContent mediaContent = (MediaContent) content;
        map.put(SpiConstants.PROPNAME_MIMETYPE,
            makeProperty(mediaContent.getMimeType().getMediaType() ));
        map.put(SpiConstants.PROPNAME_CONTENT,
            makeProperty(mediaContent.getMediaSource()));
       
      } else if (content instanceof OtherContent) {
       
        OtherContent otherContent = (OtherContent) content;
        map.put(SpiConstants.PROPNAME_MIMETYPE,
            makeProperty(otherContent.getMimeType().getMediaType() ));
        map.put(SpiConstants.PROPNAME_CONTENT,
            makeProperty(otherContent.getBytes() ));
       
      } else {
        if (LOGGER.isLoggable(Level.SEVERE))
          LOGGER.severe("Unhandled content: " + content);
        throw new RepositoryException();
      }
     
      // Extract additional properties to be sent as meta data.
      // (not defined by SpiConstants)

      if (entry.getTitle() != null)
        map.put(PROPNAME_TITLE,
            makeProperty(entry.getTitle().getPlainText()));
     
      if (entry.getSummary() != null)
        map.put(PROPNAME_SUMMARY,
            makeProperty(entry.getSummary().getPlainText()));
    
      // these are strings like "http://schemas.google.com/g/2005#event"
      if (entry.getCategories() != null) {
        List categoryList = new LinkedList();
        for (Iterator ci = entry.getCategories().iterator(); ci.hasNext(); ) {
          Category category = (Category) ci.next();
          categoryList.add( Value.getStringValue(category.getTerm()) );
        }
        map.put(PROPNAME_CATEGORY, new SimpleProperty(categoryList));
      }
     
      if (entry.getAuthors() != null) {
        List authorList = new LinkedList();
        for (Iterator ai = entry.getAuthors().iterator(); ai.hasNext(); ) {
          Person person = (Person) ai.next();
          authorList.add(Value.getStringValue(person.getName()) );
          if (person.getEmail() != null) {
            authorList.add(Value.getStringValue( person.getEmail()) );
          }
        }
        map.put(PROPNAME_AUTHOR, new SimpleProperty(authorList));
      }

      return new SimpleDocument(map);
    }

    /** Make a SimpleProperty with one value using the given string. */
    public static Property makeProperty(String str) {
      List strList = new LinkedList();
      strList.add(Value.getStringValue(str));
      return new SimpleProperty(strList);
    }
   
    /**
     * Make a SimpleProperty with one value using the bytes read from the given
     * MediaSource.
     */
    public static Property makeProperty(MediaSource source)
      throws RepositoryException {
      ByteArrayOutputStream stream = new ByteArrayOutputStream();
      try {
        MediaSource.Output.writeTo(source, stream);
      } catch (IOException ioe) {
        throw new RepositoryException(ioe);
      }
      List list = new LinkedList();
      list.add(Value.getBinaryValue(stream.toByteArray()));
      return new SimpleProperty(list);
    }
   
    /**
     * Make a SimpleProperty with one value using the given byte array directly.
     */
    public static Property makeProperty(byte [] bytes) {
      List list = new LinkedList();
      list.add(Value.getBinaryValue(bytes));
      return new SimpleProperty(list);
    }
   
    /**
     * This is a helper class to aid in the Collections.sort() call in the
     * fetchResults method.  The progress through a document list of entries in
     * this connector is supported by GdConnectorType which returns a meaningful
     * checkpoint value for documents sorted in this manner.
     */
    public static class EntryUpdatedAscendingComparator implements Comparator {
      public int compare(Object a, Object b) {
        return ((Entry) a).getUpdated().compareTo(((Entry) b).getUpdated());
      }
      public boolean equals (Object a, Object b) {
        return a.equals(b);
      }
    }
}
TOP

Related Classes of com.google.enterprise.connector.gdata.GdConnector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.